Compare commits

..

61 Commits

Author SHA1 Message Date
62dea1bb63 21Dec16 patch 2016-12-21 09:53:32 -07:00
800ff43413 Merge pull request #304 from timattox/USER-DPD_whitespace
USER-DPD: Whitespace cleanup to pair_dpd_fdt_energy.cpp
2016-12-21 09:51:10 -07:00
9161bd98bf fixed bug with pair hybrid/overaly and manybody potentials finding the right skip neighbor method 2016-12-21 09:50:29 -07:00
c4e02a5d2b USER-DPD: more whitespace fixes 2016-12-20 11:17:11 -05:00
2ba424e1a3 USER-DPD: Whitespace cleanup to pair_dpd_fdt_energy.cpp 2016-12-19 15:11:52 -05:00
81a2db8a0c 17Dec16 patch 2016-12-16 11:36:54 -07:00
0a176841e7 extra python_wrapper change needed for last patch 2016-12-16 11:35:42 -07:00
3027ac9250 patch 16Dec16 2016-12-16 10:30:57 -07:00
fc54ab5cea Merge pull request #301 from akohlmey/corrections-and-bugfixes
Collected corrections and bugfixes
2016-12-16 10:25:29 -07:00
e364b80724 added length keyword to python command 2016-12-16 10:24:25 -07:00
830c9e8661 Merge branch 'USER-DPD_internal_energy' of https://github.com/timattox/lammps_USER-DPD into corrections-and-bugfixes
This closes #303
2016-12-16 11:22:25 -05:00
4907b29ad2 Merge branch 'USER-DPD_bugfixes' of https://github.com/timattox/lammps_USER-DPD into corrections-and-bugfixes
This closes #302
2016-12-16 11:21:15 -05:00
eff7238ff2 USER-DPD: fix_eos*: partition all internal energy into the uMech term only
This makes our results more closely match a vetted serial implementation.
NOTE: This does make the output different from any previous versions.
Patch by Jim Larentzos.  Applied by Tim Mattox.
2016-12-16 10:25:12 -05:00
126fb22e93 USER-DPD: Fix #define typo in pair_multi_lucy.h and pair_multi_lucy_rx.h 2016-12-16 10:08:30 -05:00
0a90492c44 USER-DPD: Update the header files to properly document all error statements
Patch by Jim Larentzos.  Applied by Tim Mattox.
2016-12-15 17:39:15 -05:00
fed629c23e USER-DPD: Bugfix for fix_rx and fix_eos_table_rx to handle restart files.
Patch by Jim Larentzos.  Applied by Tim Mattox.
2016-12-15 17:10:13 -05:00
925481c3f4 USER-DPD: Fix hard-wall force interaction bug, and ensure fraction is >= 0
pair_exp6_rx.cpp patch by Jim Larentzos.  Applied by Tim Mattox.
2016-12-15 16:46:25 -05:00
da2ad5b6e0 update FixIntel code for new neighbor list code 2016-12-14 15:51:12 -05:00
bfcab72268 restore change to make -DLAMMPS_MEMALIGN=64 default when USER-INTEL package is installed (which requires it) 2016-12-14 15:24:55 -05:00
f509f133af patch 13Dec16: neighbor refactor, Stan pppm/disp bug fix, M Brown INTEL package updates 2016-12-13 17:14:28 -07:00
624c57e9da Merge pull request #185 from akohlmey/new-neighbor
New neighbor list code with updates for USER-OMP and USER-DPD
2016-12-13 16:24:41 -07:00
f3b355bcbe Merge pull request #298 from akohlmey/collected-small-fixes
Collected small fixes
2016-12-13 16:23:23 -07:00
ae5764beac added functionity to lib interface 2016-12-13 16:22:17 -07:00
fda43c00fd add deleted file in package to purge list 2016-12-12 13:22:54 -05:00
b34be30be6 Merge pull request #53 from stanmoore1/new-neighbor
New neighbor Kokkos
2016-12-12 13:18:03 -05:00
13b6196b82 Fixing Kokkos compile error 2016-12-12 10:47:39 -07:00
baf55c90f4 Whitespace change 2016-12-12 09:25:41 -07:00
770f5d0bf7 Whitespace change 2016-12-12 09:24:37 -07:00
a31b00965a Updating to master 2016-12-12 09:18:20 -07:00
a5e46e3e6a Merging 2016-12-09 16:20:42 -07:00
31be0da590 Merging pull request 2016-12-09 16:17:35 -07:00
0f3b2544a1 Merge pull request #1 from timattox/new-neighbor
USER-DPD workaround for neighbor list issues
2016-12-09 16:08:31 -07:00
586514e05c Merge branch 'new-neighbor' into new-neighbor 2016-12-09 16:08:08 -07:00
43c459ba56 More changes for Kokkos neighbor 2016-12-09 15:56:55 -07:00
b5c3d2f66c Merge pull request #52 from timattox/new-neighbor
USER-DPD workaround for neighbor list issues
2016-12-09 17:51:35 -05:00
5187cb97e5 USER-DPD: Make fix_shardlow request its own SSA-specific neighbor list,
instead of having pair_dpd_fdt* make the SSA-neighbor list request for it.
Forces an "extra" list to be built, but now skip lists work properly.
Maybe we can detect if skip lists won't be used, and squash the extra list.
2016-12-09 15:42:27 -06:00
eff503e56c Prevent neighbor list copies between SSA and non-SSA neighbor list requests. 2016-12-09 15:39:46 -06:00
cdcebab3bd make the output of the %CPU/OpenMP threads line consistent with compiling in OpenMP support, not having USER-OMP installed 2016-12-09 14:43:56 -05:00
ddf678da51 make fix gcmc command overview consistent
this closes #296
2016-12-09 14:30:27 -05:00
435421301b Small tweaks to Kokkos neighbor 2016-12-09 08:37:01 -07:00
9b48c49f83 Removing used Kokkos file 2016-12-08 09:18:55 -07:00
d3d5ac17bf Fixed small typos in doc 2016-12-07 19:37:51 -08:00
8318c67816 Kokkos neighbor refactor 2016-12-07 13:00:27 -07:00
7c61dbf5e2 Merge branch 'new-neighbor' of github.com:akohlmey/lammps into new-neighbor 2016-12-07 13:43:04 -05:00
39a12b15d7 Merge branch 'master' into new-neighbor
Resolved Conflicts:
	src/Purge.list
	src/neigh_derive.cpp
2016-12-07 13:40:14 -05:00
114926a488 Merge branch 'master' into new-neighbor 2016-10-02 00:26:56 -04:00
5eb9dd0c5d Merge branch 'master' into new-neighbor 2016-09-29 23:14:28 -04:00
ebabc8f0bc Merge remote-tracking branch 'lammps-rw/integration' into update-neighbor 2016-09-09 15:46:20 -04:00
232abf8534 restore locale and enforce grep option squashing 2016-09-09 15:42:14 -04:00
d22caf2658 Merge pull request #29 from timattox/new-neighbor
USER-DPD: remove several files from src that came from src/USER-DPD/
2016-09-09 15:28:56 -04:00
3842aa6095 forward skip lists /omp neighbor list builds to non-omp implementations 2016-09-09 15:23:40 -04:00
32c240978a USER-DPD: remove several files from src that came from src/USER-DPD/
These were accidentally added to git in c9c2ae6.
2016-09-09 15:17:42 -04:00
212c2617f6 delete a couple more files, that don't belong into src/ 2016-09-09 14:56:23 -04:00
40f85c93ba corrected mismatched free() vs. delete[] 2016-09-09 14:10:42 -04:00
2f02d98469 remove USER-DPD files that should not be where they are 2016-09-09 13:53:07 -04:00
4553881fc2 Merge pull request #28 from timattox/new-neighbor
New neighbor, USER-DPD updates
2016-09-09 13:11:55 -04:00
81fcbcd99c USER-DPD: move nstencil_ssa out of core LAMMPS into USER-DPD 2016-09-09 12:19:54 -04:00
82c6eb4675 USER-DPD: Set missing NP_HALF flag in npair_half_bin_newton_ssa.h 2016-09-09 12:19:36 -04:00
8ed3f4226e USER-DPD: move custom binning stuff to a NBinSSA child class.
Removes most SSA specific fields from class NeighList.
2016-09-09 12:19:06 -04:00
9b7a0d7e1c Update gitignore for the new USER-DPD source files. 2016-09-09 12:18:51 -04:00
c9c2ae6c61 new neighbor list changes 2016-09-07 13:42:58 -06:00
377 changed files with 24762 additions and 16636 deletions

View File

@ -1,7 +1,7 @@
<!-- HTML_ONLY -->
<HEAD>
<TITLE>LAMMPS Users Manual</TITLE>
<META NAME="docnumber" CONTENT="30 Nov 2016 version">
<META NAME="docnumber" CONTENT="21 Dec 2016 version">
<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
</HEAD>
@ -21,7 +21,7 @@
<H1></H1>
LAMMPS Documentation :c,h3
30 Nov 2016 version :c,h4
21 Dec 2016 version :c,h4
Version info: :h4

View File

@ -1936,18 +1936,22 @@ documentation in the src/library.cpp file for details, including
which quantities can be queried by name:
void *lammps_extract_global(void *, char *)
void lammps_extract_box(void *, double *, double *,
double *, double *, double *, int *, int *)
void *lammps_extract_atom(void *, char *)
void *lammps_extract_compute(void *, char *, int, int)
void *lammps_extract_fix(void *, char *, int, int, int, int)
void *lammps_extract_variable(void *, char *, char *) :pre
int lammps_set_variable(void *, char *, char *)
double lammps_get_thermo(void *, char *) :pre
void lammps_reset_box(void *, double *, double *, double, double, double)
int lammps_set_variable(void *, char *, char *) :pre
double lammps_get_thermo(void *, char *)
int lammps_get_natoms(void *)
void lammps_gather_atoms(void *, double *)
void lammps_scatter_atoms(void *, double *) :pre
void lammps_create_atoms(void *, int, tagint *, int *, double *, double *) :pre
void lammps_create_atoms(void *, int, tagint *, int *, double *, double *,
imageint *, int) :pre
The extract functions return a pointer to various global or per-atom
quantities stored in LAMMPS or to values calculated by a compute, fix,
@ -1957,10 +1961,16 @@ the other extract functions, the underlying storage may be reallocated
as LAMMPS runs, so you need to re-call the function to assure a
current pointer or returned value(s).
The lammps_reset_box() function resets the size and shape of the
simulation box, e.g. as part of restoring a previously extracted and
saved state of a simulation.
The lammps_set_variable() function can set an existing string-style
variable to a new string value, so that subsequent LAMMPS commands can
access the variable. The lammps_get_thermo() function returns the
current value of a thermo keyword as a double.
access the variable.
The lammps_get_thermo() function returns the current value of a thermo
keyword as a double precision value.
The lammps_get_natoms() function returns the total number of atoms in
the system and can be used by the caller to allocate space for the
@ -1973,10 +1983,13 @@ passed by the caller, to each atom owned by individual processors.
The lammps_create_atoms() function takes a list of N atoms as input
with atom types and coords (required), an optionally atom IDs and
velocities. It uses the coords of each atom to assign it as a new
atom to the processor that owns it. Additional properties for the new
atoms can be assigned via the lammps_scatter_atoms() or
lammps_extract_atom() functions.
velocities and image flags. It uses the coords of each atom to assign
it as a new atom to the processor that owns it. This function is
useful to add atoms to a simulation or (in tandem with
lammps_reset_box()) to restore a previously extracted and saved state
of a simulation. Additional properties for the new atoms can then be
assigned via the lammps_scatter_atoms() or lammps_extract_atom()
functions.
The examples/COUPLE and python directories have example C++ and C and
Python codes which show how a driver code can link to LAMMPS as a

View File

@ -51,12 +51,12 @@ relative to the center of mass (COM) velocity of the 2 atoms in the
bond.
The value {engvib} is the vibrational kinetic energy of the two atoms
in the bond, which is simply 1/2 m1 v1^2 + 1/2 m1 v2^2, where v1 and
in the bond, which is simply 1/2 m1 v1^2 + 1/2 m2 v2^2, where v1 and
v2 are the magnitude of the velocity of the 2 atoms along the bond
direction, after the COM velocity has been subtracted from each.
The value {engrot} is the rotationsl kinetic energy of the two atoms
in the bond, which is simply 1/2 m1 v1^2 + 1/2 m1 v2^2, where v1 and
in the bond, which is simply 1/2 m1 v1^2 + 1/2 m2 v2^2, where v1 and
v2 are the magnitude of the velocity of the 2 atoms perpendicular to
the bond direction, after the COM velocity has been subtracted from
each.
@ -67,7 +67,7 @@ Vcm^2 where Vcm = magnitude of the velocity of the COM.
Note that these 3 kinetic energy terms are simply a partitioning of
the summed kinetic energy of the 2 atoms themselves. I.e. total KE =
1/2 m1 v1^2 + 1/2 m2 v3^2 = engvib + engrot + engtrans, where v1,v2
1/2 m1 v1^2 + 1/2 m2 v2^2 = engvib + engrot + engtrans, where v1,v2
are the magnitude of the velocities of the 2 atoms, without any
adjustment for the COM velocity.

View File

@ -21,7 +21,7 @@ type = atom type for inserted atoms (must be 0 if mol keyword used) :l
seed = random # seed (positive integer) :l
T = temperature of the ideal gas reservoir (temperature units) :l
mu = chemical potential of the ideal gas reservoir (energy units) :l
translate = maximum Monte Carlo translation distance (length units) :l
displace = maximum Monte Carlo translation distance (length units) :l
zero or more keyword/value pairs may be appended to args :l
keyword = {mol}, {region}, {maxangle}, {pressure}, {fugacity_coeff}, {full_energy}, {charge}, {group}, {grouptype}, {intra_energy}, or {tfac_insert}
{mol} value = template-ID

View File

@ -14,7 +14,7 @@ python func keyword args ... :pre
func = name of Python function :ulb,l
one or more keyword/args pairs must be appended :l
keyword = {invoke} or {input} or {return} or {format} or {file} or {here} or {exists}
keyword = {invoke} or {input} or {return} or {format} or {length} or {file} or {here} or {exists}
{invoke} arg = none = invoke the previously defined Python function
{input} args = N i1 i2 ... iN
N = # of inputs to function
@ -29,6 +29,8 @@ keyword = {invoke} or {input} or {return} or {format} or {file} or {here} or {ex
M = N+1 if there is a return value
fstring = each character (i,f,s,p) corresponds in order to an input or return value
'i' = integer, 'f' = floating point, 's' = string, 'p' = SELF
{length} arg = Nlen
Nlen = max length of string returned from Python function
{file} arg = filename
filename = file of Python code, which defines func
{here} arg = inline
@ -165,6 +167,17 @@ equal-style variable as an argument, but only if the output of the
Python function is flagged as a numeric value ("i" or "f") via the
{format} keyword.
If the {return} keyword is used and the {format} keyword specifies the
output as a string, then the default maximum length of that string is
63 characters (64-1 for the string terminator). If you want to return
a longer string, the {length} keyword can be specified with its {Nlen}
value set to a larger number (the code allocates space for Nlen+1 to
include the string terminator). If the Python function generates a
string longer than the default 63 or the specified {Nlen}, it will be
trunctated.
:line
Either the {file}, {here}, or {exists} keyword must be used, but only
one of them. These keywords specify what Python code to load into the
Python interpreter. The {file} keyword gives the name of a file,

2
src/.gitignore vendored
View File

@ -18,6 +18,8 @@
/*_tally.cpp
/*_rx.h
/*_rx.cpp
/*_ssa.h
/*_ssa.cpp
/kokkos.cpp
/kokkos.h

View File

@ -105,11 +105,16 @@ action modify_kokkos.cpp
action modify_kokkos.h
action neigh_bond_kokkos.cpp
action neigh_bond_kokkos.h
action neigh_full_kokkos.h
action neigh_list_kokkos.cpp
action neigh_list_kokkos.h
action neighbor_kokkos.cpp
action neighbor_kokkos.h
action npair_copy_kokkos.cpp
action npair_copy_kokkos.h
action npair_kokkos.cpp
action npair_kokkos.h
action nbin_kokkos.cpp
action nbin_kokkos.h
action math_special_kokkos.cpp
action math_special_kokkos.h
action pair_buck_coul_cut_kokkos.cpp

View File

@ -125,12 +125,10 @@ void FixQEqReaxKokkos<DeviceType>::init()
neighbor->requests[irequest]->pair = 0;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else { //if (neighflag == HALF || neighflag == HALFTHREAD)
neighbor->requests[irequest]->fix = 1;
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
neighbor->requests[irequest]->ghost = 1;
}
}

View File

@ -168,7 +168,6 @@ void KokkosLMP::accelerator(int narg, char **arg)
else
neighflag = HALF;
} else if (strcmp(arg[iarg+1],"n2") == 0) neighflag = N2;
else if (strcmp(arg[iarg+1],"full/cluster") == 0) neighflag = FULLCLUSTER;
else error->all(FLERR,"Illegal package kokkos command");
iarg += 2;
} else if (strcmp(arg[iarg],"binsize") == 0) {
@ -232,20 +231,6 @@ void KokkosLMP::accelerator(int narg, char **arg)
called by Finish
------------------------------------------------------------------------- */
int KokkosLMP::neigh_list_kokkos(int m)
{
NeighborKokkos *nk = (NeighborKokkos *) neighbor;
if (nk->lists_host[m] && nk->lists_host[m]->d_numneigh.dimension_0())
return 1;
if (nk->lists_device[m] && nk->lists_device[m]->d_numneigh.dimension_0())
return 1;
return 0;
}
/* ----------------------------------------------------------------------
called by Finish
------------------------------------------------------------------------- */
int KokkosLMP::neigh_count(int m)
{
int inum;
@ -255,28 +240,30 @@ int KokkosLMP::neigh_count(int m)
ArrayTypes<LMPHostType>::t_int_1d h_numneigh;
NeighborKokkos *nk = (NeighborKokkos *) neighbor;
if (nk->lists_host[m]) {
inum = nk->lists_host[m]->inum;
if (nk->lists[m]->execution_space == Host) {
NeighListKokkos<LMPHostType>* nlistKK = (NeighListKokkos<LMPHostType>*) nk->lists[m];
inum = nlistKK->inum;
#ifndef KOKKOS_USE_CUDA_UVM
h_ilist = Kokkos::create_mirror_view(nk->lists_host[m]->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nk->lists_host[m]->d_numneigh);
h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
#else
h_ilist = nk->lists_host[m]->d_ilist;
h_numneigh = nk->lists_host[m]->d_numneigh;
h_ilist = nlistKK->d_ilist;
h_numneigh = nlistKK->d_numneigh;
#endif
Kokkos::deep_copy(h_ilist,nk->lists_host[m]->d_ilist);
Kokkos::deep_copy(h_numneigh,nk->lists_host[m]->d_numneigh);
} else if (nk->lists_device[m]) {
inum = nk->lists_device[m]->inum;
Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
} else if (nk->lists[m]->execution_space == Device) {
NeighListKokkos<LMPDeviceType>* nlistKK = (NeighListKokkos<LMPDeviceType>*) nk->lists[m];
inum = nlistKK->inum;
#ifndef KOKKOS_USE_CUDA_UVM
h_ilist = Kokkos::create_mirror_view(nk->lists_device[m]->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nk->lists_device[m]->d_numneigh);
h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
#else
h_ilist = nk->lists_device[m]->d_ilist;
h_numneigh = nk->lists_device[m]->d_numneigh;
h_ilist = nlistKK->d_ilist;
h_numneigh = nlistKK->d_numneigh;
#endif
Kokkos::deep_copy(h_ilist,nk->lists_device[m]->d_ilist);
Kokkos::deep_copy(h_numneigh,nk->lists_device[m]->d_numneigh);
Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
}
for (int i = 0; i < inum; i++) nneigh += h_numneigh[h_ilist[i]];

View File

@ -34,7 +34,6 @@ class KokkosLMP : protected Pointers {
KokkosLMP(class LAMMPS *, int, char **);
~KokkosLMP();
void accelerator(int, char **);
int neigh_list_kokkos(int);
int neigh_count(int);
private:
static void my_signal_handler(int);

145
src/KOKKOS/nbin_kokkos.cpp Normal file
View File

@ -0,0 +1,145 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "nbin_kokkos.h"
#include "neighbor.h"
#include "atom_kokkos.h"
#include "group.h"
#include "domain.h"
#include "comm.h"
#include "update.h"
#include "error.h"
#include "atom_masks.h"
using namespace LAMMPS_NS;
enum{NSQ,BIN,MULTI}; // also in Neighbor
#define SMALL 1.0e-6
#define CUT2BIN_RATIO 100
/* ---------------------------------------------------------------------- */
template<class DeviceType>
NBinKokkos<DeviceType>::NBinKokkos(LAMMPS *lmp) : NBinStandard(lmp) {
atoms_per_bin = 16;
d_resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
#ifndef KOKKOS_USE_CUDA_UVM
h_resize = Kokkos::create_mirror_view(d_resize);
#else
h_resize = d_resize;
#endif
h_resize() = 1;
}
/* ----------------------------------------------------------------------
setup neighbor binning geometry
bin numbering in each dimension is global:
0 = 0.0 to binsize, 1 = binsize to 2*binsize, etc
nbin-1,nbin,etc = bbox-binsize to bbox, bbox to bbox+binsize, etc
-1,-2,etc = -binsize to 0.0, -2*binsize to -binsize, etc
code will work for any binsize
since next(xyz) and stencil extend as far as necessary
binsize = 1/2 of cutoff is roughly optimal
for orthogonal boxes:
a dim must be filled exactly by integer # of bins
in periodic, procs on both sides of PBC must see same bin boundary
in non-periodic, coord2bin() still assumes this by use of nbin xyz
for triclinic boxes:
tilted simulation box cannot contain integer # of bins
stencil & neigh list built differently to account for this
mbinlo = lowest global bin any of my ghost atoms could fall into
mbinhi = highest global bin any of my ghost atoms could fall into
mbin = number of bins I need in a dimension
------------------------------------------------------------------------- */
template<class DeviceType>
void NBinKokkos<DeviceType>::bin_atoms_setup(int nall)
{
if (mbins > k_bins.d_view.dimension_0()) {
k_bins = DAT::tdual_int_2d("Neighbor::d_bins",mbins,atoms_per_bin);
bins = k_bins.view<DeviceType>();
k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",mbins);
bincount = k_bincount.view<DeviceType>();
last_bin_memory = update->ntimestep;
}
last_bin = update->ntimestep;
}
/* ----------------------------------------------------------------------
bin owned and ghost atoms
------------------------------------------------------------------------- */
template<class DeviceType>
void NBinKokkos<DeviceType>::bin_atoms()
{
h_resize() = 1;
while(h_resize() > 0) {
h_resize() = 0;
deep_copy(d_resize, h_resize);
MemsetZeroFunctor<DeviceType> f_zero;
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
Kokkos::parallel_for(mbins, f_zero);
DeviceType::fence();
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
x = atomKK->k_x.view<DeviceType>();
bboxlo_[0] = bboxlo[0]; bboxlo_[1] = bboxlo[1]; bboxlo_[2] = bboxlo[2];
bboxhi_[0] = bboxhi[0]; bboxhi_[1] = bboxhi[1]; bboxhi_[2] = bboxhi[2];
NPairKokkosBinAtomsFunctor<DeviceType> f(*this);
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
DeviceType::fence();
deep_copy(h_resize, d_resize);
if(h_resize()) {
atoms_per_bin += 16;
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
bins = k_bins.view<DeviceType>();
c_bins = bins;
last_bin_memory = update->ntimestep;
}
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void NBinKokkos<DeviceType>::binatomsItem(const int &i) const
{
const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2));
const int ac = Kokkos::atomic_fetch_add(&bincount[ibin], (int)1);
if(ac < bins.dimension_1()) {
bins(ibin, ac) = i;
} else {
d_resize() = 1;
}
}
namespace LAMMPS_NS {
template class NBinKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA
template class NBinKokkos<LMPHostType>;
#endif
}

153
src/KOKKOS/nbin_kokkos.h Normal file
View File

@ -0,0 +1,153 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NBIN_CLASS
NBinStyle(kk/host,
NBinKokkos<LMPHostType>,
NB_KOKKOS_HOST)
NBinStyle(kk/device,
NBinKokkos<LMPDeviceType>,
NB_KOKKOS_DEVICE)
#else
#ifndef LMP_NBIN_KOKKOS_H
#define LMP_NBIN_KOKKOS_H
#include "nbin_standard.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
template<class DeviceType>
class NBinKokkos : public NBinStandard {
public:
typedef ArrayTypes<DeviceType> AT;
NBinKokkos(class LAMMPS *);
~NBinKokkos() {}
void bin_atoms_setup(int);
void bin_atoms();
int atoms_per_bin;
DAT::tdual_int_1d k_bincount;
DAT::tdual_int_2d k_bins;
typename AT::t_int_1d bincount;
const typename AT::t_int_1d_const c_bincount;
typename AT::t_int_2d bins;
typename AT::t_int_2d_const c_bins;
typename AT::t_int_scalar d_resize;
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
typename AT::t_x_array_randomread x;
KOKKOS_INLINE_FUNCTION
void binatomsItem(const int &i) const;
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
{
int ix,iy,iz;
if (x >= bboxhi_[0])
ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx;
else if (x >= bboxlo_[0]) {
ix = static_cast<int> ((x-bboxlo_[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
if (y >= bboxhi_[1])
iy = static_cast<int> ((y-bboxhi_[1])*bininvy) + nbiny;
else if (y >= bboxlo_[1]) {
iy = static_cast<int> ((y-bboxlo_[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
if (z >= bboxhi_[2])
iz = static_cast<int> ((z-bboxhi_[2])*bininvz) + nbinz;
else if (z >= bboxlo_[2]) {
iz = static_cast<int> ((z-bboxlo_[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
{
int ix,iy,iz;
if (x >= bboxhi_[0])
ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx;
else if (x >= bboxlo_[0]) {
ix = static_cast<int> ((x-bboxlo_[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
if (y >= bboxhi_[1])
iy = static_cast<int> ((y-bboxhi_[1])*bininvy) + nbiny;
else if (y >= bboxlo_[1]) {
iy = static_cast<int> ((y-bboxlo_[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
if (z >= bboxhi_[2])
iz = static_cast<int> ((z-bboxhi_[2])*bininvz) + nbinz;
else if (z >= bboxlo_[2]) {
iz = static_cast<int> ((z-bboxlo_[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
i[0] = ix - mbinxlo;
i[1] = iy - mbinylo;
i[2] = iz - mbinzlo;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
private:
double bboxlo_[3],bboxhi_[3];
};
template<class DeviceType>
struct NPairKokkosBinAtomsFunctor {
typedef DeviceType device_type;
const NBinKokkos<DeviceType> c;
NPairKokkosBinAtomsFunctor(const NBinKokkos<DeviceType> &_c):
c(_c) {};
~NPairKokkosBinAtomsFunctor() {}
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.binatomsItem(i);
}
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -34,9 +34,8 @@ void NeighListKokkos<Device>::clean_copy()
ipage = NULL;
dpage = NULL;
maxstencil = 0;
ghostflag = 0;
maxstencil_multi = 0;
maxatoms = 0;
}
/* ---------------------------------------------------------------------- */
@ -70,49 +69,6 @@ void NeighListKokkos<Device>::grow(int nmax)
/* ---------------------------------------------------------------------- */
template<class Device>
void NeighListKokkos<Device>::stencil_allocate(int smax, int style)
{
int i;
if (style == BIN) {
if (smax > maxstencil) {
maxstencil = smax;
d_stencil =
memory->create_kokkos(d_stencil,h_stencil,stencil,maxstencil,
"neighlist:stencil");
if (ghostflag) {
memory->create_kokkos(d_stencilxyz,h_stencilxyz,stencilxyz,maxstencil,
3,"neighlist:stencilxyz");
}
}
} else {
int n = atom->ntypes;
if (maxstencil_multi == 0) {
nstencil_multi = new int[n+1];
stencil_multi = new int*[n+1];
distsq_multi = new double*[n+1];
for (i = 1; i <= n; i++) {
nstencil_multi[i] = 0;
stencil_multi[i] = NULL;
distsq_multi[i] = NULL;
}
}
if (smax > maxstencil_multi) {
maxstencil_multi = smax;
for (i = 1; i <= n; i++) {
memory->destroy(stencil_multi[i]);
memory->destroy(distsq_multi[i]);
memory->create(stencil_multi[i],maxstencil_multi,
"neighlist:stencil_multi");
memory->create(distsq_multi[i],maxstencil_multi,
"neighlist:distsq_multi");
}
}
}
}
namespace LAMMPS_NS {
template class NeighListKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA

View File

@ -20,7 +20,7 @@
namespace LAMMPS_NS {
enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u,FULLCLUSTER=16u};
enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u};
class AtomNeighbors
{
@ -74,14 +74,12 @@ public:
typename DAT::tdual_int_1d k_ilist; // local indices of I atoms
typename ArrayTypes<Device>::t_int_1d d_ilist;
typename ArrayTypes<Device>::t_int_1d d_numneigh; // # of J neighs for each I
typename ArrayTypes<Device>::t_int_1d d_stencil; // # of J neighs for each I
typename ArrayTypes<LMPHostType>::t_int_1d h_stencil; // # of J neighs per I
typename ArrayTypes<Device>::t_int_1d_3 d_stencilxyz;
typename ArrayTypes<LMPHostType>::t_int_1d_3 h_stencilxyz;
NeighListKokkos(class LAMMPS *lmp):
NeighList(lmp) {_stride = 1; maxneighs = 16;};
~NeighListKokkos() {stencil = NULL; numneigh = NULL; ilist = NULL;};
NeighList(lmp) {_stride = 1; maxneighs = 16; kokkos = 1; maxatoms = 0;
execution_space = ExecutionSpaceFromDevice<Device>::space;
};
~NeighListKokkos() {numneigh = NULL; ilist = NULL;};
KOKKOS_INLINE_FUNCTION
AtomNeighbors get_neighbors(const int &i) const {
@ -99,7 +97,8 @@ public:
int& num_neighs(const int & i) const {
return d_numneigh(i);
}
void stencil_allocate(int smax, int style);
private:
int maxatoms;
};
}

View File

@ -1,4 +1,4 @@
;/* ----------------------------------------------------------------------
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
@ -26,6 +26,10 @@
#include "angle.h"
#include "dihedral.h"
#include "improper.h"
#include "style_nbin.h"
#include "style_nstencil.h"
#include "style_npair.h"
#include "style_ntopo.h"
using namespace LAMMPS_NS;
@ -36,18 +40,11 @@ enum{NSQ,BIN,MULTI}; // also in neigh_list.cpp
NeighborKokkos::NeighborKokkos(LAMMPS *lmp) : Neighbor(lmp),
neighbond_host(lmp),neighbond_device(lmp)
{
atoms_per_bin = 16;
nlist_host = 0;
lists_host = NULL;
pair_build_host = NULL;
stencil_create_host = NULL;
nlist_device = 0;
lists_device = NULL;
pair_build_device = NULL;
stencil_create_device = NULL;
device_flag = 0;
bondlist = NULL;
anglelist = NULL;
dihedrallist = NULL;
improperlist = NULL;
}
/* ---------------------------------------------------------------------- */
@ -58,14 +55,6 @@ NeighborKokkos::~NeighborKokkos()
memory->destroy_kokkos(k_cutneighsq,cutneighsq);
cutneighsq = NULL;
for (int i = 0; i < nlist_host; i++) delete lists_host[i];
delete [] lists_host;
for (int i = 0; i < nlist_device; i++) delete lists_device[i];
delete [] lists_device;
delete [] pair_build_device;
delete [] pair_build_host;
memory->destroy_kokkos(k_ex_type,ex_type);
memory->destroy_kokkos(k_ex1_type,ex1_type);
memory->destroy_kokkos(k_ex2_type,ex2_type);
@ -89,6 +78,11 @@ void NeighborKokkos::init()
{
atomKK = (AtomKokkos *) atom;
Neighbor::init();
// 1st time allocation of xhold
if (dist_check)
xhold = DAT::tdual_x_array("neigh:xhold",maxhold);
}
/* ---------------------------------------------------------------------- */
@ -101,158 +95,16 @@ void NeighborKokkos::init_cutneighsq_kokkos(int n)
/* ---------------------------------------------------------------------- */
int NeighborKokkos::init_lists_kokkos()
{
int i;
for (i = 0; i < nlist_host; i++) delete lists_host[i];
delete [] lists_host;
delete [] pair_build_host;
delete [] stencil_create_host;
nlist_host = 0;
for (i = 0; i < nlist_device; i++) delete lists_device[i];
delete [] lists_device;
delete [] pair_build_device;
delete [] stencil_create_device;
nlist_device = 0;
nlist = 0;
for (i = 0; i < nrequest; i++) {
if (requests[i]->kokkos_device) nlist_device++;
else if (requests[i]->kokkos_host) nlist_host++;
else nlist++;
}
lists_host = new NeighListKokkos<LMPHostType>*[nrequest];
pair_build_host = new PairPtrHost[nrequest];
stencil_create_host = new StencilPtrHost[nrequest];
for (i = 0; i < nrequest; i++) {
lists_host[i] = NULL;
pair_build_host[i] = NULL;
stencil_create_host[i] = NULL;
}
for (i = 0; i < nrequest; i++) {
if (!requests[i]->kokkos_host) continue;
lists_host[i] = new NeighListKokkos<LMPHostType>(lmp);
lists_host[i]->index = i;
lists_host[i]->dnum = requests[i]->dnum;
if (requests[i]->pair) {
Pair *pair = (Pair *) requests[i]->requestor;
pair->init_list(requests[i]->id,lists_host[i]);
}
if (requests[i]->fix) {
Fix *fix = (Fix *) requests[i]->requestor;
fix->init_list(requests[i]->id,lists_host[i]);
}
}
lists_device = new NeighListKokkos<LMPDeviceType>*[nrequest];
pair_build_device = new PairPtrDevice[nrequest];
stencil_create_device = new StencilPtrDevice[nrequest];
for (i = 0; i < nrequest; i++) {
lists_device[i] = NULL;
pair_build_device[i] = NULL;
stencil_create_device[i] = NULL;
}
for (i = 0; i < nrequest; i++) {
if (!requests[i]->kokkos_device) continue;
lists_device[i] = new NeighListKokkos<LMPDeviceType>(lmp);
lists_device[i]->index = i;
lists_device[i]->dnum = requests[i]->dnum;
if (requests[i]->pair) {
Pair *pair = (Pair *) requests[i]->requestor;
pair->init_list(requests[i]->id,lists_device[i]);
}
if (requests[i]->fix) {
Fix *fix = (Fix *) requests[i]->requestor;
fix->init_list(requests[i]->id,lists_device[i]);
}
}
// 1st time allocation of xhold
if (dist_check)
xhold = DAT::tdual_x_array("neigh:xhold",maxhold);
// return # of non-Kokkos lists
return nlist;
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_list_flags1_kokkos(int i)
void NeighborKokkos::create_kokkos_list(int i)
{
if (style != BIN)
error->all(FLERR,"KOKKOS package only supports 'bin' neighbor lists");
if (lists_host[i]) {
lists_host[i]->buildflag = 1;
if (pair_build_host[i] == NULL) lists_host[i]->buildflag = 0;
if (requests[i]->occasional) lists_host[i]->buildflag = 0;
lists_host[i]->growflag = 1;
if (requests[i]->copy) lists_host[i]->growflag = 0;
lists_host[i]->stencilflag = 1;
if (style == NSQ) lists_host[i]->stencilflag = 0;
if (stencil_create[i] == NULL) lists_host[i]->stencilflag = 0;
lists_host[i]->ghostflag = 0;
if (requests[i]->ghost) lists_host[i]->ghostflag = 1;
if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1;
}
if (lists_device[i]) {
lists_device[i]->buildflag = 1;
if (pair_build_device[i] == NULL) lists_device[i]->buildflag = 0;
if (requests[i]->occasional) lists_device[i]->buildflag = 0;
lists_device[i]->growflag = 1;
if (requests[i]->copy) lists_device[i]->growflag = 0;
lists_device[i]->stencilflag = 1;
if (style == NSQ) lists_device[i]->stencilflag = 0;
if (stencil_create[i] == NULL) lists_device[i]->stencilflag = 0;
lists_device[i]->ghostflag = 0;
if (requests[i]->ghost) lists_device[i]->ghostflag = 1;
if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1;
}
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_list_flags2_kokkos(int i)
{
if (lists_host[i]) {
if (lists_host[i]->buildflag) blist[nblist++] = i;
if (lists_host[i]->growflag && requests[i]->occasional == 0)
glist[nglist++] = i;
if (lists_host[i]->stencilflag && requests[i]->occasional == 0)
slist[nslist++] = i;
}
if (lists_device[i]) {
if (lists_device[i]->buildflag) blist[nblist++] = i;
if (lists_device[i]->growflag && requests[i]->occasional == 0)
glist[nglist++] = i;
if (lists_device[i]->stencilflag && requests[i]->occasional == 0)
slist[nslist++] = i;
}
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_list_grow_kokkos(int i)
{
if (lists_host[i]!=NULL && lists_host[i]->growflag)
lists_host[i]->grow(maxatom);
if (lists_device[i]!=NULL && lists_device[i]->growflag)
lists_device[i]->grow(maxatom);
if (requests[i]->kokkos_device) {
lists[i] = new NeighListKokkos<LMPDeviceType>(lmp);
device_flag = 1;
} else if (requests[i]->kokkos_host)
lists[i] = new NeighListKokkos<LMPHostType>(lmp);
}
/* ---------------------------------------------------------------------- */
@ -281,49 +133,6 @@ void NeighborKokkos::init_ex_mol_bit_kokkos()
k_ex_mol_bit.modify<LMPHostType>();
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::choose_build(int index, NeighRequest *rq)
{
if (rq->kokkos_host != 0) {
PairPtrHost pb = NULL;
if (rq->ghost) {
if (rq->full) {
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPHostType>;
else pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,0,1>;
}
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,1,1>;
} else {
if (rq->full) {
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPHostType>;
else pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,0,0>;
}
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,1,0>;
}
pair_build_host[index] = pb;
}
if (rq->kokkos_device != 0) {
PairPtrDevice pb = NULL;
if (rq->ghost) {
if (rq->full) {
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPDeviceType>;
else pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,0,1>;
}
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,1,1>;
} else {
if (rq->full) {
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPDeviceType>;
else pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,0,0>;
}
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,1,0>;
}
pair_build_device[index] = pb;
return;
}
Neighbor::choose_build(index,rq);
}
/* ----------------------------------------------------------------------
if any atom moved trigger distance (half of neighbor skin) return 1
shrink trigger distance if box size has changed
@ -337,7 +146,7 @@ void NeighborKokkos::choose_build(int index, NeighRequest *rq)
int NeighborKokkos::check_distance()
{
if (nlist_device)
if (device_flag)
check_distance_kokkos<LMPDeviceType>();
else
check_distance_kokkos<LMPHostType>();
@ -417,7 +226,7 @@ void NeighborKokkos::operator()(TagNeighborCheckDistance<DeviceType>, const int
void NeighborKokkos::build(int topoflag)
{
if (nlist_device)
if (device_flag)
build_kokkos<LMPDeviceType>(topoflag);
else
build_kokkos<LMPHostType>(topoflag);
@ -428,18 +237,25 @@ void NeighborKokkos::build_kokkos(int topoflag)
{
typedef DeviceType device_type;
int i;
int i,m;
ago = 0;
ncalls++;
lastcall = update->ntimestep;
int nlocal = atom->nlocal;
int nall = nlocal + atom->nghost;
// check that using special bond flags will not overflow neigh lists
if (nall > NEIGHMASK)
error->one(FLERR,"Too many local+ghost atoms for neighbor list");
// store current atom positions and box size if needed
if (dist_check) {
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
x = atomKK->k_x;
int nlocal = atom->nlocal;
if (includegroup) nlocal = atom->nfirst;
int maxhold_kokkos = xhold.view<DeviceType>().dimension_0();
if (atom->nmax > maxhold || maxhold_kokkos < maxhold) {
@ -471,54 +287,33 @@ void NeighborKokkos::build_kokkos(int topoflag)
}
}
// if any lists store neighbors of ghosts:
// invoke grow() if nlocal+nghost exceeds previous list size
// else only invoke grow() if nlocal exceeds previous list size
// only for lists with growflag set and which are perpetual (glist)
// bin atoms for all NBin instances
// not just NBin associated with perpetual lists
// b/c cannot wait to bin occasional lists in build_one() call
// if bin then, atoms may have moved outside of proc domain & bin extent,
// leading to errors or even a crash
if (anyghostlist && atom->nmax > maxatom) {
maxatom = atom->nmax;
for (i = 0; i < nglist; i++)
if (lists[glist[i]]) lists[glist[i]]->grow(maxatom);
else init_list_grow_kokkos(glist[i]);
} else if (atom->nmax > maxatom) {
maxatom = atom->nmax;
for (i = 0; i < nglist; i++)
if (lists[glist[i]]) lists[glist[i]]->grow(maxatom);
else init_list_grow_kokkos(glist[i]);
}
// extend atom bin list if necessary
if (style != NSQ && atom->nmax > maxbin) {
maxbin = atom->nmax;
memory->destroy(bins);
memory->create(bins,maxbin,"bins");
}
// check that using special bond flags will not overflow neigh lists
if (atom->nlocal+atom->nghost > NEIGHMASK)
error->one(FLERR,"Too many local+ghost atoms for neighbor list");
// invoke building of pair and molecular topology neighbor lists
// only for pairwise lists with buildflag set
// blist is for standard neigh lists, otherwise is a Kokkos list
for (i = 0; i < nblist; i++) {
if (lists[blist[i]]) {
atomKK->sync(Host,ALL_MASK);
(this->*pair_build[blist[i]])(lists[blist[i]]);
} else {
if (lists_host[blist[i]])
(this->*pair_build_host[blist[i]])(lists_host[blist[i]]);
else if (lists_device[blist[i]])
(this->*pair_build_device[blist[i]])(lists_device[blist[i]]);
if (style != NSQ) {
for (int i = 0; i < nbin; i++) {
neigh_bin[i]->bin_atoms_setup(nall);
neigh_bin[i]->bin_atoms();
}
}
if (atom->molecular && topoflag)
build_topology_kokkos();
// build pairwise lists for all perpetual NPair/NeighList
// grow() with nlocal/nall args so that only realloc if have to
atomKK->sync(Host,ALL_MASK);
for (i = 0; i < npair_perpetual; i++) {
m = plist[i];
lists[m]->grow(nlocal,nall);
neigh_pair[m]->build_setup();
neigh_pair[m]->build(lists[m]);
}
// build topology lists for bonds/angles/etc
if (atom->molecular && topoflag) build_topology();
}
template<class DeviceType>
@ -532,26 +327,6 @@ void NeighborKokkos::operator()(TagNeighborXhold<DeviceType>, const int &i) cons
/* ---------------------------------------------------------------------- */
void NeighborKokkos::setup_bins_kokkos(int i)
{
if (lists_host[slist[i]]) {
lists_host[slist[i]]->stencil_allocate(smax,style);
(this->*stencil_create[slist[i]])(lists_host[slist[i]],sx,sy,sz);
} else if (lists_device[slist[i]]) {
lists_device[slist[i]]->stencil_allocate(smax,style);
(this->*stencil_create[slist[i]])(lists_device[slist[i]],sx,sy,sz);
}
//if (i < nslist-1) return; // this won't work if a non-kokkos neighbor list is last
if (maxhead > k_bins.d_view.dimension_0()) {
k_bins = DAT::tdual_int_2d("Neighbor::d_bins",maxhead,atoms_per_bin);
k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",maxhead);
}
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::modify_ex_type_grow_kokkos(){
memory->grow_kokkos(k_ex1_type,ex1_type,maxex_type,"neigh:ex1_type");
k_ex1_type.modify<LMPHostType>();
@ -575,8 +350,8 @@ void NeighborKokkos::modify_mol_group_grow_kokkos(){
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_topology_kokkos() {
if (nlist_device) {
void NeighborKokkos::init_topology() {
if (device_flag) {
neighbond_device.init_topology_kk();
} else {
neighbond_host.init_topology_kk();
@ -588,8 +363,8 @@ void NeighborKokkos::init_topology_kokkos() {
normally built with pair lists, but USER-CUDA separates them
------------------------------------------------------------------------- */
void NeighborKokkos::build_topology_kokkos() {
if (nlist_device) {
void NeighborKokkos::build_topology() {
if (device_flag) {
neighbond_device.build_topology_kk();
k_bondlist = neighbond_device.k_bondlist;
@ -637,7 +412,3 @@ void NeighborKokkos::build_topology_kokkos() {
k_improperlist.modify<LMPHostType>();
}
}
// include to trigger instantiation of templated functions
#include "neigh_full_kokkos.h"

View File

@ -22,316 +22,6 @@
namespace LAMMPS_NS {
template<class Device>
class NeighborKokkosExecute
{
typedef ArrayTypes<Device> AT;
public:
NeighListKokkos<Device> neigh_list;
const typename AT::t_xfloat_2d_randomread cutneighsq;
const typename AT::t_int_1d bincount;
const typename AT::t_int_1d_const c_bincount;
typename AT::t_int_2d bins;
typename AT::t_int_2d_const c_bins;
const typename AT::t_x_array_randomread x;
const typename AT::t_int_1d_const type,mask,molecule;
const typename AT::t_tagint_1d_const tag;
const typename AT::t_tagint_2d_const special;
const typename AT::t_int_2d_const nspecial;
const int molecular;
int moltemplate;
int special_flag[4];
const int nbinx,nbiny,nbinz;
const int mbinx,mbiny,mbinz;
const int mbinxlo,mbinylo,mbinzlo;
const X_FLOAT bininvx,bininvy,bininvz;
X_FLOAT bboxhi[3],bboxlo[3];
const int nlocal;
const int exclude;
const int nex_type;
const int maxex_type;
const typename AT::t_int_1d_const ex1_type,ex2_type;
const typename AT::t_int_2d_const ex_type;
const int nex_group;
const int maxex_group;
const typename AT::t_int_1d_const ex1_group,ex2_group;
const typename AT::t_int_1d_const ex1_bit,ex2_bit;
const int nex_mol;
const int maxex_mol;
const typename AT::t_int_1d_const ex_mol_group;
const typename AT::t_int_1d_const ex_mol_bit;
typename AT::t_int_scalar resize;
typename AT::t_int_scalar new_maxneighs;
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
typename ArrayTypes<LMPHostType>::t_int_scalar h_new_maxneighs;
const int xperiodic, yperiodic, zperiodic;
const int xprd_half, yprd_half, zprd_half;
NeighborKokkosExecute(
const NeighListKokkos<Device> &_neigh_list,
const typename AT::t_xfloat_2d_randomread &_cutneighsq,
const typename AT::t_int_1d &_bincount,
const typename AT::t_int_2d &_bins,
const int _nlocal,
const typename AT::t_x_array_randomread &_x,
const typename AT::t_int_1d_const &_type,
const typename AT::t_int_1d_const &_mask,
const typename AT::t_int_1d_const &_molecule,
const typename AT::t_tagint_1d_const &_tag,
const typename AT::t_tagint_2d_const &_special,
const typename AT::t_int_2d_const &_nspecial,
const int &_molecular,
const int & _nbinx,const int & _nbiny,const int & _nbinz,
const int & _mbinx,const int & _mbiny,const int & _mbinz,
const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
const int & _exclude,const int & _nex_type,const int & _maxex_type,
const typename AT::t_int_1d_const & _ex1_type,
const typename AT::t_int_1d_const & _ex2_type,
const typename AT::t_int_2d_const & _ex_type,
const int & _nex_group,const int & _maxex_group,
const typename AT::t_int_1d_const & _ex1_group,
const typename AT::t_int_1d_const & _ex2_group,
const typename AT::t_int_1d_const & _ex1_bit,
const typename AT::t_int_1d_const & _ex2_bit,
const int & _nex_mol,const int & _maxex_mol,
const typename AT::t_int_1d_const & _ex_mol_group,
const typename AT::t_int_1d_const & _ex_mol_bit,
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo,
const int & _xperiodic, const int & _yperiodic, const int & _zperiodic,
const int & _xprd_half, const int & _yprd_half, const int & _zprd_half):
neigh_list(_neigh_list), cutneighsq(_cutneighsq),
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
nlocal(_nlocal),
x(_x),type(_type),mask(_mask),molecule(_molecule),
tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular),
nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz),
mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz),
mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo),
bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz),
exclude(_exclude),nex_type(_nex_type),maxex_type(_maxex_type),
ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type),
nex_group(_nex_group),maxex_group(_maxex_group),
ex1_group(_ex1_group),ex2_group(_ex2_group),
ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),maxex_mol(_maxex_mol),
ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half){
if (molecular == 2) moltemplate = 1;
else moltemplate = 0;
bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2];
bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2];
resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
#ifndef KOKKOS_USE_CUDA_UVM
h_resize = Kokkos::create_mirror_view(resize);
#else
h_resize = resize;
#endif
h_resize() = 1;
new_maxneighs = typename AT::
t_int_scalar("NeighborKokkosFunctor::new_maxneighs");
#ifndef KOKKOS_USE_CUDA_UVM
h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs);
#else
h_new_maxneighs = new_maxneighs;
#endif
h_new_maxneighs() = neigh_list.maxneighs;
};
~NeighborKokkosExecute() {neigh_list.clean_copy();};
template<int HalfNeigh, int GhostNewton>
KOKKOS_FUNCTION
void build_Item(const int &i) const;
template<int HalfNeigh>
KOKKOS_FUNCTION
void build_Item_Ghost(const int &i) const;
template<int ClusterSize>
KOKKOS_FUNCTION
void build_cluster_Item(const int &i) const;
#ifdef KOKKOS_HAVE_CUDA
template<int HalfNeigh, int GhostNewton>
__device__ inline
void build_ItemCuda(typename Kokkos::TeamPolicy<Device>::member_type dev) const;
#endif
KOKKOS_INLINE_FUNCTION
void binatomsItem(const int &i) const;
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
{
int ix,iy,iz;
if (x >= bboxhi[0])
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
else if (x >= bboxlo[0]) {
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
if (y >= bboxhi[1])
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
else if (y >= bboxlo[1]) {
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
if (z >= bboxhi[2])
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
else if (z >= bboxlo[2]) {
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
{
int ix,iy,iz;
if (x >= bboxhi[0])
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
else if (x >= bboxlo[0]) {
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
if (y >= bboxhi[1])
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
else if (y >= bboxlo[1]) {
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
if (z >= bboxhi[2])
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
else if (z >= bboxlo[2]) {
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
i[0] = ix - mbinxlo;
i[1] = iy - mbinylo;
i[2] = iz - mbinzlo;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const;
KOKKOS_INLINE_FUNCTION
int find_special(const int &i, const int &j) const;
KOKKOS_INLINE_FUNCTION
int minimum_image_check(double dx, double dy, double dz) const {
if (xperiodic && fabs(dx) > xprd_half) return 1;
if (yperiodic && fabs(dy) > yprd_half) return 1;
if (zperiodic && fabs(dz) > zprd_half) return 1;
return 0;
}
};
template<class Device>
struct NeighborKokkosBinAtomsFunctor {
typedef Device device_type;
const NeighborKokkosExecute<Device> c;
NeighborKokkosBinAtomsFunctor(const NeighborKokkosExecute<Device> &_c):
c(_c) {};
~NeighborKokkosBinAtomsFunctor() {}
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.binatomsItem(i);
}
};
template<class Device,int HALF_NEIGH,int GHOST_NEWTON>
struct NeighborKokkosBuildFunctor {
typedef Device device_type;
const NeighborKokkosExecute<Device> c;
const size_t sharedsize;
NeighborKokkosBuildFunctor(const NeighborKokkosExecute<Device> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
}
#ifdef KOKKOS_HAVE_CUDA
KOKKOS_INLINE_FUNCTION
void operator() (typename Kokkos::TeamPolicy<Device>::member_type dev) const {
c.template build_ItemCuda<HALF_NEIGH,GHOST_NEWTON>(dev);
}
size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; }
#endif
};
template<class Device,int HALF_NEIGH>
struct NeighborKokkosBuildFunctorGhost {
typedef Device device_type;
const NeighborKokkosExecute<Device> c;
const size_t sharedsize;
NeighborKokkosBuildFunctorGhost(const NeighborKokkosExecute<Device> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_Item_Ghost<HALF_NEIGH>(i);
}
};
template<class Device,int ClusterSize>
struct NeighborClusterKokkosBuildFunctor {
typedef Device device_type;
const NeighborKokkosExecute<Device> c;
const size_t sharedsize;
NeighborClusterKokkosBuildFunctor(const NeighborKokkosExecute<Device> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_cluster_Item<ClusterSize>(i);
}
};
template<class DeviceType>
struct TagNeighborCheckDistance{};
@ -342,24 +32,11 @@ class NeighborKokkos : public Neighbor {
public:
typedef int value_type;
int nlist_host; // pairwise neighbor lists on Host
NeighListKokkos<LMPHostType> **lists_host;
int nlist_device; // pairwise neighbor lists on Device
NeighListKokkos<LMPDeviceType> **lists_device;
NeighBondKokkos<LMPHostType> neighbond_host;
NeighBondKokkos<LMPDeviceType> neighbond_device;
DAT::tdual_int_2d k_bondlist;
DAT::tdual_int_2d k_anglelist;
DAT::tdual_int_2d k_dihedrallist;
DAT::tdual_int_2d k_improperlist;
NeighborKokkos(class LAMMPS *);
~NeighborKokkos();
void init();
void init_topology();
void build_topology();
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
@ -369,11 +46,7 @@ class NeighborKokkos : public Neighbor {
KOKKOS_INLINE_FUNCTION
void operator()(TagNeighborXhold<DeviceType>, const int&) const;
private:
int atoms_per_bin;
DAT::tdual_xfloat_2d k_cutneighsq;
DAT::tdual_int_1d k_bincount;
DAT::tdual_int_2d k_bins;
DAT::tdual_int_1d k_ex1_type,k_ex2_type;
DAT::tdual_int_2d k_ex_type;
@ -382,6 +55,16 @@ class NeighborKokkos : public Neighbor {
DAT::tdual_int_1d k_ex_mol_group;
DAT::tdual_int_1d k_ex_mol_bit;
NeighBondKokkos<LMPHostType> neighbond_host;
NeighBondKokkos<LMPDeviceType> neighbond_device;
DAT::tdual_int_2d k_bondlist;
DAT::tdual_int_2d k_anglelist;
DAT::tdual_int_2d k_dihedrallist;
DAT::tdual_int_2d k_improperlist;
private:
DAT::tdual_x_array x;
DAT::tdual_x_array xhold;
@ -389,14 +72,10 @@ class NeighborKokkos : public Neighbor {
int device_flag;
void init_cutneighsq_kokkos(int);
int init_lists_kokkos();
void init_list_flags1_kokkos(int);
void init_list_flags2_kokkos(int);
void init_list_grow_kokkos(int);
void create_kokkos_list(int);
void init_ex_type_kokkos(int);
void init_ex_bit_kokkos();
void init_ex_mol_bit_kokkos();
void choose_build(int, NeighRequest *);
virtual int check_distance();
template<class DeviceType> int check_distance_kokkos();
virtual void build(int);
@ -405,27 +84,6 @@ class NeighborKokkos : public Neighbor {
void modify_ex_type_grow_kokkos();
void modify_ex_group_grow_kokkos();
void modify_mol_group_grow_kokkos();
void init_topology_kokkos();
void build_topology_kokkos();
typedef void (NeighborKokkos::*PairPtrHost)
(class NeighListKokkos<LMPHostType> *);
PairPtrHost *pair_build_host;
typedef void (NeighborKokkos::*PairPtrDevice)
(class NeighListKokkos<LMPDeviceType> *);
PairPtrDevice *pair_build_device;
template<class DeviceType,int HALF_NEIGH, int GHOST>
void full_bin_kokkos(NeighListKokkos<DeviceType> *list);
template<class DeviceType>
void full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list);
typedef void (NeighborKokkos::*StencilPtrHost)
(class NeighListKokkos<LMPHostType> *, int, int, int);
StencilPtrHost *stencil_create_host;
typedef void (NeighborKokkos::*StencilPtrDevice)
(class NeighListKokkos<LMPDeviceType> *, int, int, int);
StencilPtrDevice *stencil_create_device;
};
}

View File

@ -0,0 +1,62 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "npair_copy_kokkos.h"
#include "neighbor.h"
#include "neigh_list_kokkos.h"
#include "atom.h"
#include "atom_vec.h"
#include "molecule.h"
#include "domain.h"
#include "my_page.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
template<class DeviceType>
NPairCopyKokkos<DeviceType>::NPairCopyKokkos(LAMMPS *lmp) : NPair(lmp) {}
/* ----------------------------------------------------------------------
create list which is simply a copy of parent list
------------------------------------------------------------------------- */
template<class DeviceType>
void NPairCopyKokkos<DeviceType>::build(NeighList *list)
{
NeighList *listcopy = list->listcopy;
list->inum = listcopy->inum;
list->gnum = listcopy->gnum;
list->ilist = listcopy->ilist;
list->numneigh = listcopy->numneigh;
list->firstneigh = listcopy->firstneigh;
list->firstdouble = listcopy->firstdouble;
list->ipage = listcopy->ipage;
list->dpage = listcopy->dpage;
NeighListKokkos<DeviceType>* list_kk = (NeighListKokkos<DeviceType>*) list;
NeighListKokkos<DeviceType>* listcopy_kk = (NeighListKokkos<DeviceType>*) list->listcopy;
list_kk->d_ilist = listcopy_kk->d_ilist;
list_kk->d_numneigh = listcopy_kk->d_numneigh;
list_kk->d_neighbors = listcopy_kk->d_neighbors;
}
namespace LAMMPS_NS {
template class NPairCopyKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA
template class NPairCopyKokkos<LMPHostType>;
#endif
}

View File

@ -0,0 +1,48 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
NPairStyle(copy/kk/device,
NPairCopyKokkos<LMPDeviceType>,
NP_COPY | NP_KOKKOS_DEVICE)
NPairStyle(copy/kk/host,
NPairCopyKokkos<LMPHostType>,
NP_COPY | NP_KOKKOS_HOST)
#else
#ifndef LMP_NPAIR_COPY_KOKKOS_H
#define LMP_NPAIR_COPY_KOKKOS_H
#include "npair.h"
namespace LAMMPS_NS {
template<class DeviceType>
class NPairCopyKokkos : public NPair {
public:
NPairCopyKokkos(class LAMMPS *);
~NPairCopyKokkos() {}
void build(class NeighList *);
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -11,17 +11,105 @@
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "npair_kokkos.h"
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "domain_kokkos.h"
#include "neighbor_kokkos.h"
#include "nbin_kokkos.h"
#include "nstencil.h"
#include "force.h"
namespace LAMMPS_NS {
/* ---------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST>
void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::NPairKokkos(LAMMPS *lmp) : NPair(lmp) {
}
/* ----------------------------------------------------------------------
copy needed info from Neighbor class to this build class
------------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::copy_neighbor_info()
{
NPair::copy_neighbor_info();
NeighborKokkos* neighborKK = (NeighborKokkos*) neighbor;
// general params
newton_pair = force->newton_pair;
k_cutneighsq = neighborKK->k_cutneighsq;
// exclusion info
k_ex1_type = neighborKK->k_ex1_type;
k_ex2_type = neighborKK->k_ex2_type;
k_ex_type = neighborKK->k_ex_type;
k_ex1_group = neighborKK->k_ex1_group;
k_ex2_group = neighborKK->k_ex2_group;
k_ex1_bit = neighborKK->k_ex1_bit;
k_ex2_bit = neighborKK->k_ex2_bit;
k_ex_mol_group = neighborKK->k_ex_mol_group;
k_ex_mol_bit = neighborKK->k_ex_mol_bit;
}
/* ----------------------------------------------------------------------
copy per-atom and per-bin vectors from NBin class to this build class
------------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::copy_bin_info()
{
NPair::copy_bin_info();
NBinKokkos<DeviceType>* nbKK = (NBinKokkos<DeviceType>*) nb;
atoms_per_bin = nbKK->atoms_per_bin;
k_bincount = nbKK->k_bincount;
k_bins = nbKK->k_bins;
}
/* ----------------------------------------------------------------------
copy needed info from NStencil class to this build class
------------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::copy_stencil_info()
{
NPair::copy_stencil_info();
nstencil = ns->nstencil;
int maxstencil = ns->get_maxstencil();
k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil);
for (int k = 0; k < maxstencil; k++)
k_stencil.h_view(k) = ns->stencil[k];
k_stencil.modify<LMPHostType>();
k_stencil.sync<DeviceType>();
if (GHOST) {
k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil);
for (int k = 0; k < maxstencil; k++) {
k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0];
k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1];
k_stencilxyz.h_view(k,2) = ns->stencilxyz[k][2];
}
k_stencilxyz.modify<LMPHostType>();
k_stencilxyz.sync<DeviceType>();
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::build(NeighList *list_)
{
NeighListKokkos<DeviceType>* list = (NeighListKokkos<DeviceType>*) list_;
const int nlocal = includegroup?atom->nfirst:atom->nlocal;
int nall = nlocal;
if (GHOST)
@ -32,7 +120,11 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
data(*list,
k_cutneighsq.view<DeviceType>(),
k_bincount.view<DeviceType>(),
k_bins.view<DeviceType>(),nlocal,
k_bins.view<DeviceType>(),
nstencil,
k_stencil.view<DeviceType>(),
k_stencilxyz.view<DeviceType>(),
nlocal,
atomKK->k_x.view<DeviceType>(),
atomKK->k_type.view<DeviceType>(),
atomKK->k_mask.view<DeviceType>(),
@ -43,16 +135,16 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
atomKK->molecular,
nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
bininvx,bininvy,bininvz,
exclude, nex_type,maxex_type,
exclude, nex_type,
k_ex1_type.view<DeviceType>(),
k_ex2_type.view<DeviceType>(),
k_ex_type.view<DeviceType>(),
nex_group,maxex_group,
nex_group,
k_ex1_group.view<DeviceType>(),
k_ex2_group.view<DeviceType>(),
k_ex1_bit.view<DeviceType>(),
k_ex2_bit.view<DeviceType>(),
nex_mol, maxex_mol,
nex_mol,
k_ex_mol_group.view<DeviceType>(),
k_ex_mol_bit.view<DeviceType>(),
bboxhi,bboxlo,
@ -69,40 +161,15 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
k_ex2_bit.sync<DeviceType>();
k_ex_mol_group.sync<DeviceType>();
k_ex_mol_bit.sync<DeviceType>();
k_bincount.sync<DeviceType>(),
k_bins.sync<DeviceType>(),
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
Kokkos::deep_copy(list->d_stencil,list->h_stencil);
if (GHOST)
Kokkos::deep_copy(list->d_stencilxyz,list->h_stencilxyz);
data.special_flag[0] = special_flag[0];
data.special_flag[1] = special_flag[1];
data.special_flag[2] = special_flag[2];
data.special_flag[3] = special_flag[3];
while(data.h_resize() > 0) {
data.h_resize() = 0;
deep_copy(data.resize, data.h_resize);
MemsetZeroFunctor<DeviceType> f_zero;
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
Kokkos::parallel_for(mbins, f_zero);
DeviceType::fence();
NeighborKokkosBinAtomsFunctor<DeviceType> f(data);
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
DeviceType::fence();
deep_copy(data.h_resize, data.resize);
if(data.h_resize()) {
atoms_per_bin += 16;
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
data.bins = k_bins.view<DeviceType>();
data.c_bins = data.bins;
}
}
if(list->d_neighbors.dimension_0()<nall) {
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs);
list->d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("numneigh", nall*1.1);
@ -125,18 +192,18 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
#endif
if (GHOST) {
NeighborKokkosBuildFunctorGhost<DeviceType,HALF_NEIGH> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
NPairKokkosBuildFunctorGhost<DeviceType,HALF_NEIGH> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
Kokkos::parallel_for(nall, f);
} else {
if(newton_pair) {
NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,1> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
if (newton_pair) {
NPairKokkosBuildFunctor<DeviceType,HALF_NEIGH,1> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
#ifdef KOKKOS_HAVE_CUDA
Kokkos::parallel_for(config, f);
#else
Kokkos::parallel_for(nall, f);
#endif
} else {
NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
NPairKokkosBuildFunctor<DeviceType,HALF_NEIGH,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
#ifdef KOKKOS_HAVE_CUDA
Kokkos::parallel_for(config, f);
#else
@ -169,24 +236,9 @@ if (GHOST) {
/* ---------------------------------------------------------------------- */
template<class Device>
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void NeighborKokkosExecute<Device>::binatomsItem(const int &i) const
{
const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2));
const int ac = Kokkos::atomic_fetch_add(&bincount[ibin], (int)1);
if(ac < bins.dimension_1()) {
bins(ibin, ac) = i;
} else {
resize() = 1;
}
}
/* ---------------------------------------------------------------------- */
template<class Device>
KOKKOS_INLINE_FUNCTION
int NeighborKokkosExecute<Device>::find_special(const int &i, const int &j) const
int NeighborKokkosExecute<DeviceType>::find_special(const int &i, const int &j) const
{
const int n1 = nspecial(i,0);
const int n2 = nspecial(i,1);
@ -214,9 +266,9 @@ int NeighborKokkosExecute<Device>::find_special(const int &i, const int &j) cons
/* ---------------------------------------------------------------------- */
template<class Device>
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
int NeighborKokkosExecute<Device>::exclusion(const int &i,const int &j,
int NeighborKokkosExecute<DeviceType>::exclusion(const int &i,const int &j,
const int &itype,const int &jtype) const
{
int m;
@ -241,8 +293,8 @@ int NeighborKokkosExecute<Device>::exclusion(const int &i,const int &j,
/* ---------------------------------------------------------------------- */
template<class Device> template<int HalfNeigh,int GhostNewton>
void NeighborKokkosExecute<Device>::
template<class DeviceType> template<int HalfNeigh,int Newton>
void NeighborKokkosExecute<DeviceType>::
build_Item(const int &i) const
{
/* if necessary, goto next page and add pages */
@ -261,9 +313,8 @@ void NeighborKokkosExecute<Device>::
const int ibin = coord2bin(xtmp, ytmp, ztmp);
const int nstencil = neigh_list.nstencil;
const typename ArrayTypes<Device>::t_int_1d_const_um stencil
= neigh_list.d_stencil;
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
= d_stencil;
// loop over all bins in neighborhood (includes ibin)
if(HalfNeigh)
@ -272,8 +323,8 @@ void NeighborKokkosExecute<Device>::
const int jtype = type(j);
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists
if((j == i) || (HalfNeigh && !GhostNewton && (j < i)) ||
(HalfNeigh && GhostNewton && ((j < i) || ((j >= nlocal) &&
if((j == i) || (HalfNeigh && !Newton && (j < i)) ||
(HalfNeigh && Newton && ((j < i) || ((j >= nlocal) &&
((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
) continue;
@ -312,14 +363,16 @@ void NeighborKokkosExecute<Device>::
for(int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
// get subview of jbin
if(HalfNeigh&&(ibin==jbin)) continue;
//const ArrayTypes<Device>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
//const ArrayTypes<DeviceType>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
for(int m = 0; m < c_bincount(jbin); m++) {
const int j = c_bins(jbin,m);
const int jtype = type(j);
if(HalfNeigh && !GhostNewton && (j < i)) continue;
if(HalfNeigh && !Newton && (j < i)) continue;
if(!HalfNeigh && j==i) continue;
if(exclude && exclusion(i,j,itype,jtype)) continue;
@ -331,7 +384,7 @@ void NeighborKokkosExecute<Device>::
if(rsq <= cutneighsq(itype,jtype)) {
if (molecular) {
if (!moltemplate)
which = find_special(i,j);
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
@ -364,15 +417,18 @@ void NeighborKokkosExecute<Device>::
if(n >= new_maxneighs()) new_maxneighs() = n;
}
neigh_list.d_ilist(i) = i;
}
/* ---------------------------------------------------------------------- */
#ifdef KOKKOS_HAVE_CUDA
extern __shared__ X_FLOAT sharedmem[];
/* ---------------------------------------------------------------------- */
template<class DeviceType> template<int HalfNeigh,int GhostNewton>
template<class DeviceType> template<int HalfNeigh,int Newton>
__device__ inline
void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const
{
@ -429,8 +485,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists
if((j == i) ||
(HalfNeigh && !GhostNewton && (j < i)) ||
(HalfNeigh && GhostNewton &&
(HalfNeigh && !Newton && (j < i)) ||
(HalfNeigh && Newton &&
((j < i) ||
((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
@ -445,7 +501,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
if (molecular) {
int which = 0;
if (!moltemplate)
which = find_special(i,j);
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
@ -472,9 +528,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
}
__syncthreads();
const int nstencil = neigh_list.nstencil;
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
= neigh_list.d_stencil;
= d_stencil;
for(int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
@ -501,7 +556,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
const int jtype = other_x[m + 3 * atoms_per_bin];
//if(HalfNeigh && (j < i)) continue;
if(HalfNeigh && !GhostNewton && (j < i)) continue;
if(HalfNeigh && !Newton && (j < i)) continue;
if(!HalfNeigh && j==i) continue;
if(exclude && exclusion(i,j,itype,jtype)) continue;
@ -514,7 +569,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
if (molecular) {
int which = 0;
if (!moltemplate)
which = find_special(i,j);
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
@ -558,8 +613,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
/* ---------------------------------------------------------------------- */
template<class Device> template<int HalfNeigh>
void NeighborKokkosExecute<Device>::
template<class DeviceType> template<int HalfNeigh>
void NeighborKokkosExecute<DeviceType>::
build_Item_Ghost(const int &i) const
{
/* if necessary, goto next page and add pages */
@ -576,11 +631,10 @@ void NeighborKokkosExecute<Device>::
const X_FLOAT ztmp = x(i, 2);
const int itype = type(i);
const int nstencil = neigh_list.nstencil;
const typename ArrayTypes<Device>::t_int_1d_const_um stencil
= neigh_list.d_stencil;
const typename ArrayTypes<Device>::t_int_1d_3_const_um stencilxyz
= neigh_list.d_stencilxyz;
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
= d_stencil;
const typename ArrayTypes<DeviceType>::t_int_1d_3_const_um stencilxyz
= d_stencilxyz;
// loop over all atoms in surrounding bins in stencil including self
// when i is a ghost atom, must check if stencil bin is out of bounds
@ -679,197 +733,17 @@ void NeighborKokkosExecute<Device>::
neigh_list.d_ilist(i) = i;
}
template<class DeviceType>
void NeighborKokkos::full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list)
{
const int nall = includegroup?atom->nfirst:atom->nlocal;
list->grow(nall);
}
NeighborKokkosExecute<DeviceType>
data(*list,
k_cutneighsq.view<DeviceType>(),
k_bincount.view<DeviceType>(),
k_bins.view<DeviceType>(),nall,
atomKK->k_x.view<DeviceType>(),
atomKK->k_type.view<DeviceType>(),
atomKK->k_mask.view<DeviceType>(),
atomKK->k_molecule.view<DeviceType>(),
atomKK->k_tag.view<DeviceType>(),
atomKK->k_special.view<DeviceType>(),
atomKK->k_nspecial.view<DeviceType>(),
atomKK->molecular,
nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
bininvx,bininvy,bininvz,
exclude, nex_type,maxex_type,
k_ex1_type.view<DeviceType>(),
k_ex2_type.view<DeviceType>(),
k_ex_type.view<DeviceType>(),
nex_group,maxex_group,
k_ex1_group.view<DeviceType>(),
k_ex2_group.view<DeviceType>(),
k_ex1_bit.view<DeviceType>(),
k_ex2_bit.view<DeviceType>(),
nex_mol, maxex_mol,
k_ex_mol_group.view<DeviceType>(),
k_ex_mol_bit.view<DeviceType>(),
bboxhi,bboxlo,
domain->xperiodic,domain->yperiodic,domain->zperiodic,
domain->xprd_half,domain->yprd_half,domain->zprd_half);
k_cutneighsq.sync<DeviceType>();
k_ex1_type.sync<DeviceType>();
k_ex2_type.sync<DeviceType>();
k_ex_type.sync<DeviceType>();
k_ex1_group.sync<DeviceType>();
k_ex2_group.sync<DeviceType>();
k_ex1_bit.sync<DeviceType>();
k_ex2_bit.sync<DeviceType>();
k_ex_mol_group.sync<DeviceType>();
k_ex_mol_bit.sync<DeviceType>();
data.special_flag[0] = special_flag[0];
data.special_flag[1] = special_flag[1];
data.special_flag[2] = special_flag[2];
data.special_flag[3] = special_flag[3];
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
Kokkos::deep_copy(list->d_stencil,list->h_stencil);
DeviceType::fence();
while(data.h_resize() > 0) {
data.h_resize() = 0;
deep_copy(data.resize, data.h_resize);
MemsetZeroFunctor<DeviceType> f_zero;
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
Kokkos::parallel_for(mbins, f_zero);
DeviceType::fence();
NeighborKokkosBinAtomsFunctor<DeviceType> f(data);
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
DeviceType::fence();
deep_copy(data.h_resize, data.resize);
if(data.h_resize()) {
atoms_per_bin += 16;
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
data.bins = k_bins.view<DeviceType>();
data.c_bins = data.bins;
}
}
if(list->d_neighbors.dimension_0()<nall) {
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs);
list->d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("numneigh", nall*1.1);
data.neigh_list.d_neighbors = list->d_neighbors;
data.neigh_list.d_numneigh = list->d_numneigh;
}
data.h_resize()=1;
while(data.h_resize()) {
data.h_new_maxneighs() = list->maxneighs;
data.h_resize() = 0;
Kokkos::deep_copy(data.resize, data.h_resize);
Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);
namespace LAMMPS_NS {
template class NPairKokkos<LMPDeviceType,0,0>;
template class NPairKokkos<LMPDeviceType,0,1>;
template class NPairKokkos<LMPDeviceType,1,0>;
template class NPairKokkos<LMPDeviceType,1,1>;
#ifdef KOKKOS_HAVE_CUDA
#define BINS_PER_BLOCK 2
const int factor = atoms_per_bin<64?2:1;
Kokkos::TeamPolicy<DeviceType> config((mbins+factor-1)/factor,atoms_per_bin*factor);
#else
const int factor = 1;
template class NPairKokkos<LMPHostType,0,0>;
template class NPairKokkos<LMPHostType,0,1>;
template class NPairKokkos<LMPHostType,1,0>;
template class NPairKokkos<LMPHostType,1,1>;
#endif
if(newton_pair) {
NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
//#ifdef KOKKOS_HAVE_CUDA
// Kokkos::parallel_for(config, f);
//#else
Kokkos::parallel_for(nall, f);
//#endif
} else {
NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
//#ifdef KOKKOS_HAVE_CUDA
// Kokkos::parallel_for(config, f);
//#else
Kokkos::parallel_for(nall, f);
//#endif
}
DeviceType::fence();
deep_copy(data.h_resize, data.resize);
if(data.h_resize()) {
deep_copy(data.h_new_maxneighs, data.new_maxneighs);
list->maxneighs = data.h_new_maxneighs() * 1.2;
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", list->d_neighbors.dimension_0(), list->maxneighs);
data.neigh_list.d_neighbors = list->d_neighbors;
data.neigh_list.maxneighs = list->maxneighs;
}
}
list->inum = nall;
list->gnum = 0;
}
/* ---------------------------------------------------------------------- */
template<class Device> template<int ClusterSize>
void NeighborKokkosExecute<Device>::
build_cluster_Item(const int &i) const
{
/* if necessary, goto next page and add pages */
int n = 0;
// get subview of neighbors of i
const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i);
const X_FLOAT xtmp = x(i, 0);
const X_FLOAT ytmp = x(i, 1);
const X_FLOAT ztmp = x(i, 2);
const int itype = type(i);
const int ibin = coord2bin(xtmp, ytmp, ztmp);
const int nstencil = neigh_list.nstencil;
const typename ArrayTypes<Device>::t_int_1d_const_um stencil
= neigh_list.d_stencil;
for(int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
for(int m = 0; m < c_bincount(jbin); m++) {
const int j = c_bins(jbin,m);
bool skip = i == j;
for(int k = 0; k< (n<neigh_list.maxneighs?n:neigh_list.maxneighs); k++)
if((j-(j%ClusterSize)) == neighbors_i(k)) {skip=true;};//{m += ClusterSize - j&(ClusterSize-1)-1; skip=true;}
if(!skip) {
const int jtype = type(j);
const X_FLOAT delx = xtmp - x(j, 0);
const X_FLOAT dely = ytmp - x(j, 1);
const X_FLOAT delz = ztmp - x(j, 2);
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if(rsq <= cutneighsq(itype,jtype)) {
if(n<neigh_list.maxneighs) neighbors_i(n) = (j-(j%ClusterSize));
n++;
//m += ClusterSize - j&(ClusterSize-1)-1;
}
}
}
}
neigh_list.d_numneigh(i) = n;
if(n >= neigh_list.maxneighs) {
resize() = 1;
if(n >= new_maxneighs()) new_maxneighs() = n;
}
neigh_list.d_ilist(i) = i;
}
}

424
src/KOKKOS/npair_kokkos.h Normal file
View File

@ -0,0 +1,424 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
typedef NPairKokkos<LMPHostType,0,0> NPairKokkosFullBinHost;
NPairStyle(full/bin/kk/host,
NPairKokkosFullBinHost,
NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPDeviceType,0,0> NPairKokkosFullBinDevice;
NPairStyle(full/bin/kk/device,
NPairKokkosFullBinDevice,
NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPHostType,0,1> NPairKokkosFullBinGhostHost;
NPairStyle(full/bin/ghost/kk/host,
NPairKokkosFullBinGhostHost,
NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPDeviceType,0,1> NPairKokkosFullBinGhostDevice;
NPairStyle(full/bin/ghost/kk/device,
NPairKokkosFullBinGhostDevice,
NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPHostType,1,0> NPairKokkosHalfBinHost;
NPairStyle(half/bin/kk/host,
NPairKokkosHalfBinHost,
NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPDeviceType,1,0> NPairKokkosHalfBinDevice;
NPairStyle(half/bin/kk/device,
NPairKokkosHalfBinDevice,
NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPHostType,1,1> NPairKokkosHalfBinGhostHost;
NPairStyle(half/bin/ghost/kk/host,
NPairKokkosHalfBinGhostHost,
NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPDeviceType,1,1> NPairKokkosHalfBinGhostDevice;
NPairStyle(half/bin/ghost/kk/device,
NPairKokkosHalfBinGhostDevice,
NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
#else
#ifndef LMP_NPAIR_KOKKOS_H
#define LMP_NPAIR_KOKKOS_H
#include "npair.h"
#include "neigh_list_kokkos.h"
namespace LAMMPS_NS {
template<class DeviceType, int HALF_NEIGH, int GHOST>
class NPairKokkos : public NPair {
public:
NPairKokkos(class LAMMPS *);
~NPairKokkos() {}
void copy_neighbor_info();
void copy_bin_info();
void copy_stencil_info();
void build(class NeighList *);
private:
int newton_pair;
// data from Neighbor class
DAT::tdual_xfloat_2d k_cutneighsq;
// exclusion data from Neighbor class
DAT::tdual_int_1d k_ex1_type,k_ex2_type;
DAT::tdual_int_2d k_ex_type;
DAT::tdual_int_1d k_ex1_group,k_ex2_group;
DAT::tdual_int_1d k_ex1_bit,k_ex2_bit;
DAT::tdual_int_1d k_ex_mol_group;
DAT::tdual_int_1d k_ex_mol_bit;
// data from NBin class
int atoms_per_bin;
DAT::tdual_int_1d k_bincount;
DAT::tdual_int_2d k_bins;
// data from NStencil class
int nstencil;
DAT::tdual_int_1d k_stencil; // # of J neighs for each I
DAT::tdual_int_1d_3 k_stencilxyz;
};
template<class DeviceType>
class NeighborKokkosExecute
{
typedef ArrayTypes<DeviceType> AT;
public:
NeighListKokkos<DeviceType> neigh_list;
// data from Neighbor class
const typename AT::t_xfloat_2d_randomread cutneighsq;
// exclusion data from Neighbor class
const int exclude;
const int nex_type;
const typename AT::t_int_1d_const ex1_type,ex2_type;
const typename AT::t_int_2d_const ex_type;
const int nex_group;
const typename AT::t_int_1d_const ex1_group,ex2_group;
const typename AT::t_int_1d_const ex1_bit,ex2_bit;
const int nex_mol;
const typename AT::t_int_1d_const ex_mol_group;
const typename AT::t_int_1d_const ex_mol_bit;
// data from NBin class
const typename AT::t_int_1d bincount;
const typename AT::t_int_1d_const c_bincount;
typename AT::t_int_2d bins;
typename AT::t_int_2d_const c_bins;
// data from NStencil class
int nstencil;
typename AT::t_int_1d d_stencil; // # of J neighs for each I
typename AT::t_int_1d_3 d_stencilxyz;
// data from Atom class
const typename AT::t_x_array_randomread x;
const typename AT::t_int_1d_const type,mask,molecule;
const typename AT::t_tagint_1d_const tag;
const typename AT::t_tagint_2d_const special;
const typename AT::t_int_2d_const nspecial;
const int molecular;
int moltemplate;
int special_flag[4];
const int nbinx,nbiny,nbinz;
const int mbinx,mbiny,mbinz;
const int mbinxlo,mbinylo,mbinzlo;
const X_FLOAT bininvx,bininvy,bininvz;
X_FLOAT bboxhi[3],bboxlo[3];
const int nlocal;
typename AT::t_int_scalar resize;
typename AT::t_int_scalar new_maxneighs;
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
typename ArrayTypes<LMPHostType>::t_int_scalar h_new_maxneighs;
const int xperiodic, yperiodic, zperiodic;
const int xprd_half, yprd_half, zprd_half;
NeighborKokkosExecute(
const NeighListKokkos<DeviceType> &_neigh_list,
const typename AT::t_xfloat_2d_randomread &_cutneighsq,
const typename AT::t_int_1d &_bincount,
const typename AT::t_int_2d &_bins,
const int _nstencil,
const typename AT::t_int_1d &_d_stencil,
const typename AT::t_int_1d_3 &_d_stencilxyz,
const int _nlocal,
const typename AT::t_x_array_randomread &_x,
const typename AT::t_int_1d_const &_type,
const typename AT::t_int_1d_const &_mask,
const typename AT::t_int_1d_const &_molecule,
const typename AT::t_tagint_1d_const &_tag,
const typename AT::t_tagint_2d_const &_special,
const typename AT::t_int_2d_const &_nspecial,
const int &_molecular,
const int & _nbinx,const int & _nbiny,const int & _nbinz,
const int & _mbinx,const int & _mbiny,const int & _mbinz,
const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
const int & _exclude,const int & _nex_type,
const typename AT::t_int_1d_const & _ex1_type,
const typename AT::t_int_1d_const & _ex2_type,
const typename AT::t_int_2d_const & _ex_type,
const int & _nex_group,
const typename AT::t_int_1d_const & _ex1_group,
const typename AT::t_int_1d_const & _ex2_group,
const typename AT::t_int_1d_const & _ex1_bit,
const typename AT::t_int_1d_const & _ex2_bit,
const int & _nex_mol,
const typename AT::t_int_1d_const & _ex_mol_group,
const typename AT::t_int_1d_const & _ex_mol_bit,
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo,
const int & _xperiodic, const int & _yperiodic, const int & _zperiodic,
const int & _xprd_half, const int & _yprd_half, const int & _zprd_half):
neigh_list(_neigh_list), cutneighsq(_cutneighsq),
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
nstencil(_nstencil),d_stencil(_d_stencil),d_stencilxyz(_d_stencilxyz),
nlocal(_nlocal),
x(_x),type(_type),mask(_mask),molecule(_molecule),
tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular),
nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz),
mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz),
mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo),
bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz),
exclude(_exclude),nex_type(_nex_type),
ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type),
nex_group(_nex_group),
ex1_group(_ex1_group),ex2_group(_ex2_group),
ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),
ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half) {
if (molecular == 2) moltemplate = 1;
else moltemplate = 0;
bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2];
bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2];
resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
#ifndef KOKKOS_USE_CUDA_UVM
h_resize = Kokkos::create_mirror_view(resize);
#else
h_resize = resize;
#endif
h_resize() = 1;
new_maxneighs = typename AT::
t_int_scalar("NeighborKokkosFunctor::new_maxneighs");
#ifndef KOKKOS_USE_CUDA_UVM
h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs);
#else
h_new_maxneighs = new_maxneighs;
#endif
h_new_maxneighs() = neigh_list.maxneighs;
};
~NeighborKokkosExecute() {neigh_list.clean_copy();};
template<int HalfNeigh, int Newton>
KOKKOS_FUNCTION
void build_Item(const int &i) const;
template<int HalfNeigh>
KOKKOS_FUNCTION
void build_Item_Ghost(const int &i) const;
#ifdef KOKKOS_HAVE_CUDA
template<int HalfNeigh, int Newton>
__device__ inline
void build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const;
#endif
KOKKOS_INLINE_FUNCTION
void binatomsItem(const int &i) const;
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
{
int ix,iy,iz;
if (x >= bboxhi[0])
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
else if (x >= bboxlo[0]) {
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
if (y >= bboxhi[1])
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
else if (y >= bboxlo[1]) {
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
if (z >= bboxhi[2])
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
else if (z >= bboxlo[2]) {
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
{
int ix,iy,iz;
if (x >= bboxhi[0])
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
else if (x >= bboxlo[0]) {
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
if (y >= bboxhi[1])
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
else if (y >= bboxlo[1]) {
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
if (z >= bboxhi[2])
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
else if (z >= bboxlo[2]) {
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
i[0] = ix - mbinxlo;
i[1] = iy - mbinylo;
i[2] = iz - mbinzlo;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const;
KOKKOS_INLINE_FUNCTION
int find_special(const int &i, const int &j) const;
KOKKOS_INLINE_FUNCTION
int minimum_image_check(double dx, double dy, double dz) const {
if (xperiodic && fabs(dx) > xprd_half) return 1;
if (yperiodic && fabs(dy) > yprd_half) return 1;
if (zperiodic && fabs(dz) > zprd_half) return 1;
return 0;
}
};
template<class DeviceType,int HALF_NEIGH,int GHOST_NEWTON>
struct NPairKokkosBuildFunctor {
typedef DeviceType device_type;
const NeighborKokkosExecute<DeviceType> c;
const size_t sharedsize;
NPairKokkosBuildFunctor(const NeighborKokkosExecute<DeviceType> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
}
#ifdef KOKKOS_HAVE_CUDA
__device__ inline
void operator() (typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const {
c.template build_ItemCuda<HALF_NEIGH,GHOST_NEWTON>(dev);
}
size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; }
#endif
};
template<int HALF_NEIGH,int GHOST_NEWTON>
struct NPairKokkosBuildFunctor<LMPHostType,HALF_NEIGH,GHOST_NEWTON> {
typedef LMPHostType device_type;
const NeighborKokkosExecute<LMPHostType> c;
const size_t sharedsize;
NPairKokkosBuildFunctor(const NeighborKokkosExecute<LMPHostType> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
}
void operator() (typename Kokkos::TeamPolicy<LMPHostType>::member_type dev) const {}
};
template<class DeviceType,int HALF_NEIGH>
struct NPairKokkosBuildFunctorGhost {
typedef DeviceType device_type;
const NeighborKokkosExecute<DeviceType> c;
const size_t sharedsize;
NPairKokkosBuildFunctorGhost(const NeighborKokkosExecute<DeviceType> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_Item_Ghost<HALF_NEIGH>(i);
}
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -90,7 +90,7 @@ void PairBuckCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -309,19 +309,12 @@ void PairBuckCoulCutKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full_cluster = 0;
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 1;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with buck/coul/cut/kk");
}

View File

@ -109,7 +109,7 @@ void PairBuckCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -458,11 +458,9 @@ void PairBuckCoulLongKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with buck/coul/long/kk");
}

View File

@ -79,7 +79,7 @@ void PairBuckKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -233,19 +233,12 @@ void PairBuckKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with buck/kk");
}

View File

@ -31,7 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType>
class PairBuckKokkos : public PairBuck {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairBuckKokkos(class LAMMPS *);
@ -96,17 +96,14 @@ class PairBuckKokkos : public PairBuck {
friend class PairComputeFunctor<PairBuckKokkos,HALF,true>;
friend class PairComputeFunctor<PairBuckKokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairBuckKokkos,N2,true>;
friend class PairComputeFunctor<PairBuckKokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairBuckKokkos,FULL,false>;
friend class PairComputeFunctor<PairBuckKokkos,HALF,false>;
friend class PairComputeFunctor<PairBuckKokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairBuckKokkos,N2,false>;
friend class PairComputeFunctor<PairBuckKokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,FULL,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALF,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALFTHREAD,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,N2,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_fullcluster<PairBuckKokkos,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairBuckKokkos,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairBuckKokkos>(PairBuckKokkos*);
};

View File

@ -78,7 +78,7 @@ void PairCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -215,11 +215,9 @@ void PairCoulCutKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with coul/cut/kk");
}

View File

@ -85,7 +85,7 @@ void PairCoulDebyeKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -257,19 +257,12 @@ void PairCoulDebyeKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with coul/debye/kk");
}

View File

@ -221,11 +221,9 @@ void PairCoulDSFKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with coul/dsf/kk");
}

View File

@ -102,7 +102,7 @@ void PairCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -408,11 +408,9 @@ void PairCoulLongKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with buck/coul/long/kk");
}

View File

@ -222,11 +222,9 @@ void PairCoulWolfKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with coul/wolf/kk");
}

View File

@ -286,11 +286,9 @@ void PairEAMAlloyKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk/alloy");
}

View File

@ -291,11 +291,9 @@ void PairEAMFSKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk/fs");
}

View File

@ -281,11 +281,9 @@ void PairEAMKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk");
}

View File

@ -333,145 +333,6 @@ struct PairComputeFunctor {
}
};
template <class PairStyle, bool STACKPARAMS, class Specialisation>
struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> {
typedef typename PairStyle::device_type device_type ;
typedef EV_FLOAT value_type;
PairStyle c;
NeighListKokkos<device_type> list;
PairComputeFunctor(PairStyle* c_ptr,
NeighListKokkos<device_type>* list_ptr):
c(*c_ptr),list(*list_ptr) {};
~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();};
KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const {
return j >> SBBITS & 3;
}
template<int EVFLAG, int NEWTON_PAIR>
KOKKOS_FUNCTION
EV_FLOAT compute_item(const typename Kokkos::TeamPolicy<device_type>::member_type& dev,
const NeighListKokkos<device_type> &list, const NoCoulTag& ) const {
EV_FLOAT ev;
int i = dev.league_rank()*dev.team_size() + dev.team_rank();
const X_FLOAT xtmp = c.c_x(i,0);
const X_FLOAT ytmp = c.c_x(i,1);
const X_FLOAT ztmp = c.c_x(i,2);
int itype = c.type(i);
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
const int jnum = list.d_numneigh[i];
F_FLOAT3 ftmp;
for (int jj = 0; jj < jnum; jj++) {
int jjj = neighbors_i(jj);
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(dev,NeighClusterSize),[&] (const int& k, F_FLOAT3& fftmp) {
const F_FLOAT factor_lj = c.special_lj[sbmask(jjj+k)];
const int j = (jjj + k)&NEIGHMASK;
if((j==i)||(j>=c.nall)) return;
const X_FLOAT delx = xtmp - c.c_x(j,0);
const X_FLOAT dely = ytmp - c.c_x(j,1);
const X_FLOAT delz = ztmp - c.c_x(j,2);
const int jtype = c.type(j);
const F_FLOAT rsq = (delx*delx + dely*dely + delz*delz);
if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
fftmp.x += delx*fpair;
fftmp.y += dely*fpair;
fftmp.z += delz*fpair;
if (EVFLAG) {
F_FLOAT evdwl = 0.0;
if (c.eflag) {
evdwl = 0.5*
factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
ev.evdwl += evdwl;
}
if (c.vflag_either || c.eflag_atom) ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz);
}
}
},ftmp);
}
Kokkos::single(Kokkos::PerThread(dev), [&]() {
c.f(i,0) += ftmp.x;
c.f(i,1) += ftmp.y;
c.f(i,2) += ftmp.z;
});
return ev;
}
KOKKOS_INLINE_FUNCTION
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
const F_FLOAT &dely, const F_FLOAT &delz) const
{
const int EFLAG = c.eflag;
const int NEWTON_PAIR = c.newton_pair;
const int VFLAG = c.vflag_either;
if (EFLAG) {
if (c.eflag_atom) {
const E_FLOAT epairhalf = 0.5 * epair;
if (NEWTON_PAIR || i < c.nlocal) c.d_eatom[i] += epairhalf;
if (NEWTON_PAIR || j < c.nlocal) c.d_eatom[j] += epairhalf;
}
}
if (VFLAG) {
const E_FLOAT v0 = delx*delx*fpair;
const E_FLOAT v1 = dely*dely*fpair;
const E_FLOAT v2 = delz*delz*fpair;
const E_FLOAT v3 = delx*dely*fpair;
const E_FLOAT v4 = delx*delz*fpair;
const E_FLOAT v5 = dely*delz*fpair;
if (c.vflag_global) {
ev.v[0] += 0.5*v0;
ev.v[1] += 0.5*v1;
ev.v[2] += 0.5*v2;
ev.v[3] += 0.5*v3;
ev.v[4] += 0.5*v4;
ev.v[5] += 0.5*v5;
}
if (c.vflag_atom) {
if (i < c.nlocal) {
c.d_vatom(i,0) += 0.5*v0;
c.d_vatom(i,1) += 0.5*v1;
c.d_vatom(i,2) += 0.5*v2;
c.d_vatom(i,3) += 0.5*v3;
c.d_vatom(i,4) += 0.5*v4;
c.d_vatom(i,5) += 0.5*v5;
}
}
}
}
KOKKOS_INLINE_FUNCTION
void operator()(const typename Kokkos::TeamPolicy<device_type>::member_type& dev) const {
if (c.newton_pair) compute_item<0,1>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
else compute_item<0,0>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
}
KOKKOS_INLINE_FUNCTION
void operator()(const typename Kokkos::TeamPolicy<device_type>::member_type& dev, value_type &energy_virial) const {
if (c.newton_pair)
energy_virial += compute_item<1,1>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
else
energy_virial += compute_item<1,0>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
}
};
template <class PairStyle, bool STACKPARAMS, class Specialisation>
struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> {
typedef typename PairStyle::device_type device_type ;
@ -607,8 +468,8 @@ struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> {
// The enable_if clause will invalidate the last parameter of the function, so that
// a match is only achieved, if PairStyle supports the specific neighborlist variant.
// This uses the fact that failure to match template parameters is not an error.
// By having the enable_if with a ! and without it, exactly one of the two versions of the functions
// pair_compute_neighlist and pair_compute_fullcluster will match - either the dummy version
// By having the enable_if with a ! and without it, exactly one of the functions
// pair_compute_neighlist will match - either the dummy version
// or the real one further below.
template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<!((NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0), NeighListKokkos<typename PairStyle::device_type>*>::type list) {
@ -619,15 +480,6 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable
return ev;
}
template<class PairStyle, class Specialisation>
EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enable_if<!((FULLCLUSTER&PairStyle::EnabledNeighFlags) != 0), NeighListKokkos<typename PairStyle::device_type>*>::type list) {
EV_FLOAT ev;
(void) fpair;
(void) list;
printf("ERROR: calling pair_compute with invalid neighbor list style: requested %i available %i \n",FULLCLUSTER,PairStyle::EnabledNeighFlags);
return ev;
}
// Submit ParallelFor for NEIGHFLAG=HALF,HALFTHREAD,FULL,N2
template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*>::type list) {
@ -644,41 +496,6 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable
return ev;
}
// Submit ParallelFor for NEIGHFLAG=FULLCLUSTER
template<class PairStyle, class Specialisation>
EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enable_if<(FULLCLUSTER&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*>::type list) {
EV_FLOAT ev;
if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,false,Specialisation >
f_type;
f_type ff(fpair, list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 32 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*+teamsize-1)/teamsize;
Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize,NeighClusterSize);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev);
else Kokkos::parallel_for(config,ff);
} else {
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,true,Specialisation >
f_type;
f_type ff(fpair, list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 32 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*+teamsize-1)/teamsize;
Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize,NeighClusterSize);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev);
else Kokkos::parallel_for(config,ff);
}
return ev;
}
template<class PairStyle, class Specialisation>
EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::device_type>* list) {
EV_FLOAT ev;
@ -690,8 +507,6 @@ EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::dev
ev = pair_compute_neighlist<PairStyle,HALF,Specialisation> (fpair,list);
} else if (fpair->neighflag == N2) {
ev = pair_compute_neighlist<PairStyle,N2,Specialisation> (fpair,list);
} else if (fpair->neighflag == FULLCLUSTER) {
ev = pair_compute_fullcluster<PairStyle,Specialisation> (fpair,list);
}
return ev;
}

View File

@ -110,7 +110,7 @@ void PairLJCharmmCoulCharmmImplicitKokkos<DeviceType>::compute(int eflag_in, int
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -455,11 +455,9 @@ void PairLJCharmmCoulCharmmImplicitKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/charmm/implicit/kk");
}

View File

@ -110,7 +110,7 @@ void PairLJCharmmCoulCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_i
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -456,11 +456,9 @@ void PairLJCharmmCoulCharmmKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/charmm/kk");
}

View File

@ -110,7 +110,7 @@ void PairLJCharmmCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -486,11 +486,9 @@ void PairLJCharmmCoulLongKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/long/kk");
}

View File

@ -87,7 +87,7 @@ void PairLJClass2CoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -289,19 +289,12 @@ void PairLJClass2CoulCutKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/coul/cut/kk");
}

View File

@ -95,7 +95,7 @@ void PairLJClass2CoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -445,11 +445,9 @@ void PairLJClass2CoulLongKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/coul/long/kk");
}

View File

@ -87,7 +87,7 @@ void PairLJClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -227,19 +227,12 @@ void PairLJClass2Kokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/kk");
}

View File

@ -31,7 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType>
class PairLJClass2Kokkos : public PairLJClass2 {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairLJClass2Kokkos(class LAMMPS *);
@ -99,17 +99,14 @@ class PairLJClass2Kokkos : public PairLJClass2 {
friend class PairComputeFunctor<PairLJClass2Kokkos,HALF,true>;
friend class PairComputeFunctor<PairLJClass2Kokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairLJClass2Kokkos,N2,true>;
friend class PairComputeFunctor<PairLJClass2Kokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairLJClass2Kokkos,FULL,false>;
friend class PairComputeFunctor<PairLJClass2Kokkos,HALF,false>;
friend class PairComputeFunctor<PairLJClass2Kokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairLJClass2Kokkos,N2,false>;
friend class PairComputeFunctor<PairLJClass2Kokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,FULL,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALF,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALFTHREAD,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,N2,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_fullcluster<PairLJClass2Kokkos,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJClass2Kokkos,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJClass2Kokkos>(PairLJClass2Kokkos*);
};

View File

@ -87,7 +87,7 @@ void PairLJCutCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -280,19 +280,12 @@ void PairLJCutCoulCutKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/cut/kk");
}

View File

@ -91,7 +91,7 @@ void PairLJCutCoulDebyeKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -310,19 +310,12 @@ void PairLJCutCoulDebyeKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/debye/kk");
}

View File

@ -99,7 +99,7 @@ void PairLJCutCoulDSFKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -301,19 +301,12 @@ void PairLJCutCoulDSFKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/cut/kk");
}

View File

@ -99,7 +99,7 @@ void PairLJCutCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -464,11 +464,9 @@ void PairLJCutCoulLongKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/long/kk");
}

View File

@ -87,7 +87,7 @@ void PairLJCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -245,19 +245,12 @@ void PairLJCutKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/kk");
}

View File

@ -31,7 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType>
class PairLJCutKokkos : public PairLJCut {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairLJCutKokkos(class LAMMPS *);
@ -99,17 +99,14 @@ class PairLJCutKokkos : public PairLJCut {
friend class PairComputeFunctor<PairLJCutKokkos,HALF,true>;
friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairLJCutKokkos,N2,true>;
friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairLJCutKokkos,FULL,false>;
friend class PairComputeFunctor<PairLJCutKokkos,HALF,false>;
friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairLJCutKokkos,N2,false>;
friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,FULL,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALF,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALFTHREAD,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,N2,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_fullcluster<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJCutKokkos>(PairLJCutKokkos*);
};

View File

@ -86,7 +86,7 @@ void PairLJExpandKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -230,19 +230,12 @@ void PairLJExpandKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/expand/kk");
}

View File

@ -31,7 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType>
class PairLJExpandKokkos : public PairLJExpand {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairLJExpandKokkos(class LAMMPS *);
@ -100,17 +100,14 @@ class PairLJExpandKokkos : public PairLJExpand {
friend class PairComputeFunctor<PairLJExpandKokkos,HALF,true>;
friend class PairComputeFunctor<PairLJExpandKokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairLJExpandKokkos,N2,true>;
friend class PairComputeFunctor<PairLJExpandKokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairLJExpandKokkos,FULL,false>;
friend class PairComputeFunctor<PairLJExpandKokkos,HALF,false>;
friend class PairComputeFunctor<PairLJExpandKokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairLJExpandKokkos,N2,false>;
friend class PairComputeFunctor<PairLJExpandKokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,FULL,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,HALF,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,HALFTHREAD,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,N2,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_fullcluster<PairLJExpandKokkos,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJExpandKokkos,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJExpandKokkos>(PairLJExpandKokkos*);
};

View File

@ -101,7 +101,7 @@ void PairLJGromacsCoulGromacsKokkos<DeviceType>::compute(int eflag_in, int vflag
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -439,11 +439,9 @@ void PairLJGromacsCoulGromacsKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/gromacs/coul/gromacs/kk");
}

View File

@ -98,7 +98,7 @@ void PairLJGromacsKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -277,11 +277,9 @@ void PairLJGromacsKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/gromacs/kk");
}

View File

@ -86,7 +86,7 @@ void PairLJSDKKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -258,19 +258,12 @@ void PairLJSDKKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/sdk/kk");
}

View File

@ -31,7 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType>
class PairLJSDKKokkos : public PairLJSDK {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairLJSDKKokkos(class LAMMPS *);
@ -97,17 +97,14 @@ class PairLJSDKKokkos : public PairLJSDK {
friend class PairComputeFunctor<PairLJSDKKokkos,HALF,true>;
friend class PairComputeFunctor<PairLJSDKKokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairLJSDKKokkos,N2,true>;
friend class PairComputeFunctor<PairLJSDKKokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairLJSDKKokkos,FULL,false>;
friend class PairComputeFunctor<PairLJSDKKokkos,HALF,false>;
friend class PairComputeFunctor<PairLJSDKKokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairLJSDKKokkos,N2,false>;
friend class PairComputeFunctor<PairLJSDKKokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,FULL,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,HALF,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,HALFTHREAD,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,N2,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_fullcluster<PairLJSDKKokkos,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJSDKKokkos,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJSDKKokkos>(PairLJSDKKokkos*);
};

View File

@ -146,12 +146,10 @@ void PairReaxCKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
neighbor->requests[irequest]->ghost = 1;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
neighbor->requests[irequest]->ghost = 1;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk");

View File

@ -601,7 +601,6 @@ void PairSWKokkos<DeviceType>::init_style()
if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
if (neighflag == FULL)
neighbor->requests[irequest]->ghost = 1;
else

View File

@ -96,7 +96,7 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -142,19 +142,6 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
f(this,(NeighListKokkos<DeviceType>*) list);
if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev);
else Kokkos::parallel_for(nlocal,f);
} else if (neighflag == FULLCLUSTER) {
typedef PairComputeFunctor<PairTableKokkos<DeviceType>,FULLCLUSTER,false,S_TableCompute<DeviceType,TABSTYLE> >
f_type;
f_type f(this,(NeighListKokkos<DeviceType>*) list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<DeviceType, Kokkos::Cuda>::value ? 32 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*+teamsize-1)/teamsize;
Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize,NeighClusterSize);
if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
else Kokkos::parallel_for(config,f);
}
} else {
if (neighflag == FULL) {
@ -177,19 +164,6 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
f(this,(NeighListKokkos<DeviceType>*) list);
if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev);
else Kokkos::parallel_for(nlocal,f);
} else if (neighflag == FULLCLUSTER) {
typedef PairComputeFunctor<PairTableKokkos<DeviceType>,FULLCLUSTER,true,S_TableCompute<DeviceType,TABSTYLE> >
f_type;
f_type f(this,(NeighListKokkos<DeviceType>*) list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<DeviceType, Kokkos::Cuda>::value ? 32 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*+teamsize-1)/teamsize;
Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize,NeighClusterSize);
if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
else Kokkos::parallel_for(config,f);
}
}
@ -1261,19 +1235,12 @@ void PairTableKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/kk");
}

View File

@ -41,7 +41,7 @@ template<class DeviceType>
class PairTableKokkos : public Pair {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
@ -170,45 +170,37 @@ class PairTableKokkos : public Pair {
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,BITMAP> >;
friend void pair_virial_fdotr_compute<PairTableKokkos>(PairTableKokkos*);
};

View File

@ -103,7 +103,6 @@ void PairTersoffKokkos<DeviceType>::init_style()
//if (neighflag == FULL || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
if (neighflag == FULL)
neighbor->requests[irequest]->ghost = 1;
else

View File

@ -102,7 +102,6 @@ void PairTersoffMODKokkos<DeviceType>::init_style()
if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
if (neighflag == FULL)
neighbor->requests[irequest]->ghost = 1;
else

View File

@ -113,7 +113,6 @@ void PairTersoffZBLKokkos<DeviceType>::init_style()
if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
if (neighflag == FULL)
neighbor->requests[irequest]->ghost = 1;
else

View File

@ -195,17 +195,6 @@ void EwaldDisp::init()
g_ewald = accuracy*sqrt(natoms*(*cutoff)*shape_det(domain->h)) / (2.0*q2);
if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/(*cutoff);
else g_ewald = sqrt(-log(g_ewald)) / (*cutoff);
}
else if (function[1] || function[2]) {
//Try Newton Solver
//Use old method to get guess
g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff;
double g_ewald_new =
NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),b2);
if (g_ewald_new > 0.0) g_ewald = g_ewald_new;
else error->warning(FLERR,"Ewald/disp Newton solver failed, "
"using old method to estimate g_ewald");
} else if (function[3]) {
//Try Newton Solver
//Use old method to get guess
@ -215,6 +204,16 @@ void EwaldDisp::init()
if (g_ewald_new > 0.0) g_ewald = g_ewald_new;
else error->warning(FLERR,"Ewald/disp Newton solver failed, "
"using old method to estimate g_ewald");
} else if (function[1] || function[2]) {
//Try Newton Solver
//Use old method to get guess
g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff;
double g_ewald_new =
NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),b2);
if (g_ewald_new > 0.0) g_ewald = g_ewald_new;
else error->warning(FLERR,"Ewald/disp Newton solver failed, "
"using old method to estimate g_ewald");
}
}
@ -708,6 +707,8 @@ void EwaldDisp::compute(int eflag, int vflag)
compute_virial();
compute_virial_dipole();
compute_virial_peratom();
if (slabflag) compute_slabcorr();
}
@ -974,7 +975,6 @@ void EwaldDisp::compute_energy()
}
}
for (int k=0; k<EWALD_NFUNCS; ++k) energy += c[k]*sum[k]-energy_self[k];
if (slabflag) compute_slabcorr();
}
/* ---------------------------------------------------------------------- */
@ -1480,10 +1480,7 @@ double EwaldDisp::f(double x, double Rc, bigint natoms, double vol, double b2)
double a = Rc*x;
double f = 0.0;
if (function[1] || function[2]) { // LJ
f = (4.0*MY_PI*b2*powint(x,4)/vol/sqrt((double)natoms)*erfc(a) *
(6.0*powint(a,-5) + 6.0*powint(a,-3) + 3.0/a + a) - accuracy);
} else { // dipole
if (function[3]) { // dipole
double rg2 = a*a;
double rg4 = rg2*rg2;
double rg6 = rg4*rg2;
@ -1492,7 +1489,10 @@ double EwaldDisp::f(double x, double Rc, bigint natoms, double vol, double b2)
f = (b2/(sqrt(vol*powint(x,4)*powint(Rc,9)*natoms)) *
sqrt(13.0/6.0*Cc*Cc + 2.0/15.0*Dc*Dc - 13.0/15.0*Cc*Dc) *
exp(-rg2)) - accuracy;
}
} else if (function[1] || function[2]) { // LJ
f = (4.0*MY_PI*b2*powint(x,4)/vol/sqrt((double)natoms)*erfc(a) *
(6.0*powint(a,-5) + 6.0*powint(a,-3) + 3.0/a + a) - accuracy);
}
return f;
}

127
src/MAKE/MACHINES/Makefile.cori2 Executable file
View File

@ -0,0 +1,127 @@
# cori2 = NERSC Cori II KNL, static build, FFTW (single precision)
# ---------------------------------------------------------------------
# module swap craype-haswell craype-mic-knl
# module load fftw
# module load craype-hugepages2M
# Recommend using #SBATCH -S 2 for core specialization
# ---------------------------------------------------------------------
SHELL = /bin/sh
# ---------------------------------------------------------------------
# compiler/linker settings
# specify flags and libraries needed for your compiler
CC = CC
OPTFLAGS = -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -qno-offload \
-fno-alias -ansi-alias -restrict $(OPTFLAGS) -DLMP_INTEL_NO_TBB
SHFLAGS = -fPIC
DEPFLAGS = -M
LINK = CC
LINKFLAGS = -g -qopenmp $(OPTFLAGS)
LIB =
SIZE = size
ARCHIVE = ar
ARFLAGS = -rc
SHLIBFLAGS = -shared
# ---------------------------------------------------------------------
# LAMMPS-specific settings, all OPTIONAL
# specify settings for LAMMPS features you will use
# if you change any -D setting, do full re-compile after "make clean"
# LAMMPS ifdef settings
# see possible settings in Section 2.2 (step 4) of manual
LMP_INC = #-DLAMMPS_GZIP -DLAMMPS_JPEG
# MPI library
# see discussion in Section 2.2 (step 5) of manual
# MPI wrapper compiler/linker can provide this info
# can point to dummy MPI library in src/STUBS as in Makefile.serial
# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
# INC = path for mpi.h, MPI compiler settings
# PATH = path for MPI library
# LIB = name of MPI library
MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1
MPI_PATH =
MPI_LIB =
# FFT library
# see discussion in Section 2.2 (step 6) of manaul
# can be left blank to use provided KISS FFT library
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
# PATH = path for FFT library
# LIB = name of FFT library
FFT_INC = -DFFT_FFTW3 -DFFT_SINGLE
FFT_PATH =
FFT_LIB = -lfftw3f
# JPEG and/or PNG library
# see discussion in Section 2.2 (step 7) of manual
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
# INC = path(s) for jpeglib.h and/or png.h
# PATH = path(s) for JPEG library and/or PNG library
# LIB = name(s) of JPEG library and/or PNG library
JPG_INC =
JPG_PATH =
JPG_LIB = #-ljpeg
# ---------------------------------------------------------------------
# build rules and dependencies
# do not edit this section
include Makefile.package.settings
include Makefile.package
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
# Path to src files
vpath %.cpp ..
vpath %.h ..
# Link target
$(EXE): $(OBJ)
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
$(SIZE) $(EXE)
# Library targets
lib: $(OBJ)
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
shlib: $(OBJ)
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
$(OBJ) $(EXTRA_LIB) $(LIB)
# Compilation rules
%.o:%.cpp
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
%.d:%.cpp
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
%.o:%.cu
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
# Individual dependencies
depend : fastdep.exe $(SRC)
@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
fastdep.exe: ../DEPEND/fastdep.c
cc -O -o $@ $<
sinclude .depend

View File

@ -0,0 +1,123 @@
# intel_cpu_intelmpi = USER-INTEL package, Intel MPI, MKL FFT
SHELL = /bin/sh
# ---------------------------------------------------------------------
# compiler/linker settings
# specify flags and libraries needed for your compiler
CC = mpiicpc
OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \
-fno-alias -ansi-alias -restrict $(OPTFLAGS)
SHFLAGS = -fPIC
DEPFLAGS = -M
LINK = mpiicpc
LINKFLAGS = -g -qopenmp $(OPTFLAGS)
LIB = -ltbbmalloc -ltbbmalloc_proxy
SIZE = size
ARCHIVE = ar
ARFLAGS = -rc
SHLIBFLAGS = -shared
# ---------------------------------------------------------------------
# LAMMPS-specific settings, all OPTIONAL
# specify settings for LAMMPS features you will use
# if you change any -D setting, do full re-compile after "make clean"
# LAMMPS ifdef settings
# see possible settings in Section 2.2 (step 4) of manual
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG
# MPI library
# see discussion in Section 2.2 (step 5) of manual
# MPI wrapper compiler/linker can provide this info
# can point to dummy MPI library in src/STUBS as in Makefile.serial
# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
# INC = path for mpi.h, MPI compiler settings
# PATH = path for MPI library
# LIB = name of MPI library
MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1
MPI_PATH =
MPI_LIB =
# FFT library
# see discussion in Section 2.2 (step 6) of manaul
# can be left blank to use provided KISS FFT library
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
# PATH = path for FFT library
# LIB = name of FFT library
FFT_INC = -DFFT_MKL -DFFT_SINGLE
FFT_PATH =
FFT_LIB = -L$MKLROOT/lib/intel64/ -lmkl_intel_ilp64 \
-lmkl_sequential -lmkl_core
# JPEG and/or PNG library
# see discussion in Section 2.2 (step 7) of manual
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
# INC = path(s) for jpeglib.h and/or png.h
# PATH = path(s) for JPEG library and/or PNG library
# LIB = name(s) of JPEG library and/or PNG library
JPG_INC =
JPG_PATH =
JPG_LIB = -ljpeg
# ---------------------------------------------------------------------
# build rules and dependencies
# do not edit this section
include Makefile.package.settings
include Makefile.package
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS)
EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS)
# Path to src files
vpath %.cpp ..
vpath %.h ..
# Link target
$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS)
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
$(SIZE) $(EXE)
# Library targets
lib: $(OBJ) $(EXTRA_LINK_DEPENDS)
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
shlib: $(OBJ) $(EXTRA_LINK_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
$(OBJ) $(EXTRA_LIB) $(LIB)
# Compilation rules
%.o:%.cpp $(EXTRA_CPP_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
%.d:%.cpp $(EXTRA_CPP_DEPENDS)
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
%.o:%.cu $(EXTRA_CPP_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
# Individual dependencies
depend : fastdep.exe $(SRC)
@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
fastdep.exe: ../DEPEND/fastdep.c
cc -O -o $@ $<
sinclude .depend

View File

@ -0,0 +1,123 @@
# intel_phi = USER-INTEL with Phi x200 (KNL) offload support,Intel MPI,MKL FFT
SHELL = /bin/sh
# ---------------------------------------------------------------------
# compiler/linker settings
# specify flags and libraries needed for your compiler
CC = mpiicpc
MIC_OPT = -qoffload-arch=mic-avx512 -fp-model fast=2
CCFLAGS = -g -O3 -qopenmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64 \
-xHost -fno-alias -ansi-alias -restrict \
-qoverride-limits $(MIC_OPT)
SHFLAGS = -fPIC
DEPFLAGS = -M
LINK = mpiicpc
LINKFLAGS = -g -O3 -xHost -qopenmp -qoffload $(MIC_OPT)
LIB = -ltbbmalloc
SIZE = size
ARCHIVE = ar
ARFLAGS = -rc
SHLIBFLAGS = -shared
# ---------------------------------------------------------------------
# LAMMPS-specific settings, all OPTIONAL
# specify settings for LAMMPS features you will use
# if you change any -D setting, do full re-compile after "make clean"
# LAMMPS ifdef settings
# see possible settings in Section 2.2 (step 4) of manual
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG
# MPI library
# see discussion in Section 2.2 (step 5) of manual
# MPI wrapper compiler/linker can provide this info
# can point to dummy MPI library in src/STUBS as in Makefile.serial
# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
# INC = path for mpi.h, MPI compiler settings
# PATH = path for MPI library
# LIB = name of MPI library
MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1
MPI_PATH =
MPI_LIB =
# FFT library
# see discussion in Section 2.2 (step 6) of manaul
# can be left blank to use provided KISS FFT library
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
# PATH = path for FFT library
# LIB = name of FFT library
FFT_INC = -DFFT_MKL -DFFT_SINGLE
FFT_PATH =
FFT_LIB = -L$(MKLROOT)/lib/intel64/ -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core
# JPEG and/or PNG library
# see discussion in Section 2.2 (step 7) of manual
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
# INC = path(s) for jpeglib.h and/or png.h
# PATH = path(s) for JPEG library and/or PNG library
# LIB = name(s) of JPEG library and/or PNG library
JPG_INC =
JPG_PATH =
JPG_LIB = -ljpeg
# ---------------------------------------------------------------------
# build rules and dependencies
# do not edit this section
include Makefile.package.settings
include Makefile.package
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS)
EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS)
# Path to src files
vpath %.cpp ..
vpath %.h ..
# Link target
$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS)
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
$(SIZE) $(EXE)
# Library targets
lib: $(OBJ) $(EXTRA_LINK_DEPENDS)
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
shlib: $(OBJ) $(EXTRA_LINK_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
$(OBJ) $(EXTRA_LIB) $(LIB)
# Compilation rules
%.o:%.cpp $(EXTRA_CPP_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
%.d:%.cpp $(EXTRA_CPP_DEPENDS)
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
%.o:%.cu $(EXTRA_CPP_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
# Individual dependencies
depend : fastdep.exe $(SRC)
@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
fastdep.exe: ../DEPEND/fastdep.c
cc -O -o $@ $<
sinclude .depend

View File

@ -59,8 +59,9 @@ style () {
# called by "make machine"
# col 1 = string to search for
# col 2 = search in *.h files starting with this name
# col 3 = prefix of style file
# col 4
# col 3 = name of style file
# col 4 = file that includes the style file
# col 5 = optional 2nd file that includes the style file
if (test $1 = "style") then
@ -69,7 +70,7 @@ if (test $1 = "style") then
style BODY_CLASS body_ body atom_vec_body
style BOND_CLASS bond_ bond force
style COMMAND_CLASS "" command input
style COMPUTE_CLASS compute_ compute modify modify_cuda
style COMPUTE_CLASS compute_ compute modify
style DIHEDRAL_CLASS dihedral_ dihedral force
style DUMP_CLASS dump_ dump output write_dump
style FIX_CLASS fix_ fix modify
@ -77,6 +78,10 @@ if (test $1 = "style") then
style INTEGRATE_CLASS "" integrate update
style KSPACE_CLASS "" kspace force
style MINIMIZE_CLASS min_ minimize update
style NBIN_CLASS nbin_ nbin neighbor
style NPAIR_CLASS npair_ npair neighbor
style NSTENCIL_CLASS nstencil_ nstencil neighbor
style NTOPO_CLASS ntopo_ ntopo neighbor
style PAIR_CLASS pair_ pair force
style READER_CLASS reader_ reader read_dump
style REGION_CLASS region_ region domain

View File

@ -89,6 +89,7 @@ void Python::command(int narg, char **arg)
istr = NULL;
ostr = NULL;
format = NULL;
length_longstr = 0;
char *pyfile = NULL;
char *herestr = NULL;
int existflag = 0;
@ -115,6 +116,11 @@ void Python::command(int narg, char **arg)
format = new char[n];
strcpy(format,arg[iarg+1]);
iarg += 2;
} else if (strcmp(arg[iarg],"length") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Invalid python command");
length_longstr = force->inumeric(FLERR,arg[iarg+1]);
if (length_longstr <= 0) error->all(FLERR,"Invalid python command");
iarg += 2;
} else if (strcmp(arg[iarg],"file") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Invalid python command");
delete[] pyfile;
@ -249,6 +255,7 @@ void Python::invoke_function(int ifunc, char *result)
// function returned a value
// assign it to result string stored by python-style variable
// or if user specified a length, assign it to longstr
if (pfuncs[ifunc].noutput) {
int otype = pfuncs[ifunc].otype;
@ -258,7 +265,9 @@ void Python::invoke_function(int ifunc, char *result)
sprintf(result,"%.15g",PyFloat_AsDouble(pValue));
} else if (otype == STRING) {
char *pystr = PyString_AsString(pValue);
strncpy(result,pystr,VALUELENGTH-1);
if (pfuncs[ifunc].longstr)
strncpy(pfuncs[ifunc].longstr,pystr,pfuncs[ifunc].length_longstr);
else strncpy(result,pystr,VALUELENGTH-1);
}
Py_DECREF(pValue);
}
@ -287,6 +296,13 @@ int Python::variable_match(char *name, char *varname, int numeric)
/* ------------------------------------------------------------------ */
char *Python::long_string(int ifunc)
{
return pfuncs[ifunc].longstr;
}
/* ------------------------------------------------------------------ */
int Python::create_entry(char *name)
{
// ifunc = index to entry by name in pfuncs vector, can be old or new
@ -370,6 +386,7 @@ int Python::create_entry(char *name)
// process output as value or variable
pfuncs[ifunc].ovarname = NULL;
pfuncs[ifunc].longstr = NULL;
if (!noutput) return ifunc;
char type = format[ninput];
@ -378,6 +395,14 @@ int Python::create_entry(char *name)
else if (type == 's') pfuncs[ifunc].otype = STRING;
else error->all(FLERR,"Invalid python command");
if (length_longstr) {
if (pfuncs[ifunc].otype != STRING)
error->all(FLERR,"Python command length keyword "
"cannot be used unless output is a string");
pfuncs[ifunc].length_longstr = length_longstr;
pfuncs[ifunc].longstr = new char[length_longstr+1];
}
if (strstr(ostr,"v_") != ostr) error->all(FLERR,"Invalid python command");
int n = strlen(&ostr[2]) + 1;
pfuncs[ifunc].ovarname = new char[n];
@ -398,4 +423,5 @@ void Python::deallocate(int i)
delete [] pfuncs[i].svalue[j];
delete [] pfuncs[i].svalue;
delete [] pfuncs[i].ovarname;
delete [] pfuncs[i].longstr;
}

View File

@ -28,9 +28,10 @@ class Python : protected Pointers {
void invoke_function(int, char *);
int find(char *);
int variable_match(char *, char *, int);
char *long_string(int);
private:
int ninput,noutput;
int ninput,noutput,length_longstr;
char **istr;
char *ostr,*format;
void *pyMain;
@ -44,6 +45,8 @@ class Python : protected Pointers {
char **svalue;
int otype;
char *ovarname;
char *longstr;
int length_longstr;
void *pFunc;
};

View File

@ -13,6 +13,40 @@ style_kspace.h
style_minimize.h
style_pair.h
style_region.h
style_neigh_bin.h
style_neigh_pair.h
style_neigh_stencil.h
# deleted on ## XXX 2016
accelerator_intel.h
neigh_bond.cpp
neigh_bond.h
neigh_derive.cpp
neigh_derive.h
neigh_full.cpp
neigh_full.h
neigh_gran.cpp
neigh_gran.h
neigh_half_bin.cpp
neigh_half_bin.h
neigh_half_multi.cpp
neigh_half_multi.h
neigh_half_nsq.cpp
neigh_half_nsq.h
neigh_respa.cpp
neigh_respa.h
neigh_shardlow.cpp
neigh_shardlow.h
neigh_stencil.cpp
neigh_half_bin_intel.cpp
neigh_full_kokkos.h
neighbor_omp.h
neigh_derive_omp.cpp
neigh_full_omp.cpp
neigh_gran_omp.cpp
neigh_half_bin_omp.cpp
neigh_half_multi_omp.cpp
neigh_half_nsq_omp.cpp
neigh_respa_omp.cpp
# deleted on 20 Sep 2016
fix_ti_rs.cpp
fix_ti_rs.h

View File

@ -241,6 +241,7 @@ void PRD::command(int narg, char **arg)
update->minimize->init();
// cannot use PRD with a changing box
// removing this restriction would require saving/restoring box params
if (domain->box_change)
error->all(FLERR,"Cannot use PRD with a changing box");

View File

@ -33,7 +33,8 @@ enum{ID,MOL,MASS,X,Y,Z,XU,YU,ZU,VX,VY,VZ,FX,FY,FZ,IX,IY,IZ,
/* ---------------------------------------------------------------------- */
ComputeRigidLocal::ComputeRigidLocal(LAMMPS *lmp, int narg, char **arg) :
Compute(lmp, narg, arg), rstyle(NULL), idrigid(NULL), fixrigid(NULL)
Compute(lmp, narg, arg),
rstyle(NULL), idrigid(NULL), fixrigid(NULL), vlocal(NULL), alocal(NULL)
{
if (narg < 5) error->all(FLERR,"Illegal compute rigid/local command");
@ -88,16 +89,16 @@ ComputeRigidLocal::ComputeRigidLocal(LAMMPS *lmp, int narg, char **arg) :
}
ncount = nmax = 0;
vector = NULL;
array = NULL;
vlocal = NULL;
alocal = NULL;
}
/* ---------------------------------------------------------------------- */
ComputeRigidLocal::~ComputeRigidLocal()
{
memory->destroy(vector);
memory->destroy(array);
memory->destroy(vlocal);
memory->destroy(alocal);
delete [] idrigid;
delete [] rstyle;
}
@ -169,8 +170,8 @@ int ComputeRigidLocal::compute_rigid(int flag)
body = &fixrigid->body[ibody];
if (flag) {
if (nvalues == 1) ptr = &vector[m];
else ptr = array[m];
if (nvalues == 1) ptr = &vlocal[m];
else ptr = alocal[m];
for (n = 0; n < nvalues; n++) {
switch (rstyle[n]) {
@ -293,18 +294,18 @@ int ComputeRigidLocal::compute_rigid(int flag)
void ComputeRigidLocal::reallocate(int n)
{
// grow vector or array
// grow vector_local or array_local
while (nmax < n) nmax += DELTA;
if (nvalues == 1) {
memory->destroy(vector);
memory->create(vector,nmax,"rigid/local:vector");
vector_local = vector;
memory->destroy(vlocal);
memory->create(vlocal,nmax,"rigid/local:vector_local");
vector_local = vlocal;
} else {
memory->destroy(array);
memory->create(array,nmax,nvalues,"rigid/local:array");
array_local = array;
memory->destroy(alocal);
memory->create(alocal,nmax,nvalues,"rigid/local:array_local");
array_local = alocal;
}
}

View File

@ -41,6 +41,8 @@ class ComputeRigidLocal : public Compute {
class FixRigidSmall *fixrigid;
int nmax;
double *vlocal;
double **alocal;
int compute_rigid(int);
void reallocate(int);

View File

@ -43,7 +43,7 @@ ComputeDpdAtom::ComputeDpdAtom(LAMMPS *lmp, int narg, char **arg) :
size_peratom_cols = 4;
nmax = 0;
if (atom->dpd_flag != 1) error->all(FLERR,"compute dpd requires atom_style with internal temperature and energies (e.g. dpd)");
}

View File

@ -70,8 +70,8 @@ void FixEOScv::init()
if (mask[i] & groupbit) {
if(dpdTheta[i] <= 0.0)
error->one(FLERR,"Internal temperature <= zero");
uCond[i] = 0.5*cvEOS*dpdTheta[i];
uMech[i] = 0.5*cvEOS*dpdTheta[i];
uCond[i] = 0.0;
uMech[i] = cvEOS*dpdTheta[i];
}
}
}

View File

@ -50,6 +50,10 @@ Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.
E: FixEOScv requires atom_style with internal temperature and energies (e.g. dpd)
Self-explanatory.
E: EOS cv must be > 0.0
The constant volume heat capacity must be larger than zero.

View File

@ -122,8 +122,8 @@ void FixEOStable::init()
if(dpdTheta[i] <= 0.0)
error->one(FLERR,"Internal temperature <= zero");
energy_lookup(dpdTheta[i],tmp);
uCond[i] = tmp / 2.0;
uMech[i] = tmp / 2.0;
uCond[i] = 0.0;
uMech[i] = tmp;
}
}
}

View File

@ -92,6 +92,10 @@ E: eos/table values are not increasing
The EOS must be a monotonically, increasing function
E: FixEOStable requires atom_style with internal temperature and energies (e.g. dpd)
Self-explanatory.
E: Internal temperature < zero
Self-explanatory. EOS may not be valid under current simulation conditions.

View File

@ -34,7 +34,7 @@ using namespace FixConst;
/* ---------------------------------------------------------------------- */
FixEOStableRX::FixEOStableRX(LAMMPS *lmp, int narg, char **arg) :
Fix(lmp, narg, arg), ntables(0), tables(NULL),
Fix(lmp, narg, arg), ntables(0), tables(NULL),
tables2(NULL), dHf(NULL), eosSpecies(NULL)
{
if (narg != 8) error->all(FLERR,"Illegal fix eos/table/rx command");
@ -162,13 +162,15 @@ void FixEOStableRX::setup(int vflag)
double *uCG = atom->uCG;
double *uCGnew = atom->uCGnew;
for (int i = 0; i < nlocal; i++)
if (mask[i] & groupbit){
duChem = uCG[i] - uCGnew[i];
uChem[i] += duChem;
uCG[i] = 0.0;
uCGnew[i] = 0.0;
}
if(!this->restart_reset){
for (int i = 0; i < nlocal; i++)
if (mask[i] & groupbit){
duChem = uCG[i] - uCGnew[i];
uChem[i] += duChem;
uCG[i] = 0.0;
uCGnew[i] = 0.0;
}
}
// Communicate the updated momenta and velocities to all nodes
comm->forward_comm_fix(this);
@ -200,8 +202,8 @@ void FixEOStableRX::init()
if(dpdTheta[i] <= 0.0)
error->one(FLERR,"Internal temperature <= zero");
energy_lookup(i,dpdTheta[i],tmp);
uCond[i] = tmp / 2.0;
uMech[i] = tmp / 2.0;
uCond[i] = 0.0;
uMech[i] = tmp;
uChem[i] = 0.0;
}
}

View File

@ -106,6 +106,10 @@ E: eos/table/rx values are not increasing
The equation-of-state must an increasing function
E: FixEOStableRX requires atom_style with internal temperature and energies (e.g. dpd)
Self-explanatory.
E: Internal temperature <= zero.
Self-explanatory.

View File

@ -60,16 +60,15 @@ double getElapsedTime( const TimerType &t0, const TimerType &t1) { return t1-t0;
/* ---------------------------------------------------------------------- */
FixRX::FixRX(LAMMPS *lmp, int narg, char **arg) :
Fix(lmp, narg, arg), mol2param(NULL), nreactions(0),
params(NULL), Arr(NULL), nArr(NULL), Ea(NULL), tempExp(NULL),
stoich(NULL), stoichReactants(NULL), stoichProducts(NULL), kR(NULL),
pairDPDE(NULL), dpdThetaLocal(NULL), sumWeights(NULL), sparseKinetics_nu(NULL),
sparseKinetics_nuk(NULL), sparseKinetics_inu(NULL), sparseKinetics_isIntegralReaction(NULL),
kineticsFile(NULL), id_fix_species(NULL),
Fix(lmp, narg, arg), mol2param(NULL), nreactions(0),
params(NULL), Arr(NULL), nArr(NULL), Ea(NULL), tempExp(NULL),
stoich(NULL), stoichReactants(NULL), stoichProducts(NULL), kR(NULL),
pairDPDE(NULL), dpdThetaLocal(NULL), sumWeights(NULL), sparseKinetics_nu(NULL),
sparseKinetics_nuk(NULL), sparseKinetics_inu(NULL), sparseKinetics_isIntegralReaction(NULL),
kineticsFile(NULL), id_fix_species(NULL),
id_fix_species_old(NULL), fix_species(NULL), fix_species_old(NULL)
{
if (narg < 7 || narg > 12) error->all(FLERR,"Illegal fix rx command");
restart_peratom = 1;
nevery = 1;
nreactions = maxparam = 0;
@ -366,6 +365,7 @@ void FixRX::post_constructor()
modify->add_fix(nspecies+5,newarg);
fix_species = (FixPropertyAtom *) modify->fix[modify->nfix-1];
restartFlag = modify->fix[modify->nfix-1]->restart_reset;
modify->add_fix(nspecies+5,newarg2);
fix_species_old = (FixPropertyAtom *) modify->fix[modify->nfix-1];
@ -647,34 +647,38 @@ void FixRX::setup_pre_force(int vflag)
double tmp;
int ii;
if(localTempFlag){
int count = nlocal + (newton_pair ? nghost : 0);
dpdThetaLocal = new double[count];
memset(dpdThetaLocal, 0, sizeof(double)*count);
computeLocalTemperature();
if(restartFlag){
restartFlag = 0;
} else {
if(localTempFlag){
int count = nlocal + (newton_pair ? nghost : 0);
dpdThetaLocal = new double[count];
memset(dpdThetaLocal, 0, sizeof(double)*count);
computeLocalTemperature();
}
for (int id = 0; id < nlocal; id++)
for (int ispecies=0; ispecies<nspecies; ispecies++){
tmp = atom->dvector[ispecies][id];
atom->dvector[ispecies+nspecies][id] = tmp;
}
for (int i = 0; i < nlocal; i++)
if (mask[i] & groupbit){
// Set the reaction rate constants to zero: no reactions occur at step 0
for(int irxn=0;irxn<nreactions;irxn++)
kR[irxn] = 0.0;
if (odeIntegrationFlag == ODE_LAMMPS_RK4)
rk4(i,NULL);
else if (odeIntegrationFlag == ODE_LAMMPS_RKF45)
rkf45(i,NULL);
}
// Communicate the updated momenta and velocities to all nodes
comm->forward_comm_fix(this);
if(localTempFlag) delete [] dpdThetaLocal;
}
for (int id = 0; id < nlocal; id++)
for (int ispecies=0; ispecies<nspecies; ispecies++){
tmp = atom->dvector[ispecies][id];
atom->dvector[ispecies+nspecies][id] = tmp;
}
for (int i = 0; i < nlocal; i++)
if (mask[i] & groupbit){
// Set the reaction rate constants to zero: no reactions occur at step 0
for(int irxn=0;irxn<nreactions;irxn++)
kR[irxn] = 0.0;
if (odeIntegrationFlag == ODE_LAMMPS_RK4)
rk4(i,NULL);
else if (odeIntegrationFlag == ODE_LAMMPS_RKF45)
rkf45(i,NULL);
}
// Communicate the updated momenta and velocities to all nodes
comm->forward_comm_fix(this);
if(localTempFlag) delete [] dpdThetaLocal;
}
/* ---------------------------------------------------------------------- */

View File

@ -132,6 +132,7 @@ class FixRX : public Fix {
char *kineticsFile;
char *id_fix_species,*id_fix_species_old;
class FixPropertyAtom *fix_species,*fix_species_old;
int restartFlag;
};

View File

@ -47,6 +47,7 @@
#include "comm.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "random_mars.h"
#include "memory.h"
#include "domain.h"
@ -139,6 +140,23 @@ int FixShardlow::setmask()
/* ---------------------------------------------------------------------- */
void FixShardlow::init()
{
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->pair = 0;
neighbor->requests[irequest]->fix = 1;
neighbor->requests[irequest]->ssa = 1;
}
/* ---------------------------------------------------------------------- */
void FixShardlow::init_list(int id, NeighList *ptr)
{
list = ptr;
}
/* ---------------------------------------------------------------------- */
void FixShardlow::pre_exchange()
{
memset(atom->ssaAIR, 0, sizeof(int)*atom->nlocal);
@ -217,7 +235,6 @@ void FixShardlow::ssa_update(
int newton_pair = force->newton_pair;
double randPair;
int *ssaAIR = atom->ssaAIR;
double *uCond = atom->uCond;
double *uMech = atom->uMech;
double *dpdTheta = atom->dpdTheta;
@ -411,7 +428,6 @@ void FixShardlow::initial_integrate(int vflag)
int nghost = atom->nghost;
int airnum;
class NeighList *list; // points to list in pairDPD or pairDPDE
class RanMars *pRNG;
// NOTE: this logic is specific to orthogonal boxes, not triclinic
@ -432,12 +448,10 @@ void FixShardlow::initial_integrate(int vflag)
// Allocate memory for v_t0 to hold the initial velocities for the ghosts
v_t0 = (double (*)[3]) memory->smalloc(sizeof(double)*3*nghost, "FixShardlow:v_t0");
// Define pointers to access the neighbor list and RNG
// Define pointers to access the RNG
if(pairDPDE){
list = pairDPDE->list;
pRNG = pairDPDE->random;
} else {
list = pairDPD->list;
pRNG = pairDPD->random;
}
inum = list->inum;

View File

@ -26,9 +26,13 @@ namespace LAMMPS_NS {
class FixShardlow : public Fix {
public:
class NeighList *list; // The SSA specific neighbor list
FixShardlow(class LAMMPS *, int, char **);
~FixShardlow();
int setmask();
virtual void init();
virtual void init_list(int, class NeighList *);
virtual void setup(int);
virtual void initial_integrate(int);
void setup_pre_exchange();

129
src/USER-DPD/nbin_ssa.cpp Normal file
View File

@ -0,0 +1,129 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors:
James Larentzos (ARL) and Timothy I. Mattox (Engility Corporation)
------------------------------------------------------------------------- */
#include "nbin_ssa.h"
#include "atom.h"
#include "group.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
NBinSSA::NBinSSA(LAMMPS *lmp) : NBinStandard(lmp)
{
maxbin_ssa = 0;
bins_ssa = NULL;
maxhead_ssa = 0;
binhead_ssa = NULL;
gbinhead_ssa = NULL;
}
NBinSSA::~NBinSSA()
{
memory->destroy(bins_ssa);
memory->destroy(binhead_ssa);
memory->destroy(gbinhead_ssa);
}
/* ----------------------------------------------------------------------
bin owned and ghost atoms for the Shardlow Splitting Algorithm (SSA)
local atoms are in distinct bins (binhead_ssa) from the ghosts
ghost atoms are in distinct bins (gbinhead_ssa) from the locals
ghosts which are not in an Active Interaction Region (AIR) are skipped
------------------------------------------------------------------------- */
void NBinSSA::bin_atoms()
{
int i,ibin;
int nlocal = atom->nlocal;
int nall = nlocal + atom->nghost;
if (includegroup) nlocal = atom->nfirst;
double **x = atom->x;
int *mask = atom->mask;
int *ssaAIR = atom->ssaAIR;
for (i = 0; i < mbins; i++) {
gbinhead_ssa[i] = -1;
binhead_ssa[i] = -1;
}
// bin in reverse order so linked list will be in forward order
if (includegroup) {
int bitmask = group->bitmask[includegroup];
int nowned = atom->nlocal; // NOTE: nlocal was set to atom->nfirst above
for (i = nall-1; i >= nowned; i--) {
if (ssaAIR[i] < 2) continue; // skip ghost atoms not in AIR
if (mask[i] & bitmask) {
ibin = coord2bin(x[i]);
bins_ssa[i] = gbinhead_ssa[ibin];
gbinhead_ssa[ibin] = i;
}
}
} else {
for (i = nall-1; i >= nlocal; i--) {
if (ssaAIR[i] < 2) continue; // skip ghost atoms not in AIR
ibin = coord2bin(x[i]);
bins_ssa[i] = gbinhead_ssa[ibin];
gbinhead_ssa[ibin] = i;
}
}
for (i = nlocal-1; i >= 0; i--) {
ibin = coord2bin(x[i]);
bins_ssa[i] = binhead_ssa[ibin];
binhead_ssa[ibin] = i;
}
}
/* ---------------------------------------------------------------------- */
void NBinSSA::bin_atoms_setup(int nall)
{
NBinStandard::bin_atoms_setup(nall); // Setup the parent class's data too
if (mbins > maxhead_ssa) {
maxhead_ssa = mbins;
memory->destroy(gbinhead_ssa);
memory->destroy(binhead_ssa);
memory->create(binhead_ssa,maxhead_ssa,"binhead_ssa");
memory->create(gbinhead_ssa,maxhead_ssa,"gbinhead_ssa");
}
if (nall > maxbin_ssa) {
maxbin_ssa = nall;
memory->destroy(bins_ssa);
memory->create(bins_ssa,maxbin_ssa,"bins_ssa");
}
}
/* ---------------------------------------------------------------------- */
bigint NBinSSA::memory_usage()
{
bigint bytes = NBinStandard::memory_usage(); // Count the parent's usage too
if (maxbin_ssa) bytes += memory->usage(bins_ssa,maxbin_ssa);
if (maxhead_ssa) {
bytes += memory->usage(binhead_ssa,maxhead_ssa);
bytes += memory->usage(gbinhead_ssa,maxhead_ssa);
}
return bytes;
}

54
src/USER-DPD/nbin_ssa.h Normal file
View File

@ -0,0 +1,54 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NBIN_CLASS
NBinStyle(ssa,
NBinSSA,
NB_SSA)
#else
#ifndef LMP_NBIN_SSA_H
#define LMP_NBIN_SSA_H
#include "nbin_standard.h"
namespace LAMMPS_NS {
class NBinSSA : public NBinStandard {
public:
int *bins_ssa; // index of next atom in each bin
int maxbin_ssa; // size of bins_ssa array
int *binhead_ssa; // index of 1st local atom in each bin
int *gbinhead_ssa; // index of 1st ghost atom in each bin
int maxhead_ssa; // size of binhead_ssa and gbinhead_ssa arrays
NBinSSA(class LAMMPS *);
~NBinSSA();
void bin_atoms_setup(int);
void bin_atoms();
bigint memory_usage();
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,260 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors:
James Larentzos and Timothy I. Mattox (Engility Corporation)
------------------------------------------------------------------------- */
#include "npair_half_bin_newton_ssa.h"
#include "neighbor.h"
#include "nstencil_ssa.h"
#include "nbin_ssa.h"
#include "neigh_list.h"
#include "atom.h"
#include "atom_vec.h"
#include "molecule.h"
#include "domain.h"
#include "group.h"
#include "memory.h"
#include "my_page.h"
#include "error.h"
using namespace LAMMPS_NS;
// allocate space for static class variable
// prototype for non-class function
static int *ssaAIRptr;
static int cmp_ssaAIR(const void *, const void *);
/* ---------------------------------------------------------------------- */
NPairHalfBinNewtonSSA::NPairHalfBinNewtonSSA(LAMMPS *lmp) : NPair(lmp) {}
/* ----------------------------------------------------------------------
binned neighbor list construction with full Newton's 3rd law
for use by Shardlow Spliting Algorithm
each owned atom i checks its own bin and other bins in Newton stencil
every pair stored exactly once by some processor
------------------------------------------------------------------------- */
void NPairHalfBinNewtonSSA::build(NeighList *list)
{
int i,j,k,n,itype,jtype,ibin,which,imol,iatom,moltemplate;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr;
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int nlocal = atom->nlocal;
int nall = nlocal + atom->nghost;
if (includegroup) nlocal = atom->nfirst;
int *ssaAIR = atom->ssaAIR;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int molecular = atom->molecular;
if (molecular == 2) moltemplate = 1;
else moltemplate = 0;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
MyPage<int> *ipage = list->ipage;
NStencilSSA *ns_ssa = dynamic_cast<NStencilSSA*>(ns);
if (!ns_ssa) error->one(FLERR, "NStencil wasn't a NStencilSSA object");
int nstencil_half = ns_ssa->nstencil_half;
int nstencil_full = ns_ssa->nstencil;
NBinSSA *nb_ssa = dynamic_cast<NBinSSA*>(nb);
if (!nb_ssa) error->one(FLERR, "NBin wasn't a NBinSSA object");
int *bins_ssa = nb_ssa->bins_ssa;
int *binhead_ssa = nb_ssa->binhead_ssa;
int *gbinhead_ssa = nb_ssa->gbinhead_ssa;
int inum = 0;
ipage->reset();
// loop over owned atoms, storing half of the neighbors
for (i = 0; i < nlocal; i++) {
int AIRct[8] = { 0 };
n = 0;
neighptr = ipage->vget();
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over rest of local atoms in i's bin
// just store them, since j is beyond i in linked list
for (j = bins_ssa[i]; j >= 0; j = bins_ssa[j]) {
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >= 0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
ibin = coord2bin(x[i]);
// loop over all local atoms in other bins in "half" stencil
for (k = 0; k < nstencil_half; k++) {
for (j = binhead_ssa[ibin+stencil[k]]; j >= 0;
j = bins_ssa[j]) {
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >= 0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
}
AIRct[0] = n;
// loop over AIR ghost atoms in all bins in "full" stencil
// Note: the non-AIR ghost atoms have already been filtered out
// That is a significant time savings because of the "full" stencil
// Note2: only non-pure locals can have ghosts as neighbors
if (ssaAIR[i] == 1) for (k = 0; k < nstencil_full; k++) {
for (j = gbinhead_ssa[ibin+stencil[k]]; j >= 0;
j = bins_ssa[j]) {
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >= 0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) {
neighptr[n++] = j;
++(AIRct[ssaAIR[j] - 1]);
} else if (domain->minimum_image_check(delx,dely,delz)) {
neighptr[n++] = j;
++(AIRct[ssaAIR[j] - 1]);
} else if (which > 0) {
neighptr[n++] = j ^ (which << SBBITS);
++(AIRct[ssaAIR[j] - 1]);
}
} else {
neighptr[n++] = j;
++(AIRct[ssaAIR[j] - 1]);
}
}
}
}
ilist[inum++] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage->vgot(n);
if (ipage->status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
// sort the ghosts in the neighbor list by their ssaAIR number
ssaAIRptr = atom->ssaAIR;
qsort(&(neighptr[AIRct[0]]), n - AIRct[0], sizeof(int), cmp_ssaAIR);
// do a prefix sum on the counts to turn them into indexes
list->ndxAIR_ssa[i][0] = AIRct[0];
for (int ndx = 1; ndx < 8; ++ndx) {
list->ndxAIR_ssa[i][ndx] = AIRct[ndx] + list->ndxAIR_ssa[i][ndx - 1];
}
}
list->inum = inum;
}
/* ----------------------------------------------------------------------
comparison function invoked by qsort()
accesses static class member ssaAIRptr, set before call to qsort()
------------------------------------------------------------------------- */
static int cmp_ssaAIR(const void *iptr, const void *jptr)
{
int i = *((int *) iptr);
int j = *((int *) jptr);
if (ssaAIRptr[i] < ssaAIRptr[j]) return -1;
if (ssaAIRptr[i] > ssaAIRptr[j]) return 1;
return 0;
}

View File

@ -0,0 +1,43 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
NPairStyle(half/bin/newton/ssa,
NPairHalfBinNewtonSSA,
NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_SSA)
#else
#ifndef LMP_NPAIR_HALF_BIN_NEWTON_SSA_H
#define LMP_NPAIR_HALF_BIN_NEWTON_SSA_H
#include "npair.h"
namespace LAMMPS_NS {
class NPairHalfBinNewtonSSA : public NPair {
public:
NPairHalfBinNewtonSSA(class LAMMPS *);
~NPairHalfBinNewtonSSA() {}
void build(class NeighList *);
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,132 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors:
James Larentzos and Timothy I. Mattox (Engility Corporation)
------------------------------------------------------------------------- */
#include "npair_halffull_newton_ssa.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "atom.h"
#include "atom_vec.h"
#include "molecule.h"
#include "domain.h"
#include "my_page.h"
#include "error.h"
using namespace LAMMPS_NS;
// allocate space for static class variable
// prototype for non-class function
static int *ssaAIRptr;
static int cmp_ssaAIR(const void *, const void *);
/* ---------------------------------------------------------------------- */
NPairHalffullNewtonSSA::NPairHalffullNewtonSSA(LAMMPS *lmp) : NPair(lmp) {}
/* ----------------------------------------------------------------------
build half list from full list for use by Shardlow Spliting Algorithm
pair stored once if i,j are both owned and i < j
if j is ghost, only store if j coords are "above and to the right" of i
works if full list is a skip list
------------------------------------------------------------------------- */
void NPairHalffullNewtonSSA::build(NeighList *list)
{
int i,j,ii,jj,n,jnum,joriginal;
int *neighptr,*jlist;
int nlocal = atom->nlocal;
int *ssaAIR = atom->ssaAIR;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
MyPage<int> *ipage = list->ipage;
int *ilist_full = list->listfull->ilist;
int *numneigh_full = list->listfull->numneigh;
int **firstneigh_full = list->listfull->firstneigh;
int inum_full = list->listfull->inum;
int inum = 0;
ipage->reset();
// loop over parent full list
for (ii = 0; ii < inum_full; ii++) {
int AIRct[8] = { 0 };
n = 0;
neighptr = ipage->vget();
i = ilist_full[ii];
// loop over full neighbor list
jlist = firstneigh_full[i];
jnum = numneigh_full[i];
for (jj = 0; jj < jnum; jj++) {
joriginal = jlist[jj];
j = joriginal & NEIGHMASK;
if (j < nlocal) {
if (i > j) continue;
++(AIRct[0]);
} else {
if (ssaAIR[j] < 2) continue; // skip ghost atoms not in AIR
++(AIRct[ssaAIR[j] - 1]);
}
neighptr[n++] = joriginal;
}
ilist[inum++] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage->vgot(n);
if (ipage->status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
// sort the locals+ghosts in the neighbor list by their ssaAIR number
ssaAIRptr = atom->ssaAIR;
qsort(&(neighptr[0]), n, sizeof(int), cmp_ssaAIR);
// do a prefix sum on the counts to turn them into indexes
list->ndxAIR_ssa[i][0] = AIRct[0];
for (int ndx = 1; ndx < 8; ++ndx) {
list->ndxAIR_ssa[i][ndx] = AIRct[ndx] + list->ndxAIR_ssa[i][ndx - 1];
}
}
list->inum = inum;
}
/* ----------------------------------------------------------------------
comparison function invoked by qsort()
accesses static class member ssaAIRptr, set before call to qsort()
------------------------------------------------------------------------- */
static int cmp_ssaAIR(const void *iptr, const void *jptr)
{
int i = *((int *) iptr);
int j = *((int *) jptr);
if (ssaAIRptr[i] < ssaAIRptr[j]) return -1;
if (ssaAIRptr[i] > ssaAIRptr[j]) return 1;
return 0;
}

View File

@ -0,0 +1,44 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
NPairStyle(halffull/newton/ssa,
NPairHalffullNewtonSSA,
NP_HALFFULL | NP_NSQ | NP_BIN | NP_MULTI | NP_NEWTON |
NP_ORTHO | NP_TRI | NP_SSA)
#else
#ifndef LMP_NPAIR_HALFFULL_NEWTON_SSA_H
#define LMP_NPAIR_HALFFULL_NEWTON_SSA_H
#include "npair.h"
namespace LAMMPS_NS {
class NPairHalffullNewtonSSA : public NPair {
public:
NPairHalffullNewtonSSA(class LAMMPS *);
~NPairHalffullNewtonSSA() {}
void build(class NeighList *);
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,64 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors:
James Larentzos and Timothy I. Mattox (Engility Corporation)
------------------------------------------------------------------------- */
#include "nstencil_half_bin_2d_newton_ssa.h"
#include "neighbor.h"
#include "neigh_list.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
NStencilHalfBin2dNewtonSSA::NStencilHalfBin2dNewtonSSA(LAMMPS *lmp) :
NStencilSSA(lmp) {}
/* ----------------------------------------------------------------------
create stencil based on bin geometry and cutoff
stencil = bins whose closest corner to central bin is within cutoff
sx,sy,sz = bin bounds = furthest the stencil could possibly extend
3d creates xyz stencil, 2d creates xy stencil
for half list with newton on:
stencil is bins to the "upper right" of central bin
stencil does not include self
additionally, includes the bins beyond nstencil that are needed
to locate all the Active Interaction Region (AIR) ghosts for SSA
------------------------------------------------------------------------- */
void NStencilHalfBin2dNewtonSSA::create()
{
int i,j,pos = 0;
for (j = 0; j <= sy; j++)
for (i = -sx; i <= sx; i++)
if (j > 0 || (j == 0 && i > 0))
if (bin_distance(i,j,0) < cutneighmaxsq)
stencil[pos++] = j*mbinx + i;
nstencil_half = pos; // record where normal half stencil ends
// include additional bins for AIR ghosts only
for (j = -sy; j <= 0; j++)
for (i = -sx; i <= sx; i++) {
if (j == 0 && i > 0) continue;
if (bin_distance(i,j,0) < cutneighmaxsq)
stencil[pos++] = j*mbinx + i;
}
nstencil = pos; // record where full stencil ends
}

View File

@ -0,0 +1,43 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NSTENCIL_CLASS
NStencilStyle(half/bin/2d/newton/ssa,
NStencilHalfBin2dNewtonSSA,
NS_HALF | NS_BIN | NS_2D | NS_NEWTON | NS_SSA | NS_ORTHO)
#else
#ifndef LMP_NSTENCIL_HALF_BIN_2D_NEWTON_SSA_H
#define LMP_NSTENCIL_HALF_BIN_2D_NEWTON_SSA_H
#include "nstencil_ssa.h"
namespace LAMMPS_NS {
class NStencilHalfBin2dNewtonSSA : public NStencilSSA {
public:
NStencilHalfBin2dNewtonSSA(class LAMMPS *);
~NStencilHalfBin2dNewtonSSA() {}
void create();
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,74 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors:
James Larentzos and Timothy I. Mattox (Engility Corporation)
------------------------------------------------------------------------- */
#include "nstencil_half_bin_3d_newton_ssa.h"
#include "neighbor.h"
#include "neigh_list.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
NStencilHalfBin3dNewtonSSA::NStencilHalfBin3dNewtonSSA(LAMMPS *lmp) :
NStencilSSA(lmp) {}
/* ----------------------------------------------------------------------
create stencil based on bin geometry and cutoff
stencil = bins whose closest corner to central bin is within cutoff
sx,sy,sz = bin bounds = furthest the stencil could possibly extend
3d creates xyz stencil, 2d creates xy stencil
for half list with newton on:
stencil is bins to the "upper right" of central bin
stencil does not include self
additionally, includes the bins beyond nstencil that are needed
to locate all the Active Interaction Region (AIR) ghosts for SSA
------------------------------------------------------------------------- */
void NStencilHalfBin3dNewtonSSA::create()
{
int i,j,k,pos = 0;
for (k = 0; k <= sz; k++)
for (j = -sy; j <= sy; j++)
for (i = -sx; i <= sx; i++)
if (k > 0 || j > 0 || (j == 0 && i > 0))
if (bin_distance(i,j,k) < cutneighmaxsq)
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
nstencil_half = pos; // record where normal half stencil ends
// include additional bins for AIR ghosts only
for (k = -sz; k < 0; k++)
for (j = -sy; j <= sy; j++)
for (i = -sx; i <= sx; i++)
if (bin_distance(i,j,k) < cutneighmaxsq)
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
// For k==0, make sure to skip already included bins
k = 0;
for (j = -sy; j <= 0; j++)
for (i = -sx; i <= sx; i++) {
if (j == 0 && i > 0) continue;
if (bin_distance(i,j,k) < cutneighmaxsq)
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
}
nstencil = pos; // record where full stencil ends
}

View File

@ -0,0 +1,43 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NSTENCIL_CLASS
NStencilStyle(half/bin/3d/newton/ssa,
NStencilHalfBin3dNewtonSSA,
NS_HALF | NS_BIN | NS_3D | NS_NEWTON | NS_SSA | NS_ORTHO)
#else
#ifndef LMP_NSTENCIL_HALF_BIN_3D_NEWTON_SSA_H
#define LMP_NSTENCIL_HALF_BIN_3D_NEWTON_SSA_H
#include "nstencil_ssa.h"
namespace LAMMPS_NS {
class NStencilHalfBin3dNewtonSSA : public NStencilSSA {
public:
NStencilHalfBin3dNewtonSSA(class LAMMPS *);
~NStencilHalfBin3dNewtonSSA() {}
void create();
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -11,12 +11,26 @@
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef LMP_NSTENCIL_SSA_H
#define LMP_NSTENCIL_SSA_H
#include "nstencil.h"
namespace LAMMPS_NS {
class NStencilSSA : public NStencil {
public:
NStencilSSA(class LAMMPS *lmp) : NStencil(lmp) { }
~NStencilSSA() {}
virtual void create() = 0;
int nstencil_half; // where the half stencil ends
};
}
#endif
/* ERROR/WARNING messages:
E: Neighbor list overflow, boost neigh_modify one
There are too many neighbors of a single atom. Use the neigh_modify
command to increase the max number of neighbors allowed for one atom.
You may also want to boost the page size.
*/

View File

@ -182,7 +182,7 @@ void PairDPDfdt::compute(int eflag, int vflag)
wr = 1.0 - r/cut[itype][jtype];
wd = wr*wr;
randnum = random->gaussian();
gamma_ij = sigma[itype][jtype]*sigma[itype][jtype]
gamma_ij = sigma[itype][jtype]*sigma[itype][jtype]
/ (2.0*force->boltz*temperature);
// conservative force = a0 * wd
@ -320,11 +320,9 @@ void PairDPDfdt::init_style()
splitFDT_flag = false;
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->ssa = 0;
for (int i = 0; i < modify->nfix; i++)
if (strcmp(modify->fix[i]->style,"shardlow") == 0){
splitFDT_flag = true;
neighbor->requests[irequest]->ssa = 1;
}
}

View File

@ -206,7 +206,7 @@ void PairDPDfdtEnergy::compute(int eflag, int vflag)
if (r < EPSILON) continue; // r can be 0.0 in DPD systems
rinv = 1.0/r;
wr = 1.0 - r/cut[itype][jtype];
wd = wr*wr;
wd = wr*wr;
delvx = vxtmp - v[j][0];
delvy = vytmp - v[j][1];
@ -214,11 +214,11 @@ void PairDPDfdtEnergy::compute(int eflag, int vflag)
dot = delx*delvx + dely*delvy + delz*delvz;
randnum = random->gaussian();
// Compute the current temperature
theta_ij = 0.5*(1.0/dpdTheta[i] + 1.0/dpdTheta[j]);
theta_ij = 1.0/theta_ij;
gamma_ij = sigma[itype][jtype]*sigma[itype][jtype]
// Compute the current temperature
theta_ij = 0.5*(1.0/dpdTheta[i] + 1.0/dpdTheta[j]);
theta_ij = 1.0/theta_ij;
gamma_ij = sigma[itype][jtype]*sigma[itype][jtype]
/ (2.0*force->boltz*theta_ij);
// conservative force = a0 * wr
@ -239,44 +239,44 @@ void PairDPDfdtEnergy::compute(int eflag, int vflag)
f[j][2] -= delz*fpair;
}
if (rmass) {
mass_i = rmass[i];
mass_j = rmass[j];
} else {
mass_i = mass[itype];
mass_j = mass[jtype];
}
massinv_i = 1.0 / mass_i;
massinv_j = 1.0 / mass_j;
if (rmass) {
mass_i = rmass[i];
mass_j = rmass[j];
} else {
mass_i = mass[itype];
mass_j = mass[jtype];
}
massinv_i = 1.0 / mass_i;
massinv_j = 1.0 / mass_j;
// Compute the mechanical and conductive energy, uMech and uCond
mu_ij = massinv_i + massinv_j;
mu_ij *= force->ftm2v;
// Compute the mechanical and conductive energy, uMech and uCond
mu_ij = massinv_i + massinv_j;
mu_ij *= force->ftm2v;
uTmp = gamma_ij*wd*rinv*rinv*dot*dot
uTmp = gamma_ij*wd*rinv*rinv*dot*dot
- 0.5*sigma[itype][jtype]*sigma[itype][jtype]*mu_ij*wd;
uTmp -= sigma[itype][jtype]*wr*rinv*dot*randnum*dtinvsqrt;
uTmp *= 0.5;
uTmp -= sigma[itype][jtype]*wr*rinv*dot*randnum*dtinvsqrt;
uTmp *= 0.5;
duMech[i] += uTmp;
if (newton_pair || j < nlocal) {
duMech[j] += uTmp;
}
// Compute uCond
randnum = random->gaussian();
kappa_ij = kappa[itype][jtype];
alpha_ij = sqrt(2.0*force->boltz*kappa_ij);
randPair = alpha_ij*wr*randnum*dtinvsqrt;
duMech[i] += uTmp;
if (newton_pair || j < nlocal) {
duMech[j] += uTmp;
}
// Compute uCond
randnum = random->gaussian();
kappa_ij = kappa[itype][jtype];
alpha_ij = sqrt(2.0*force->boltz*kappa_ij);
randPair = alpha_ij*wr*randnum*dtinvsqrt;
uTmp = kappa_ij*(1.0/dpdTheta[i] - 1.0/dpdTheta[j])*wd;
uTmp += randPair;
duCond[i] += uTmp;
if (newton_pair || j < nlocal) {
duCond[j] -= uTmp;
}
uTmp = kappa_ij*(1.0/dpdTheta[i] - 1.0/dpdTheta[j])*wd;
uTmp += randPair;
duCond[i] += uTmp;
if (newton_pair || j < nlocal) {
duCond[j] -= uTmp;
}
if (eflag) {
// unshifted eng of conservative term:
// evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]);
@ -408,11 +408,9 @@ void PairDPDfdtEnergy::init_style()
splitFDT_flag = false;
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->ssa = 0;
for (int i = 0; i < modify->nfix; i++)
if (strcmp(modify->fix[i]->style,"shardlow") == 0){
splitFDT_flag = true;
neighbor->requests[irequest]->ssa = 1;
}
bool eos_flag = false;

View File

@ -310,13 +310,13 @@ void PairExp6rx::compute(int eflag, int vflag)
uin1 = buck1*(6.0*rin1exp - alphaOld12_ij*rm6ij*rin6inv) - urc - durc*(rin1-rCut);
win1 = -buck1*buck2*(rin1*rin1exp*rminv - rm6ij*rin6inv) - rin1*durc;
win1 = buck1*buck2*(rin1*rin1exp*rminv - rm6ij*rin6inv) + rin1*durc;
aRep = -1.0*win1*powint(rin1,nRep)/nRep;
aRep = win1*powint(rin1,nRep)/nRep;
uin1rep = aRep/powint(rin1,nRep);
forceExp6 = -double(nRep)*aRep/powint(r,nRep);
forceExp6 = double(nRep)*aRep/powint(r,nRep);
fpairOldEXP6_12 = factor_lj*forceExp6*r2inv;
evdwlOldEXP6_12 = uin1 - uin1rep + aRep/powint(r,nRep);
@ -350,13 +350,13 @@ void PairExp6rx::compute(int eflag, int vflag)
uin1 = buck1*(6.0*rin1exp - alphaOld21_ij*rm6ij*rin6inv) - urc - durc*(rin1-rCut);
win1 = -buck1*buck2*(rin1*rin1exp*rminv - rm6ij*rin6inv) - rin1*durc;
win1 = buck1*buck2*(rin1*rin1exp*rminv - rm6ij*rin6inv) + rin1*durc;
aRep = -1.0*win1*powint(rin1,nRep)/nRep;
aRep = win1*powint(rin1,nRep)/nRep;
uin1rep = aRep/powint(rin1,nRep);
forceExp6 = -double(nRep)*aRep/powint(r,nRep);
forceExp6 = double(nRep)*aRep/powint(r,nRep);
fpairOldEXP6_21 = factor_lj*forceExp6*r2inv;
evdwlOldEXP6_21 = uin1 - uin1rep + aRep/powint(r,nRep);
@ -405,9 +405,9 @@ void PairExp6rx::compute(int eflag, int vflag)
uin1 = buck1*(6.0*rin1exp - alpha12_ij*rm6ij*rin6inv) - urc - durc*(rin1-rCut);
win1 = -buck1*buck2*(rin1*rin1exp*rminv - rm6ij*rin6inv) - rin1*durc;
win1 = buck1*buck2*(rin1*rin1exp*rminv - rm6ij*rin6inv) + rin1*durc;
aRep = -1.0*win1*powint(rin1,nRep)/nRep;
aRep = win1*powint(rin1,nRep)/nRep;
uin1rep = aRep/powint(rin1,nRep);
@ -437,9 +437,9 @@ void PairExp6rx::compute(int eflag, int vflag)
uin1 = buck1*(6.0*rin1exp - alpha21_ij*rm6ij*rin6inv) - urc - durc*(rin1-rCut);
win1 = -buck1*buck2*(rin1*rin1exp*rminv - rm6ij*rin6inv) - rin1*durc;
win1 = buck1*buck2*(rin1*rin1exp*rminv - rm6ij*rin6inv) + rin1*durc;
aRep = -1.0*win1*powint(rin1,nRep)/nRep;
aRep = win1*powint(rin1,nRep)/nRep;
uin1rep = aRep/powint(rin1,nRep);
@ -1102,6 +1102,32 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm
rm2_old *= pow(nTotalOFA_old,fuchslinR);
}
}
// Check that no fractions are less than zero
if(fraction1 < 0.0){
if(fraction1 < -1.0e-10){
error->all(FLERR,"Computed fraction less than -1.0e-10");
}
fraction1 = 0.0;
}
if(fraction2 < 0.0){
if(fraction2 < -1.0e-10){
error->all(FLERR,"Computed fraction less than -1.0e-10");
}
fraction2 = 0.0;
}
if(fraction1_old < 0.0){
if(fraction1_old < -1.0e-10){
error->all(FLERR,"Computed fraction less than -1.0e-10");
}
fraction1_old = 0.0;
}
if(fraction2_old < 0.0){
if(fraction2_old < -1.0e-10){
error->all(FLERR,"Computed fraction less than -1.0e-10");
}
fraction2_old = 0.0;
}
}
/* ---------------------------------------------------------------------- */

View File

@ -182,7 +182,7 @@ void PairMultiLucy::compute(int eflag, int vflag)
f[j][2] -= delz*fpair;
}
if (evflag) ev_tally(i,j,nlocal,newton_pair,
0.0,0.0,fpair,delx,dely,delz);
0.0,0.0,fpair,delx,dely,delz);
}
}
@ -201,7 +201,7 @@ void PairMultiLucy::compute(int eflag, int vflag)
evdwl *=(pi*cutsq[itype][itype]*cutsq[itype][itype])/84.0;
if (evflag) ev_tally(0,0,nlocal,newton_pair,
evdwl,0.0,0.0,0.0,0.0,0.0);
evdwl,0.0,0.0,0.0,0.0,0.0);
}
if (vflag_fdotr) virial_fdotr_compute();

View File

@ -18,7 +18,7 @@ PairStyle(multi/lucy,PairMultiLucy)
#else
#ifndef LMP_PAIR_MULTI_LUCY_H
#define LMP_PAIR_MUTLI_LUCY_H
#define LMP_PAIR_MULTI_LUCY_H
#include "pair.h"

View File

@ -18,7 +18,7 @@ PairStyle(multi/lucy/rx,PairMultiLucyRX)
#else
#ifndef LMP_PAIR_MULTI_LUCY_RX_H
#define LMP_PAIR_MUTLI_LUCY_RX_H
#define LMP_PAIR_MULTI_LUCY_RX_H
#include "pair.h"

View File

@ -3,10 +3,6 @@
mode=$1
# enforce using portable C locale
LC_ALL=C
export LC_ALL
# arg1 = file, arg2 = file it depends on
action () {
@ -44,6 +40,10 @@ action intel_preprocess.h
action intel_buffers.h
action intel_buffers.cpp
action math_extra_intel.h
action nbin_intel.h
action nbin_intel.cpp
action npair_intel.h
action npair_intel.cpp
action intel_simd.h pair_sw_intel.cpp
action intel_intrinsics.h pair_tersoff_intel.cpp
action verlet_lrt_intel.h pppm.cpp
@ -58,18 +58,10 @@ if (test $mode = 1) then
sed -i -e 's|^PKG_INC =[ \t]*|&-DLMP_USER_INTEL |' ../Makefile.package
fi
# force rebuild of files with LMP_USER_INTEL switch
touch ../accelerator_intel.h
elif (test $mode = 0) then
if (test -e ../Makefile.package) then
sed -i -e 's/[^ \t]*INTEL[^ \t]* //' ../Makefile.package
fi
# force rebuild of files with LMP_USER_INTEL switch
touch ../accelerator_intel.h
fi

View File

@ -63,7 +63,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
_nbor_pack_width = 1;
_precision_mode = PREC_MODE_MIXED;
_offload_balance = 1.0;
_offload_balance = -1.0;
_overflow_flag[LMP_OVERFLOW] = 0;
_off_overflow_flag[LMP_OVERFLOW] = 0;
@ -189,10 +189,18 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
offload_cores = omp_get_num_procs();
omp_set_num_threads(offload_cores);
max_offload_threads = omp_get_max_threads();
#ifdef __AVX512F__
if ( (offload_cores / 4) % 2 == 1) {
offload_cores += 4;
max_offload_threads += 4;
}
#endif
}
_max_offload_threads = max_offload_threads;
_offload_cores = offload_cores;
if (_offload_threads == 0) _offload_threads = offload_cores;
if (_offload_cores > 244 && _offload_tpc > 2)
_offload_tpc = 2;
}
#endif
@ -317,8 +325,6 @@ void FixIntel::init()
error->all(FLERR,
"Currently, cannot use more than one intel style with hybrid.");
neighbor->fix_intel = (void *)this;
check_neighbor_intel();
if (_precision_mode == PREC_MODE_SINGLE)
_single_buffers->zero_ev();
@ -1006,8 +1012,10 @@ void FixIntel::set_offload_affinity()
int offload_threads = _offload_threads;
int offload_tpc = _offload_tpc;
int offload_affinity_balanced = _offload_affinity_balanced;
int offload_cores = _offload_cores;
#pragma offload target(mic:_cop) mandatory \
in(node_rank,offload_threads,offload_tpc,offload_affinity_balanced)
in(node_rank,offload_threads,offload_tpc,offload_affinity_balanced, \
offload_cores)
{
omp_set_num_threads(offload_threads);
#pragma omp parallel
@ -1015,20 +1023,24 @@ void FixIntel::set_offload_affinity()
int tnum = omp_get_thread_num();
kmp_affinity_mask_t mask;
kmp_create_affinity_mask(&mask);
int proc;
if (offload_affinity_balanced) {
proc = offload_threads * node_rank + tnum;
int proc = offload_threads * node_rank + tnum;
#ifdef __AVX512F__
proc = (proc / offload_tpc) + (proc % offload_tpc) *
((offload_cores) / 4);
proc += 68;
#else
if (offload_affinity_balanced)
proc = proc * 4 - (proc / 60) * 240 + proc / 60 + 1;
} else {
proc = offload_threads * node_rank + tnum;
else
proc += (proc / 4) * (4 - offload_tpc) + 1;
}
#endif
kmp_set_affinity_mask_proc(proc, &mask);
if (kmp_set_affinity(&mask) != 0)
printf("Could not set affinity on rank %d thread %d to %d\n",
node_rank, tnum, proc);
}
}
if (_precision_mode == PREC_MODE_SINGLE)
_single_buffers->set_off_params(offload_threads, _cop, _separate_buffers);
else if (_precision_mode == PREC_MODE_MIXED)

Some files were not shown because too many files have changed in this diff Show More