Merge pull request #1895 from stanmoore1/kk_compute_orientorder_atom
Add Kokkos version of compute orientorder/atom
This commit is contained in:
@ -3,6 +3,9 @@
|
|||||||
compute orientorder/atom command
|
compute orientorder/atom command
|
||||||
================================
|
================================
|
||||||
|
|
||||||
|
compute orientorder/atom/kk command
|
||||||
|
=======================
|
||||||
|
|
||||||
Syntax
|
Syntax
|
||||||
""""""
|
""""""
|
||||||
|
|
||||||
@ -128,6 +131,30 @@ too frequently.
|
|||||||
:doc:`special_bonds <special_bonds>` command that includes all pairs in
|
:doc:`special_bonds <special_bonds>` command that includes all pairs in
|
||||||
the neighbor list.
|
the neighbor list.
|
||||||
|
|
||||||
|
----------
|
||||||
|
|
||||||
|
|
||||||
|
Styles with a *gpu*\ , *intel*\ , *kk*\ , *omp*\ , or *opt* suffix are
|
||||||
|
functionally the same as the corresponding style without the suffix.
|
||||||
|
They have been optimized to run faster, depending on your available
|
||||||
|
hardware, as discussed on the :doc:`Speed packages <Speed_packages>` doc
|
||||||
|
page. The accelerated styles take the same arguments and should
|
||||||
|
produce the same results, except for round-off and precision issues.
|
||||||
|
|
||||||
|
These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
|
||||||
|
USER-OMP and OPT packages, respectively. They are only enabled if
|
||||||
|
LAMMPS was built with those packages. See the :doc:`Build package <Build_package>` doc page for more info.
|
||||||
|
|
||||||
|
You can specify the accelerated styles explicitly in your input script
|
||||||
|
by including their suffix, or you can use the :doc:`-suffix command-line switch <Run_options>` when you invoke LAMMPS, or you can use the
|
||||||
|
:doc:`suffix <suffix>` command in your input script.
|
||||||
|
|
||||||
|
See the :doc:`Speed packages <Speed_packages>` doc page for more
|
||||||
|
instructions on how to use the accelerated styles effectively.
|
||||||
|
|
||||||
|
|
||||||
|
----------
|
||||||
|
|
||||||
**Output info:**
|
**Output info:**
|
||||||
|
|
||||||
This compute calculates a per-atom array with *nlvalues* columns,
|
This compute calculates a per-atom array with *nlvalues* columns,
|
||||||
|
|||||||
@ -85,6 +85,8 @@ action comm_kokkos.cpp
|
|||||||
action comm_kokkos.h
|
action comm_kokkos.h
|
||||||
action comm_tiled_kokkos.cpp
|
action comm_tiled_kokkos.cpp
|
||||||
action comm_tiled_kokkos.h
|
action comm_tiled_kokkos.h
|
||||||
|
action compute_orientorder_atom_kokkos.cpp
|
||||||
|
action compute_orientorder_atom_kokkos.h
|
||||||
action compute_temp_kokkos.cpp
|
action compute_temp_kokkos.cpp
|
||||||
action compute_temp_kokkos.h
|
action compute_temp_kokkos.h
|
||||||
action dihedral_charmm_kokkos.cpp dihedral_charmm.cpp
|
action dihedral_charmm_kokkos.cpp dihedral_charmm.cpp
|
||||||
|
|||||||
739
src/KOKKOS/compute_orientorder_atom_kokkos.cpp
Normal file
739
src/KOKKOS/compute_orientorder_atom_kokkos.cpp
Normal file
@ -0,0 +1,739 @@
|
|||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
|
certain rights in this software. This software is distributed under
|
||||||
|
the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
Contributing author: Stan Moore (SNL)
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#include "compute_orientorder_atom_kokkos.h"
|
||||||
|
#include <cstring>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cmath>
|
||||||
|
#include "atom_kokkos.h"
|
||||||
|
#include "update.h"
|
||||||
|
#include "modify.h"
|
||||||
|
#include "neighbor_kokkos.h"
|
||||||
|
#include "neigh_list.h"
|
||||||
|
#include "neigh_request.h"
|
||||||
|
#include "force.h"
|
||||||
|
#include "pair.h"
|
||||||
|
#include "comm.h"
|
||||||
|
#include "memory_kokkos.h"
|
||||||
|
#include "error.h"
|
||||||
|
#include "math_const.h"
|
||||||
|
#include "atom_masks.h"
|
||||||
|
#include "kokkos.h"
|
||||||
|
|
||||||
|
using namespace LAMMPS_NS;
|
||||||
|
using namespace MathConst;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
#ifdef DBL_EPSILON
|
||||||
|
#define MY_EPSILON (10.0*DBL_EPSILON)
|
||||||
|
#else
|
||||||
|
#define MY_EPSILON (10.0*2.220446049250313e-16)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define QEPSILON 1.0e-6
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
ComputeOrientOrderAtomKokkos<DeviceType>::ComputeOrientOrderAtomKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||||
|
ComputeOrientOrderAtom(lmp, narg, arg)
|
||||||
|
{
|
||||||
|
kokkosable = 1;
|
||||||
|
atomKK = (AtomKokkos *) atom;
|
||||||
|
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||||
|
datamask_read = EMPTY_MASK;
|
||||||
|
datamask_modify = EMPTY_MASK;
|
||||||
|
|
||||||
|
host_flag = (execution_space == Host);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
ComputeOrientOrderAtomKokkos<DeviceType>::~ComputeOrientOrderAtomKokkos()
|
||||||
|
{
|
||||||
|
if (copymode) return;
|
||||||
|
|
||||||
|
memoryKK->destroy_kokkos(k_qnarray,qnarray);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
void ComputeOrientOrderAtomKokkos<DeviceType>::init()
|
||||||
|
{
|
||||||
|
ComputeOrientOrderAtom::init();
|
||||||
|
|
||||||
|
d_qlist = t_sna_1i("orientorder/atom:qlist",nqlist);
|
||||||
|
auto h_qlist = Kokkos::create_mirror_view(d_qlist);
|
||||||
|
for (int i = 0; i < nqlist; i++)
|
||||||
|
h_qlist(i) = qlist[i];
|
||||||
|
Kokkos::deep_copy(d_qlist,h_qlist);
|
||||||
|
|
||||||
|
// need an occasional full neighbor list
|
||||||
|
|
||||||
|
// irequest = neigh request made by parent class
|
||||||
|
|
||||||
|
int irequest = neighbor->nrequest - 1;
|
||||||
|
|
||||||
|
neighbor->requests[irequest]->
|
||||||
|
kokkos_host = std::is_same<DeviceType,LMPHostType>::value &&
|
||||||
|
!std::is_same<DeviceType,LMPDeviceType>::value;
|
||||||
|
neighbor->requests[irequest]->
|
||||||
|
kokkos_device = std::is_same<DeviceType,LMPDeviceType>::value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
struct FindMaxNumNeighs {
|
||||||
|
typedef DeviceType device_type;
|
||||||
|
NeighListKokkos<DeviceType> k_list;
|
||||||
|
|
||||||
|
FindMaxNumNeighs(NeighListKokkos<DeviceType>* nl): k_list(*nl) {}
|
||||||
|
~FindMaxNumNeighs() {k_list.copymode = 1;}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (const int& ii, int& maxneigh) const {
|
||||||
|
const int i = k_list.d_ilist[ii];
|
||||||
|
const int num_neighs = k_list.d_numneigh[i];
|
||||||
|
if (maxneigh < num_neighs) maxneigh = num_neighs;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
void ComputeOrientOrderAtomKokkos<DeviceType>::compute_peratom()
|
||||||
|
{
|
||||||
|
invoked_peratom = update->ntimestep;
|
||||||
|
|
||||||
|
// invoke full neighbor list (will copy or build if necessary)
|
||||||
|
|
||||||
|
neighbor->build_one(list);
|
||||||
|
|
||||||
|
inum = list->inum;
|
||||||
|
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
|
||||||
|
d_numneigh = k_list->d_numneigh;
|
||||||
|
d_neighbors = k_list->d_neighbors;
|
||||||
|
d_ilist = k_list->d_ilist;
|
||||||
|
|
||||||
|
// grow order parameter array if necessary
|
||||||
|
|
||||||
|
if (atom->nmax > nmax) {
|
||||||
|
memoryKK->destroy_kokkos(k_qnarray,qnarray);
|
||||||
|
nmax = atom->nmax;
|
||||||
|
memoryKK->create_kokkos(k_qnarray,qnarray,nmax,ncol,"orientorder/atom:qnarray");
|
||||||
|
array_atom = qnarray;
|
||||||
|
d_qnarray = k_qnarray.template view<DeviceType>();
|
||||||
|
}
|
||||||
|
|
||||||
|
chunk_size = MIN(chunksize,inum); // "chunksize" variable is set by user
|
||||||
|
chunk_offset = 0;
|
||||||
|
|
||||||
|
if (chunk_size > d_ncount.extent(0)) {
|
||||||
|
d_qnm = t_sna_3c("orientorder/atom:qnm",chunk_size,nqlist,2*qmax+1);
|
||||||
|
d_ncount = t_sna_1i("orientorder/atom:ncount",chunk_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
copymode = 1;
|
||||||
|
|
||||||
|
// insure distsq and nearest arrays are long enough
|
||||||
|
|
||||||
|
maxneigh = 0;
|
||||||
|
Kokkos::parallel_reduce("ComputeOrientOrderAtomKokkos::find_max_neighs",inum, FindMaxNumNeighs<DeviceType>(k_list), Kokkos::Max<int>(maxneigh));
|
||||||
|
|
||||||
|
if (chunk_size > d_distsq.extent(0) || maxneigh > d_distsq.extent(1)) {
|
||||||
|
d_distsq = t_sna_2d_lr("orientorder/atom:distsq",chunk_size,maxneigh);
|
||||||
|
d_nearest = t_sna_2i_lr("orientorder/atom:nearest",chunk_size,maxneigh);
|
||||||
|
d_rlist = t_sna_3d_lr("orientorder/atom:rlist",chunk_size,maxneigh,3);
|
||||||
|
|
||||||
|
d_distsq_um = d_distsq;
|
||||||
|
d_rlist_um = d_rlist;
|
||||||
|
d_nearest_um = d_nearest;
|
||||||
|
}
|
||||||
|
|
||||||
|
// compute order parameter for each atom in group
|
||||||
|
// use full neighbor list to count atoms less than cutoff
|
||||||
|
|
||||||
|
atomKK->sync(execution_space,X_MASK|MASK_MASK);
|
||||||
|
x = atomKK->k_x.view<DeviceType>();
|
||||||
|
mask = atomKK->k_mask.view<DeviceType>();
|
||||||
|
|
||||||
|
Kokkos::deep_copy(d_qnm,{0.0,0.0});
|
||||||
|
|
||||||
|
int vector_length_default = 1;
|
||||||
|
int team_size_default = 1;
|
||||||
|
if (!host_flag)
|
||||||
|
team_size_default = 32;//max_neighs;
|
||||||
|
|
||||||
|
while (chunk_offset < inum) { // chunk up loop to prevent running out of memory
|
||||||
|
|
||||||
|
if (chunk_size > inum - chunk_offset)
|
||||||
|
chunk_size = inum - chunk_offset;
|
||||||
|
|
||||||
|
//Neigh
|
||||||
|
{
|
||||||
|
int vector_length = vector_length_default;
|
||||||
|
int team_size = team_size_default;
|
||||||
|
check_team_size_for<TagComputeOrientOrderAtomNeigh>(chunk_size,team_size,vector_length);
|
||||||
|
typename Kokkos::TeamPolicy<DeviceType, TagComputeOrientOrderAtomNeigh> policy_neigh(chunk_size,team_size,vector_length);
|
||||||
|
Kokkos::parallel_for("ComputeOrientOrderAtomNeigh",policy_neigh,*this);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Select3
|
||||||
|
typename Kokkos::RangePolicy<DeviceType, TagComputeOrientOrderAtomSelect3> policy_select3(0,chunk_size);
|
||||||
|
Kokkos::parallel_for("ComputeOrientOrderAtomSelect3",policy_select3,*this);
|
||||||
|
|
||||||
|
//BOOP1
|
||||||
|
{
|
||||||
|
int vector_length = vector_length_default;
|
||||||
|
int team_size = team_size_default;
|
||||||
|
check_team_size_for<TagComputeOrientOrderAtomBOOP1>(chunk_size,team_size,vector_length);
|
||||||
|
typename Kokkos::TeamPolicy<DeviceType, TagComputeOrientOrderAtomBOOP1> policy_boop1(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length);
|
||||||
|
Kokkos::parallel_for("ComputeOrientOrderAtomBOOP1",policy_boop1,*this);
|
||||||
|
}
|
||||||
|
|
||||||
|
//BOOP2
|
||||||
|
typename Kokkos::RangePolicy<DeviceType, TagComputeOrientOrderAtomBOOP2> policy_boop2(0,chunk_size);
|
||||||
|
Kokkos::parallel_for("ComputeOrientOrderAtomBOOP2",policy_boop2,*this);
|
||||||
|
|
||||||
|
chunk_offset += chunk_size;
|
||||||
|
} // end while
|
||||||
|
|
||||||
|
copymode = 0;
|
||||||
|
|
||||||
|
k_qnarray.template modify<DeviceType>();
|
||||||
|
k_qnarray.template sync<LMPHostType>();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrderAtomNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagComputeOrientOrderAtomNeigh>::member_type& team) const
|
||||||
|
{
|
||||||
|
const int ii = team.league_rank();
|
||||||
|
const int i = d_ilist[ii + chunk_offset];
|
||||||
|
if (mask[i] & groupbit) {
|
||||||
|
const X_FLOAT xtmp = x(i,0);
|
||||||
|
const X_FLOAT ytmp = x(i,1);
|
||||||
|
const X_FLOAT ztmp = x(i,2);
|
||||||
|
const int jnum = d_numneigh[i];
|
||||||
|
|
||||||
|
// loop over list of all neighbors within force cutoff
|
||||||
|
// distsq[] = distance sq to each
|
||||||
|
// rlist[] = distance vector to each
|
||||||
|
// nearest[] = atom indices of neighbors
|
||||||
|
|
||||||
|
int ncount = 0;
|
||||||
|
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,jnum),
|
||||||
|
[&] (const int jj, int& count) {
|
||||||
|
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||||
|
int j = d_neighbors(i,jj);
|
||||||
|
j &= NEIGHMASK;
|
||||||
|
const F_FLOAT delx = x(j,0) - xtmp;
|
||||||
|
const F_FLOAT dely = x(j,1) - ytmp;
|
||||||
|
const F_FLOAT delz = x(j,2) - ztmp;
|
||||||
|
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||||
|
if (rsq < cutsq)
|
||||||
|
count++;
|
||||||
|
});
|
||||||
|
},ncount);
|
||||||
|
|
||||||
|
d_ncount(ii) = ncount;
|
||||||
|
|
||||||
|
if (team.team_rank() == 0)
|
||||||
|
Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,jnum),
|
||||||
|
[&] (const int jj, int& offset, bool final) {
|
||||||
|
int j = d_neighbors(i,jj);
|
||||||
|
j &= NEIGHMASK;
|
||||||
|
const F_FLOAT delx = x(j,0) - xtmp;
|
||||||
|
const F_FLOAT dely = x(j,1) - ytmp;
|
||||||
|
const F_FLOAT delz = x(j,2) - ztmp;
|
||||||
|
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||||
|
if (rsq < cutsq) {
|
||||||
|
if (final) {
|
||||||
|
d_distsq(ii,offset) = rsq;
|
||||||
|
d_rlist(ii,offset,0) = delx;
|
||||||
|
d_rlist(ii,offset,1) = dely;
|
||||||
|
d_rlist(ii,offset,2) = delz;
|
||||||
|
d_nearest(ii,offset) = j;
|
||||||
|
}
|
||||||
|
offset++;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrderAtomSelect3,const int& ii) const {
|
||||||
|
|
||||||
|
const int i = d_ilist[ii + chunk_offset];
|
||||||
|
const int ncount = d_ncount(ii);
|
||||||
|
|
||||||
|
// if not nnn neighbors, order parameter = 0;
|
||||||
|
|
||||||
|
if ((ncount == 0) || (ncount < nnn)) {
|
||||||
|
for (int jj = 0; jj < ncol; jj++)
|
||||||
|
d_qnarray(i,jj) = 0.0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// if nnn > 0, use only nearest nnn neighbors
|
||||||
|
|
||||||
|
if (nnn > 0) {
|
||||||
|
select3(nnn, ncount, ii);
|
||||||
|
d_ncount(ii) = nnn;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrderAtomBOOP1,const typename Kokkos::TeamPolicy<DeviceType, TagComputeOrientOrderAtomBOOP1>::member_type& team) const {
|
||||||
|
|
||||||
|
// Extract the atom number
|
||||||
|
int ii = team.team_rank() + team.team_size() * (team.league_rank() %
|
||||||
|
((chunk_size+team.team_size()-1)/team.team_size()));
|
||||||
|
if (ii >= chunk_size) return;
|
||||||
|
|
||||||
|
// Extract the neighbor number
|
||||||
|
const int jj = team.league_rank() / ((chunk_size+team.team_size()-1)/team.team_size());
|
||||||
|
const int ncount = d_ncount(ii);
|
||||||
|
if (jj >= ncount) return;
|
||||||
|
|
||||||
|
// if not nnn neighbors, order parameter = 0;
|
||||||
|
|
||||||
|
if ((ncount == 0) || (ncount < nnn))
|
||||||
|
return;
|
||||||
|
|
||||||
|
calc_boop1(ncount, ii, jj);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrderAtomBOOP2,const int& ii) const {
|
||||||
|
const int ncount = d_ncount(ii);
|
||||||
|
calc_boop2(ncount, ii);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
select3 routine from Numerical Recipes (slightly modified)
|
||||||
|
find k smallest values in array of length n
|
||||||
|
sort auxiliary arrays at same time
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Use no-op do while to create single statement
|
||||||
|
|
||||||
|
#define SWAP(view,i,j) do { \
|
||||||
|
tmp = view(i); view(i) = view(j); view(j) = tmp; \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define ISWAP(view,i,j) do { \
|
||||||
|
itmp = view(i); view(i) = view(j); view(j) = itmp; \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define SWAP3(view,i,j) do { \
|
||||||
|
tmp = view(i,0); view(i,0) = view(j,0); view(j,0) = tmp; \
|
||||||
|
tmp = view(i,1); view(i,1) = view(j,1); view(j,1) = tmp; \
|
||||||
|
tmp = view(i,2); view(i,2) = view(j,2); view(j,2) = tmp; \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void ComputeOrientOrderAtomKokkos<DeviceType>::select3(int k, int n, int ii) const
|
||||||
|
{
|
||||||
|
int i,ir,j,l,mid,ia,itmp;
|
||||||
|
double a,tmp,a3[3];
|
||||||
|
|
||||||
|
auto arr = Kokkos::subview(d_distsq_um, ii, Kokkos::ALL);
|
||||||
|
auto iarr = Kokkos::subview(d_nearest_um, ii, Kokkos::ALL);
|
||||||
|
auto arr3 = Kokkos::subview(d_rlist_um, ii, Kokkos::ALL, Kokkos::ALL);
|
||||||
|
|
||||||
|
l = 0;
|
||||||
|
ir = n-1;
|
||||||
|
for (;;) {
|
||||||
|
if (ir <= l+1) {
|
||||||
|
if (ir == l+1 && arr[ir] < arr[l]) {
|
||||||
|
SWAP(arr,l,ir);
|
||||||
|
ISWAP(iarr,l,ir);
|
||||||
|
SWAP3(arr3,l,ir);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
mid=((l+ir+2) >> 1) - 1;
|
||||||
|
SWAP(arr,mid,l+1);
|
||||||
|
ISWAP(iarr,mid,l+1);
|
||||||
|
SWAP3(arr3,mid,l+1);
|
||||||
|
if (arr[l] > arr[ir]) {
|
||||||
|
SWAP(arr,l,ir);
|
||||||
|
ISWAP(iarr,l,ir);
|
||||||
|
SWAP3(arr3,l,ir);
|
||||||
|
}
|
||||||
|
if (arr[l+1] > arr[ir]) {
|
||||||
|
SWAP(arr,l+1,ir);
|
||||||
|
ISWAP(iarr,l+1,ir);
|
||||||
|
SWAP3(arr3,l+1,ir);
|
||||||
|
}
|
||||||
|
if (arr[l] > arr[l+1]) {
|
||||||
|
SWAP(arr,l,l+1);
|
||||||
|
ISWAP(iarr,l,l+1);
|
||||||
|
SWAP3(arr3,l,l+1);
|
||||||
|
}
|
||||||
|
i = l+1;
|
||||||
|
j = ir;
|
||||||
|
a = arr[l+1];
|
||||||
|
ia = iarr[l+1];
|
||||||
|
a3[0] = arr3(l+1,0);
|
||||||
|
a3[1] = arr3(l+1,1);
|
||||||
|
a3[2] = arr3(l+1,2);
|
||||||
|
for (;;) {
|
||||||
|
do i++; while (arr[i] < a);
|
||||||
|
do j--; while (arr[j] > a);
|
||||||
|
if (j < i) break;
|
||||||
|
SWAP(arr,i,j);
|
||||||
|
ISWAP(iarr,i,j);
|
||||||
|
SWAP3(arr3,i,j);
|
||||||
|
}
|
||||||
|
arr[l+1] = arr[j];
|
||||||
|
arr[j] = a;
|
||||||
|
iarr[l+1] = iarr[j];
|
||||||
|
iarr[j] = ia;
|
||||||
|
arr3(l+1,0) = arr3(j,0);
|
||||||
|
arr3(l+1,1) = arr3(j,1);
|
||||||
|
arr3(l+1,2) = arr3(j,2);
|
||||||
|
arr3(j,0) = a3[0];
|
||||||
|
arr3(j,1) = a3[1];
|
||||||
|
arr3(j,2) = a3[2];
|
||||||
|
if (j+1 >= k) ir = j-1;
|
||||||
|
if (j+1 <= k) l = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
calculate the bond orientational order parameters
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop1(int ncount, int ii, int ineigh) const
|
||||||
|
{
|
||||||
|
const int i = d_ilist[ii + chunk_offset];
|
||||||
|
|
||||||
|
const double r0 = d_rlist(ii,ineigh,0);
|
||||||
|
const double r1 = d_rlist(ii,ineigh,1);
|
||||||
|
const double r2 = d_rlist(ii,ineigh,2);
|
||||||
|
const double rmag = sqrt(r0*r0 + r1*r1 + r2*r2);
|
||||||
|
if(rmag <= MY_EPSILON) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const double costheta = r2 / rmag;
|
||||||
|
SNAcomplex expphi = {r0,r1};
|
||||||
|
const double rxymag = sqrt(expphi.re*expphi.re+expphi.im*expphi.im);
|
||||||
|
if(rxymag <= MY_EPSILON) {
|
||||||
|
expphi.re = 1.0;
|
||||||
|
expphi.im = 0.0;
|
||||||
|
} else {
|
||||||
|
const double rxymaginv = 1.0/rxymag;
|
||||||
|
expphi.re *= rxymaginv;
|
||||||
|
expphi.im *= rxymaginv;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int il = 0; il < nqlist; il++) {
|
||||||
|
const int l = d_qlist[il];
|
||||||
|
|
||||||
|
//d_qnm(ii,il,l).re += polar_prefactor(l, 0, costheta);
|
||||||
|
const double polar_pf = polar_prefactor(l, 0, costheta);
|
||||||
|
Kokkos::atomic_add(&(d_qnm(ii,il,l).re), polar_pf);
|
||||||
|
SNAcomplex expphim = {expphi.re,expphi.im};
|
||||||
|
for(int m = 1; m <= +l; m++) {
|
||||||
|
const double prefactor = polar_prefactor(l, m, costheta);
|
||||||
|
SNAcomplex c = {prefactor * expphim.re, prefactor * expphim.im};
|
||||||
|
//d_qnm(ii,il,m+l).re += c.re;
|
||||||
|
//d_qnm(ii,il,m+l).im += c.im;
|
||||||
|
Kokkos::atomic_add(&(d_qnm(ii,il,m+l).re), c.re);
|
||||||
|
Kokkos::atomic_add(&(d_qnm(ii,il,m+l).im), c.im);
|
||||||
|
if(m & 1) {
|
||||||
|
//d_qnm(ii,il,-m+l).re -= c.re;
|
||||||
|
//d_qnm(ii,il,-m+l).im += c.im;
|
||||||
|
Kokkos::atomic_add(&(d_qnm(ii,il,-m+l).re), -c.re);
|
||||||
|
Kokkos::atomic_add(&(d_qnm(ii,il,-m+l).im), c.im);
|
||||||
|
} else {
|
||||||
|
//d_qnm(ii,il,-m+l).re += c.re;
|
||||||
|
//d_qnm(ii,il,-m+l).im -= c.im;
|
||||||
|
Kokkos::atomic_add(&(d_qnm(ii,il,-m+l).re), c.re);
|
||||||
|
Kokkos::atomic_add(&(d_qnm(ii,il,-m+l).im), -c.im);
|
||||||
|
}
|
||||||
|
SNAcomplex tmp;
|
||||||
|
tmp.re = expphim.re*expphi.re - expphim.im*expphi.im;
|
||||||
|
tmp.im = expphim.re*expphi.im + expphim.im*expphi.re;
|
||||||
|
expphim.re = tmp.re;
|
||||||
|
expphim.im = tmp.im;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
calculate the bond orientational order parameters
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) const
|
||||||
|
{
|
||||||
|
const int i = d_ilist[ii + chunk_offset];
|
||||||
|
|
||||||
|
// convert sums to averages
|
||||||
|
|
||||||
|
double facn = 1.0 / ncount;
|
||||||
|
for (int il = 0; il < nqlist; il++) {
|
||||||
|
int l = d_qlist[il];
|
||||||
|
for(int m = 0; m < 2*l+1; m++) {
|
||||||
|
d_qnm(ii,il,m).re *= facn;
|
||||||
|
d_qnm(ii,il,m).im *= facn;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// calculate Q_l
|
||||||
|
// NOTE: optional W_l_hat and components of Q_qlcomp use these stored Q_l values
|
||||||
|
|
||||||
|
int jj = 0;
|
||||||
|
for (int il = 0; il < nqlist; il++) {
|
||||||
|
int l = d_qlist[il];
|
||||||
|
double qnormfac = sqrt(MY_4PI/(2*l+1));
|
||||||
|
double qm_sum = 0.0;
|
||||||
|
for(int m = 0; m < 2*l+1; m++)
|
||||||
|
qm_sum += d_qnm(ii,il,m).re*d_qnm(ii,il,m).re + d_qnm(ii,il,m).im*d_qnm(ii,il,m).im;
|
||||||
|
d_qnarray(i,jj++) = qnormfac * sqrt(qm_sum);
|
||||||
|
}
|
||||||
|
|
||||||
|
// calculate W_l
|
||||||
|
|
||||||
|
if (wlflag) {
|
||||||
|
int idxcg_count = 0;
|
||||||
|
for (int il = 0; il < nqlist; il++) {
|
||||||
|
int l = d_qlist[il];
|
||||||
|
double wlsum = 0.0;
|
||||||
|
for(int m1 = 0; m1 < 2*l+1; m1++) {
|
||||||
|
for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
|
||||||
|
int m = m1 + m2 - l;
|
||||||
|
SNAcomplex qm1qm2;
|
||||||
|
qm1qm2.re = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).re - d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).im;
|
||||||
|
qm1qm2.im = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).im + d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).re;
|
||||||
|
wlsum += (qm1qm2.re*d_qnm(ii,il,m).re + qm1qm2.im*d_qnm(ii,il,m).im)*d_cglist[idxcg_count];
|
||||||
|
idxcg_count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
d_qnarray(i,jj++) = wlsum/sqrt(2.0*l+1.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// calculate W_l_hat
|
||||||
|
|
||||||
|
if (wlhatflag) {
|
||||||
|
int idxcg_count = 0;
|
||||||
|
for (int il = 0; il < nqlist; il++) {
|
||||||
|
int l = d_qlist[il];
|
||||||
|
double wlsum = 0.0;
|
||||||
|
for(int m1 = 0; m1 < 2*l+1; m1++) {
|
||||||
|
for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
|
||||||
|
const int m = m1 + m2 - l;
|
||||||
|
SNAcomplex qm1qm2;
|
||||||
|
qm1qm2.re = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).re - d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).im;
|
||||||
|
qm1qm2.im = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).im + d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).re;
|
||||||
|
wlsum += (qm1qm2.re*d_qnm(ii,il,m).re + qm1qm2.im*d_qnm(ii,il,m).im)*d_cglist[idxcg_count];
|
||||||
|
idxcg_count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Whats = [w/(q/np.sqrt(np.pi * 4 / (2 * l + 1)))**3 if abs(q) > 1.0e-6 else 0.0 for l,q,w in zip(range(1,max_l+1),Qs,Ws)]
|
||||||
|
if (d_qnarray(i,il) < QEPSILON)
|
||||||
|
d_qnarray(i,jj++) = 0.0;
|
||||||
|
else {
|
||||||
|
const double qnormfac = sqrt(MY_4PI/(2*l+1));
|
||||||
|
const double qnfac = qnormfac/d_qnarray(i,il);
|
||||||
|
d_qnarray(i,jj++) = wlsum/sqrt(2.0*l+1.0)*(qnfac*qnfac*qnfac);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate components of Q_l, for l=qlcomp
|
||||||
|
|
||||||
|
if (qlcompflag) {
|
||||||
|
const int il = iqlcomp;
|
||||||
|
const int l = qlcomp;
|
||||||
|
if (d_qnarray(i,il) < QEPSILON)
|
||||||
|
for(int m = 0; m < 2*l+1; m++) {
|
||||||
|
d_qnarray(i,jj++) = 0.0;
|
||||||
|
d_qnarray(i,jj++) = 0.0;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
const double qnormfac = sqrt(MY_4PI/(2*l+1));
|
||||||
|
const double qnfac = qnormfac/d_qnarray(i,il);
|
||||||
|
for(int m = 0; m < 2*l+1; m++) {
|
||||||
|
d_qnarray(i,jj++) = d_qnm(ii,il,m).re * qnfac;
|
||||||
|
d_qnarray(i,jj++) = d_qnm(ii,il,m).im * qnfac;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
polar prefactor for spherical harmonic Y_l^m, where
|
||||||
|
Y_l^m (theta, phi) = prefactor(l, m, cos(theta)) * exp(i*m*phi)
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
double ComputeOrientOrderAtomKokkos<DeviceType>::polar_prefactor(int l, int m, double costheta) const
|
||||||
|
{
|
||||||
|
const int mabs = abs(m);
|
||||||
|
|
||||||
|
double prefactor = 1.0;
|
||||||
|
for (int i=l-mabs+1; i < l+mabs+1; ++i)
|
||||||
|
prefactor *= static_cast<double>(i);
|
||||||
|
|
||||||
|
prefactor = sqrt(static_cast<double>(2*l+1)/(MY_4PI*prefactor))
|
||||||
|
* associated_legendre(l,mabs,costheta);
|
||||||
|
|
||||||
|
if ((m < 0) && (m % 2)) prefactor = -prefactor;
|
||||||
|
|
||||||
|
return prefactor;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
associated legendre polynomial
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
double ComputeOrientOrderAtomKokkos<DeviceType>::associated_legendre(int l, int m, double x) const
|
||||||
|
{
|
||||||
|
if (l < m) return 0.0;
|
||||||
|
|
||||||
|
double p(1.0), pm1(0.0), pm2(0.0);
|
||||||
|
|
||||||
|
if (m != 0) {
|
||||||
|
const double sqx = sqrt(1.0-x*x);
|
||||||
|
for (int i=1; i < m+1; ++i)
|
||||||
|
p *= static_cast<double>(2*i-1) * sqx;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i=m+1; i < l+1; ++i) {
|
||||||
|
pm2 = pm1;
|
||||||
|
pm1 = p;
|
||||||
|
p = (static_cast<double>(2*i-1)*x*pm1
|
||||||
|
- static_cast<double>(i+m-1)*pm2) / static_cast<double>(i-m);
|
||||||
|
}
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
assign Clebsch-Gordan coefficients
|
||||||
|
using the quasi-binomial formula VMK 8.2.1(3)
|
||||||
|
specialized for case j1=j2=j=l
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
void ComputeOrientOrderAtomKokkos<DeviceType>::init_clebsch_gordan()
|
||||||
|
{
|
||||||
|
double sum,dcg,sfaccg, sfac1, sfac2;
|
||||||
|
int m, aa2, bb2, cc2;
|
||||||
|
int ifac, idxcg_count;
|
||||||
|
|
||||||
|
idxcg_count = 0;
|
||||||
|
for (int il = 0; il < nqlist; il++) {
|
||||||
|
int l = qlist[il];
|
||||||
|
for(int m1 = 0; m1 < 2*l+1; m1++)
|
||||||
|
for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++)
|
||||||
|
idxcg_count++;
|
||||||
|
}
|
||||||
|
idxcg_max = idxcg_count;
|
||||||
|
d_cglist = t_sna_1d("orientorder/atom:d_cglist",idxcg_max);
|
||||||
|
auto h_cglist = Kokkos::create_mirror_view(d_cglist);
|
||||||
|
|
||||||
|
idxcg_count = 0;
|
||||||
|
for (int il = 0; il < nqlist; il++) {
|
||||||
|
int l = qlist[il];
|
||||||
|
for(int m1 = 0; m1 < 2*l+1; m1++) {
|
||||||
|
aa2 = m1 - l;
|
||||||
|
for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
|
||||||
|
bb2 = m2 - l;
|
||||||
|
m = aa2 + bb2 + l;
|
||||||
|
|
||||||
|
sum = 0.0;
|
||||||
|
for (int z = MAX(0, MAX(-aa2, bb2));
|
||||||
|
z <= MIN(l, MIN(l - aa2, l + bb2)); z++) {
|
||||||
|
ifac = z % 2 ? -1 : 1;
|
||||||
|
sum += ifac /
|
||||||
|
(factorial(z) *
|
||||||
|
factorial(l - z) *
|
||||||
|
factorial(l - aa2 - z) *
|
||||||
|
factorial(l + bb2 - z) *
|
||||||
|
factorial(aa2 + z) *
|
||||||
|
factorial(-bb2 + z));
|
||||||
|
}
|
||||||
|
|
||||||
|
cc2 = m - l;
|
||||||
|
sfaccg = sqrt(factorial(l + aa2) *
|
||||||
|
factorial(l - aa2) *
|
||||||
|
factorial(l + bb2) *
|
||||||
|
factorial(l - bb2) *
|
||||||
|
factorial(l + cc2) *
|
||||||
|
factorial(l - cc2) *
|
||||||
|
(2*l + 1));
|
||||||
|
|
||||||
|
sfac1 = factorial(3*l + 1);
|
||||||
|
sfac2 = factorial(l);
|
||||||
|
dcg = sqrt(sfac2*sfac2*sfac2 / sfac1);
|
||||||
|
|
||||||
|
h_cglist[idxcg_count] = sum * dcg * sfaccg;
|
||||||
|
idxcg_count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Kokkos::deep_copy(d_cglist,h_cglist);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
check max team size
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
template<class TagStyle>
|
||||||
|
void ComputeOrientOrderAtomKokkos<DeviceType>::check_team_size_for(int inum, int &team_size, int vector_length) {
|
||||||
|
int team_size_max;
|
||||||
|
|
||||||
|
team_size_max = Kokkos::TeamPolicy<DeviceType,TagStyle>(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag());
|
||||||
|
|
||||||
|
if(team_size*vector_length > team_size_max)
|
||||||
|
team_size = team_size_max/vector_length;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace LAMMPS_NS {
|
||||||
|
template class ComputeOrientOrderAtomKokkos<LMPDeviceType>;
|
||||||
|
#ifdef KOKKOS_ENABLE_CUDA
|
||||||
|
template class ComputeOrientOrderAtomKokkos<LMPHostType>;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
140
src/KOKKOS/compute_orientorder_atom_kokkos.h
Normal file
140
src/KOKKOS/compute_orientorder_atom_kokkos.h
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
/* -*- c++ -*- ----------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
|
certain rights in this software. This software is distributed under
|
||||||
|
the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#ifdef COMPUTE_CLASS
|
||||||
|
|
||||||
|
ComputeStyle(orientorder/atom/kk,ComputeOrientOrderAtomKokkos<LMPDeviceType>)
|
||||||
|
ComputeStyle(orientorder/atom/kk/device,ComputeOrientOrderAtomKokkos<LMPDeviceType>)
|
||||||
|
ComputeStyle(orientorder/atom/kk/host,ComputeOrientOrderAtomKokkos<LMPHostType>)
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#ifndef LMP_COMPUTE_ORIENTORDER_ATOM_KOKKOS_H
|
||||||
|
#define LMP_COMPUTE_ORIENTORDER_ATOM_KOKKOS_H
|
||||||
|
|
||||||
|
#include "compute_orientorder_atom.h"
|
||||||
|
#include "kokkos_type.h"
|
||||||
|
|
||||||
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
|
struct TagComputeOrientOrderAtomNeigh{};
|
||||||
|
struct TagComputeOrientOrderAtomSelect3{};
|
||||||
|
struct TagComputeOrientOrderAtomBOOP1{};
|
||||||
|
struct TagComputeOrientOrderAtomBOOP2{};
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
class ComputeOrientOrderAtomKokkos : public ComputeOrientOrderAtom {
|
||||||
|
public:
|
||||||
|
typedef Kokkos::View<int*, DeviceType> t_sna_1i;
|
||||||
|
typedef Kokkos::View<double*, DeviceType> t_sna_1d;
|
||||||
|
typedef Kokkos::View<double*, DeviceType, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1d_atomic;
|
||||||
|
typedef Kokkos::View<int**, Kokkos::LayoutRight, DeviceType> t_sna_2i_lr;
|
||||||
|
typedef Kokkos::View<int**, Kokkos::LayoutRight, DeviceType, Kokkos::MemoryTraits<Kokkos::Unmanaged> > t_sna_2i_lr_um;
|
||||||
|
typedef Kokkos::View<int**, DeviceType> t_sna_2i;
|
||||||
|
typedef Kokkos::View<double**, DeviceType> t_sna_2d;
|
||||||
|
typedef Kokkos::View<double**, Kokkos::LayoutRight, DeviceType> t_sna_2d_lr;
|
||||||
|
typedef Kokkos::DualView<double**, Kokkos::LayoutRight, DeviceType> tdual_sna_2d_lr;
|
||||||
|
typedef Kokkos::View<double**, Kokkos::LayoutRight, DeviceType, Kokkos::MemoryTraits<Kokkos::Unmanaged> > t_sna_2d_lr_um;
|
||||||
|
typedef Kokkos::View<double***, DeviceType> t_sna_3d;
|
||||||
|
typedef Kokkos::View<double***, Kokkos::LayoutRight, DeviceType> t_sna_3d_lr;
|
||||||
|
typedef Kokkos::View<double***, Kokkos::LayoutRight, DeviceType, Kokkos::MemoryTraits<Kokkos::Unmanaged> > t_sna_3d_lr_um;
|
||||||
|
typedef Kokkos::View<double***[3], DeviceType> t_sna_4d;
|
||||||
|
typedef Kokkos::View<double**[3], DeviceType> t_sna_3d3;
|
||||||
|
typedef Kokkos::View<double*****, DeviceType> t_sna_5d;
|
||||||
|
|
||||||
|
typedef Kokkos::View<SNAcomplex*, DeviceType> t_sna_1c;
|
||||||
|
typedef Kokkos::View<SNAcomplex*, DeviceType, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1c_atomic;
|
||||||
|
typedef Kokkos::View<SNAcomplex**, DeviceType> t_sna_2c;
|
||||||
|
typedef Kokkos::View<SNAcomplex**, Kokkos::LayoutRight, DeviceType> t_sna_2c_lr;
|
||||||
|
typedef Kokkos::View<SNAcomplex***, DeviceType> t_sna_3c;
|
||||||
|
typedef Kokkos::View<SNAcomplex***[3], DeviceType> t_sna_4c;
|
||||||
|
typedef Kokkos::View<SNAcomplex**[3], DeviceType> t_sna_3c3;
|
||||||
|
typedef Kokkos::View<SNAcomplex*****, DeviceType> t_sna_5c;
|
||||||
|
|
||||||
|
typedef DeviceType device_type;
|
||||||
|
typedef ArrayTypes<DeviceType> AT;
|
||||||
|
typedef int value_type;
|
||||||
|
|
||||||
|
ComputeOrientOrderAtomKokkos(class LAMMPS *, int, char **);
|
||||||
|
~ComputeOrientOrderAtomKokkos();
|
||||||
|
void init();
|
||||||
|
void compute_peratom();
|
||||||
|
t_sna_1i d_qlist;
|
||||||
|
|
||||||
|
template<class TagStyle>
|
||||||
|
void check_team_size_for(int, int&, int);
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (TagComputeOrientOrderAtomNeigh, const typename Kokkos::TeamPolicy<DeviceType, TagComputeOrientOrderAtomNeigh>::member_type& team) const;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (TagComputeOrientOrderAtomSelect3, const int& ii) const;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (TagComputeOrientOrderAtomBOOP1, const typename Kokkos::TeamPolicy<DeviceType, TagComputeOrientOrderAtomBOOP1>::member_type& team) const;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (TagComputeOrientOrderAtomBOOP2, const int& ii) const;
|
||||||
|
|
||||||
|
DAT::tdual_float_2d k_qnarray;
|
||||||
|
typename AT::t_float_2d d_qnarray;
|
||||||
|
|
||||||
|
private:
|
||||||
|
int inum,chunk_size,chunk_offset;
|
||||||
|
int host_flag;
|
||||||
|
|
||||||
|
typename AT::t_x_array_randomread x;
|
||||||
|
typename ArrayTypes<DeviceType>::t_int_1d mask;
|
||||||
|
|
||||||
|
typename AT::t_neighbors_2d d_neighbors;
|
||||||
|
typename AT::t_int_1d_randomread d_ilist;
|
||||||
|
typename AT::t_int_1d_randomread d_numneigh;
|
||||||
|
|
||||||
|
t_sna_1i d_ncount;
|
||||||
|
t_sna_2d_lr d_distsq;
|
||||||
|
t_sna_2i_lr d_nearest;
|
||||||
|
t_sna_3d_lr d_rlist;
|
||||||
|
|
||||||
|
t_sna_2d_lr_um d_distsq_um;
|
||||||
|
t_sna_2i_lr_um d_nearest_um;
|
||||||
|
t_sna_3d_lr_um d_rlist_um;
|
||||||
|
|
||||||
|
t_sna_3c d_qnm;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void select3(int, int, int) const;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void calc_boop1(int, int, int) const;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void calc_boop2(int, int) const;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
double polar_prefactor(int, int, double) const;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
double associated_legendre(int, int, double) const;
|
||||||
|
|
||||||
|
void init_clebsch_gordan();
|
||||||
|
t_sna_1d d_cglist; // Clebsch-Gordan coeffs
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* ERROR/WARNING messages:
|
||||||
|
|
||||||
|
*/
|
||||||
@ -31,6 +31,7 @@ template<class DeviceType>
|
|||||||
ComputeTempKokkos<DeviceType>::ComputeTempKokkos(LAMMPS *lmp, int narg, char **arg) :
|
ComputeTempKokkos<DeviceType>::ComputeTempKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||||
ComputeTemp(lmp, narg, arg)
|
ComputeTemp(lmp, narg, arg)
|
||||||
{
|
{
|
||||||
|
kokkosable = 1;
|
||||||
atomKK = (AtomKokkos *) atom;
|
atomKK = (AtomKokkos *) atom;
|
||||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||||
|
|
||||||
|
|||||||
@ -1029,6 +1029,59 @@ struct params_lj_coul {
|
|||||||
F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
|
F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Pair SNAP
|
||||||
|
|
||||||
|
typedef double SNAreal;
|
||||||
|
|
||||||
|
//typedef struct { SNAreal re, im; } SNAcomplex;
|
||||||
|
template <typename real>
|
||||||
|
struct alignas(2*sizeof(real)) SNAComplex
|
||||||
|
{
|
||||||
|
real re,im;
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION SNAComplex() = default;
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re)
|
||||||
|
: re(re), im(static_cast<real>(0.)) { ; }
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re, real im)
|
||||||
|
: re(re), im(im) { ; }
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(const SNAComplex& other)
|
||||||
|
: re(other.re), im(other.im) { ; }
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION SNAComplex& operator=(const SNAComplex& other) {
|
||||||
|
re = other.re; im = other.im;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(SNAComplex&& other)
|
||||||
|
: re(other.re), im(other.im) { ; }
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION SNAComplex& operator=(SNAComplex&& other) {
|
||||||
|
re = other.re; im = other.im;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION SNAComplex operator+(SNAComplex const& other) {
|
||||||
|
return SNAComplex(re + other.re, im + other.im);
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION SNAComplex& operator+=(SNAComplex const& other) {
|
||||||
|
re += other.re; im += other.im;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename real>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real> operator*(const real& r, const SNAComplex<real>& self) {
|
||||||
|
return SNAComplex<real>(r*self.re, r*self.im);
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef SNAComplex<SNAreal> SNAcomplex;
|
||||||
|
|
||||||
|
|
||||||
#if defined(KOKKOS_ENABLE_CXX11)
|
#if defined(KOKKOS_ENABLE_CXX11)
|
||||||
#undef ISFINITE
|
#undef ISFINITE
|
||||||
#define ISFINITE(x) std::isfinite(x)
|
#define ISFINITE(x) std::isfinite(x)
|
||||||
|
|||||||
@ -25,64 +25,6 @@
|
|||||||
|
|
||||||
namespace LAMMPS_NS {
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
typedef double SNAreal;
|
|
||||||
|
|
||||||
//typedef struct { SNAreal re, im; } SNAcomplex;
|
|
||||||
template <typename real>
|
|
||||||
struct alignas(2*sizeof(real)) SNAComplex
|
|
||||||
{
|
|
||||||
real re,im;
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex() = default;
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re)
|
|
||||||
: re(re), im(static_cast<real>(0.)) { ; }
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re, real im)
|
|
||||||
: re(re), im(im) { ; }
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(const SNAComplex& other)
|
|
||||||
: re(other.re), im(other.im) { ; }
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex& operator=(const SNAComplex& other) {
|
|
||||||
re = other.re; im = other.im;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(SNAComplex&& other)
|
|
||||||
: re(other.re), im(other.im) { ; }
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex& operator=(SNAComplex&& other) {
|
|
||||||
re = other.re; im = other.im;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex operator+(SNAComplex const& other) {
|
|
||||||
return SNAComplex(re + other.re, im + other.im);
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex& operator+=(SNAComplex const& other) {
|
|
||||||
re += other.re; im += other.im;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename real>
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real> operator*(const real& r, const SNAComplex<real>& self) {
|
|
||||||
return SNAComplex<real>(r*self.re, r*self.im);
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef SNAComplex<SNAreal> SNAcomplex;
|
|
||||||
|
|
||||||
//struct SNAKK_ZINDICES {
|
|
||||||
// int j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, jju;
|
|
||||||
//};
|
|
||||||
//
|
|
||||||
//struct SNAKK_BINDICES {
|
|
||||||
// int j1, j2, j;
|
|
||||||
//};
|
|
||||||
|
|
||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
class SNAKokkos {
|
class SNAKokkos {
|
||||||
|
|
||||||
|
|||||||
@ -102,6 +102,7 @@ Compute::Compute(LAMMPS *lmp, int narg, char **arg) :
|
|||||||
datamask_modify = ALL_MASK;
|
datamask_modify = ALL_MASK;
|
||||||
|
|
||||||
copymode = 0;
|
copymode = 0;
|
||||||
|
kokkosable = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|||||||
@ -91,7 +91,7 @@ class Compute : protected Pointers {
|
|||||||
ExecutionSpace execution_space;
|
ExecutionSpace execution_space;
|
||||||
unsigned int datamask_read,datamask_modify;
|
unsigned int datamask_read,datamask_modify;
|
||||||
|
|
||||||
int copymode;
|
int copymode,kokkosable;
|
||||||
|
|
||||||
Compute(class LAMMPS *, int, char **);
|
Compute(class LAMMPS *, int, char **);
|
||||||
virtual ~Compute();
|
virtual ~Compute();
|
||||||
|
|||||||
@ -61,6 +61,7 @@ ComputeOrientOrderAtom::ComputeOrientOrderAtom(LAMMPS *lmp, int narg, char **arg
|
|||||||
wlflag = 0;
|
wlflag = 0;
|
||||||
wlhatflag = 0;
|
wlhatflag = 0;
|
||||||
qlcompflag = 0;
|
qlcompflag = 0;
|
||||||
|
chunksize = 16384;
|
||||||
|
|
||||||
// specify which orders to request
|
// specify which orders to request
|
||||||
|
|
||||||
@ -143,6 +144,13 @@ ComputeOrientOrderAtom::ComputeOrientOrderAtom(LAMMPS *lmp, int narg, char **arg
|
|||||||
error->all(FLERR,"Illegal compute orientorder/atom command");
|
error->all(FLERR,"Illegal compute orientorder/atom command");
|
||||||
cutsq = cutoff*cutoff;
|
cutsq = cutoff*cutoff;
|
||||||
iarg += 2;
|
iarg += 2;
|
||||||
|
} else if (strcmp(arg[iarg],"chunksize") == 0) {
|
||||||
|
if (iarg+2 > narg)
|
||||||
|
error->all(FLERR,"Illegal compute orientorder/atom command");
|
||||||
|
chunksize = force->numeric(FLERR,arg[iarg+1]);
|
||||||
|
if (chunksize <= 0)
|
||||||
|
error->all(FLERR,"Illegal compute orientorder/atom command");
|
||||||
|
iarg += 2;
|
||||||
} else error->all(FLERR,"Illegal compute orientorder/atom command");
|
} else error->all(FLERR,"Illegal compute orientorder/atom command");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -162,6 +170,8 @@ ComputeOrientOrderAtom::ComputeOrientOrderAtom(LAMMPS *lmp, int narg, char **arg
|
|||||||
|
|
||||||
ComputeOrientOrderAtom::~ComputeOrientOrderAtom()
|
ComputeOrientOrderAtom::~ComputeOrientOrderAtom()
|
||||||
{
|
{
|
||||||
|
if (copymode) return;
|
||||||
|
|
||||||
memory->destroy(qnarray);
|
memory->destroy(qnarray);
|
||||||
memory->destroy(distsq);
|
memory->destroy(distsq);
|
||||||
memory->destroy(rlist);
|
memory->destroy(rlist);
|
||||||
|
|||||||
@ -28,16 +28,16 @@ class ComputeOrientOrderAtom : public Compute {
|
|||||||
public:
|
public:
|
||||||
ComputeOrientOrderAtom(class LAMMPS *, int, char **);
|
ComputeOrientOrderAtom(class LAMMPS *, int, char **);
|
||||||
~ComputeOrientOrderAtom();
|
~ComputeOrientOrderAtom();
|
||||||
void init();
|
virtual void init();
|
||||||
void init_list(int, class NeighList *);
|
void init_list(int, class NeighList *);
|
||||||
void compute_peratom();
|
virtual void compute_peratom();
|
||||||
double memory_usage();
|
double memory_usage();
|
||||||
double cutsq;
|
double cutsq;
|
||||||
int iqlcomp, qlcomp, qlcompflag, wlflag, wlhatflag;
|
int iqlcomp, qlcomp, qlcompflag, wlflag, wlhatflag;
|
||||||
int *qlist;
|
int *qlist;
|
||||||
int nqlist;
|
int nqlist;
|
||||||
|
|
||||||
private:
|
protected:
|
||||||
int nmax,maxneigh,ncol,nnn;
|
int nmax,maxneigh,ncol,nnn;
|
||||||
class NeighList *list;
|
class NeighList *list;
|
||||||
double *distsq;
|
double *distsq;
|
||||||
@ -59,9 +59,10 @@ class ComputeOrientOrderAtom : public Compute {
|
|||||||
static const int nmaxfactorial = 167;
|
static const int nmaxfactorial = 167;
|
||||||
static const double nfac_table[];
|
static const double nfac_table[];
|
||||||
double factorial(int);
|
double factorial(int);
|
||||||
void init_clebsch_gordan();
|
virtual void init_clebsch_gordan();
|
||||||
double *cglist; // Clebsch-Gordan coeffs
|
double *cglist; // Clebsch-Gordan coeffs
|
||||||
int idxcg_max;
|
int idxcg_max;
|
||||||
|
int chunksize;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user