710 lines
22 KiB
C++
710 lines
22 KiB
C++
// clang-format off
|
|
/* ----------------------------------------------------------------------
|
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
|
https://www.lammps.org/, Sandia National Laboratories
|
|
Steve Plimpton, sjplimp@sandia.gov
|
|
|
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
|
certain rights in this software. This software is distributed under
|
|
the GNU General Public License.
|
|
|
|
See the README file in the top-level LAMMPS directory.
|
|
------------------------------------------------------------------------- */
|
|
|
|
/* ----------------------------------------------------------------------
|
|
Contributing authors: Stan Moore (SNL)
|
|
|
|
Tomas Oppelstrup (LLNL): Optimization which reduces the number
|
|
of iterations in the L,m1,m2 loops (by a factor of up to 10), and
|
|
avoids evaluation of Ylm functions of negative m
|
|
------------------------------------------------------------------------- */
|
|
|
|
#include "compute_orientorder_atom_kokkos.h"
|
|
|
|
#include "atom_kokkos.h"
|
|
#include "atom_masks.h"
|
|
#include "kokkos.h"
|
|
#include "math_const.h"
|
|
#include "math_special_kokkos.h"
|
|
#include "memory_kokkos.h"
|
|
#include "neigh_list.h"
|
|
#include "neigh_request.h"
|
|
#include "neighbor_kokkos.h"
|
|
#include "pair.h"
|
|
#include "update.h"
|
|
|
|
#include <cmath>
|
|
|
|
using namespace LAMMPS_NS;
|
|
using namespace MathConst;
|
|
using namespace MathSpecialKokkos;
|
|
using namespace std;
|
|
|
|
#ifdef DBL_EPSILON
|
|
#define MY_EPSILON (10.0*DBL_EPSILON)
|
|
#else
|
|
#define MY_EPSILON (10.0*2.220446049250313e-16)
|
|
#endif
|
|
|
|
#define QEPSILON 1.0e-6
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
template<class DeviceType>
|
|
ComputeOrientOrderAtomKokkos<DeviceType>::ComputeOrientOrderAtomKokkos(LAMMPS *lmp, int narg, char **arg) :
|
|
ComputeOrientOrderAtom(lmp, narg, arg)
|
|
{
|
|
kokkosable = 1;
|
|
atomKK = (AtomKokkos *) atom;
|
|
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
|
datamask_read = EMPTY_MASK;
|
|
datamask_modify = EMPTY_MASK;
|
|
|
|
host_flag = (execution_space == Host);
|
|
|
|
d_qnormfac = t_sna_1d("orientorder/atom:qnormfac",nqlist);
|
|
d_qnormfac2 = t_sna_1d("orientorder/atom:qnormfac2",nqlist);
|
|
|
|
auto h_qnormfac = Kokkos::create_mirror_view(d_qnormfac);
|
|
auto h_qnormfac2 = Kokkos::create_mirror_view(d_qnormfac2);
|
|
|
|
for (int il = 0; il < nqlist; il++) {
|
|
h_qnormfac[il] = qnormfac[il];
|
|
h_qnormfac2[il] = qnormfac2[il];
|
|
}
|
|
|
|
Kokkos::deep_copy(d_qnormfac,h_qnormfac);
|
|
Kokkos::deep_copy(d_qnormfac2,h_qnormfac2);
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
template<class DeviceType>
|
|
ComputeOrientOrderAtomKokkos<DeviceType>::~ComputeOrientOrderAtomKokkos()
|
|
{
|
|
if (copymode) return;
|
|
|
|
memoryKK->destroy_kokkos(k_qnarray,qnarray);
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
template<class DeviceType>
|
|
void ComputeOrientOrderAtomKokkos<DeviceType>::init()
|
|
{
|
|
ComputeOrientOrderAtom::init();
|
|
|
|
d_qlist = t_sna_1i("orientorder/atom:qlist",nqlist);
|
|
auto h_qlist = Kokkos::create_mirror_view(d_qlist);
|
|
for (int i = 0; i < nqlist; i++)
|
|
h_qlist(i) = qlist[i];
|
|
Kokkos::deep_copy(d_qlist,h_qlist);
|
|
|
|
// need an occasional full neighbor list
|
|
|
|
// irequest = neigh request made by parent class
|
|
|
|
int irequest = neighbor->nrequest - 1;
|
|
|
|
neighbor->requests[irequest]->
|
|
kokkos_host = std::is_same<DeviceType,LMPHostType>::value &&
|
|
!std::is_same<DeviceType,LMPDeviceType>::value;
|
|
neighbor->requests[irequest]->
|
|
kokkos_device = std::is_same<DeviceType,LMPDeviceType>::value;
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
template<class DeviceType>
|
|
struct FindMaxNumNeighs {
|
|
typedef DeviceType device_type;
|
|
NeighListKokkos<DeviceType> k_list;
|
|
|
|
FindMaxNumNeighs(NeighListKokkos<DeviceType>* nl): k_list(*nl) {}
|
|
~FindMaxNumNeighs() {k_list.copymode = 1;}
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator() (const int& ii, int& maxneigh) const {
|
|
const int i = k_list.d_ilist[ii];
|
|
const int num_neighs = k_list.d_numneigh[i];
|
|
if (maxneigh < num_neighs) maxneigh = num_neighs;
|
|
}
|
|
};
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
template<class DeviceType>
|
|
void ComputeOrientOrderAtomKokkos<DeviceType>::compute_peratom()
|
|
{
|
|
invoked_peratom = update->ntimestep;
|
|
|
|
// invoke full neighbor list (will copy or build if necessary)
|
|
|
|
neighbor->build_one(list);
|
|
|
|
inum = list->inum;
|
|
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
|
|
d_numneigh = k_list->d_numneigh;
|
|
d_neighbors = k_list->d_neighbors;
|
|
d_ilist = k_list->d_ilist;
|
|
|
|
// grow order parameter array if necessary
|
|
|
|
if (atom->nmax > nmax) {
|
|
memoryKK->destroy_kokkos(k_qnarray,qnarray);
|
|
nmax = atom->nmax;
|
|
memoryKK->create_kokkos(k_qnarray,qnarray,nmax,ncol,"orientorder/atom:qnarray");
|
|
array_atom = qnarray;
|
|
d_qnarray = k_qnarray.template view<DeviceType>();
|
|
}
|
|
|
|
chunk_size = MIN(chunksize,inum); // "chunksize" variable is set by user
|
|
chunk_offset = 0;
|
|
|
|
if (chunk_size > (int)d_ncount.extent(0)) {
|
|
d_qnm = t_sna_3c("orientorder/atom:qnm",chunk_size,nqlist,qmax+1);
|
|
d_ncount = t_sna_1i("orientorder/atom:ncount",chunk_size);
|
|
}
|
|
|
|
copymode = 1;
|
|
|
|
// insure distsq and nearest arrays are long enough
|
|
|
|
maxneigh = 0;
|
|
Kokkos::parallel_reduce("ComputeOrientOrderAtomKokkos::find_max_neighs",inum, FindMaxNumNeighs<DeviceType>(k_list), Kokkos::Max<int>(maxneigh));
|
|
|
|
if (chunk_size > (int)d_distsq.extent(0) || maxneigh > (int)d_distsq.extent(1)) {
|
|
d_distsq = t_sna_2d_lr("orientorder/atom:distsq",chunk_size,maxneigh);
|
|
d_nearest = t_sna_2i_lr("orientorder/atom:nearest",chunk_size,maxneigh);
|
|
d_rlist = t_sna_3d_lr("orientorder/atom:rlist",chunk_size,maxneigh,3);
|
|
|
|
d_distsq_um = d_distsq;
|
|
d_rlist_um = d_rlist;
|
|
d_nearest_um = d_nearest;
|
|
}
|
|
|
|
// compute order parameter for each atom in group
|
|
// use full neighbor list to count atoms less than cutoff
|
|
|
|
atomKK->sync(execution_space,X_MASK|MASK_MASK);
|
|
x = atomKK->k_x.view<DeviceType>();
|
|
mask = atomKK->k_mask.view<DeviceType>();
|
|
|
|
int vector_length_default = 1;
|
|
int team_size_default = 1;
|
|
if (!host_flag)
|
|
team_size_default = 32;//max_neighs;
|
|
|
|
while (chunk_offset < inum) { // chunk up loop to prevent running out of memory
|
|
|
|
if (chunk_size > inum - chunk_offset)
|
|
chunk_size = inum - chunk_offset;
|
|
|
|
Kokkos::deep_copy(d_qnm,{0.0,0.0});
|
|
|
|
//Neigh
|
|
{
|
|
int vector_length = vector_length_default;
|
|
int team_size = team_size_default;
|
|
check_team_size_for<TagComputeOrientOrderAtomNeigh>(chunk_size,team_size,vector_length);
|
|
typename Kokkos::TeamPolicy<DeviceType, TagComputeOrientOrderAtomNeigh> policy_neigh(chunk_size,team_size,vector_length);
|
|
Kokkos::parallel_for("ComputeOrientOrderAtomNeigh",policy_neigh,*this);
|
|
}
|
|
|
|
//Select3
|
|
typename Kokkos::RangePolicy<DeviceType, TagComputeOrientOrderAtomSelect3> policy_select3(0,chunk_size);
|
|
Kokkos::parallel_for("ComputeOrientOrderAtomSelect3",policy_select3,*this);
|
|
|
|
//BOOP1
|
|
{
|
|
int vector_length = vector_length_default;
|
|
int team_size = team_size_default;
|
|
check_team_size_for<TagComputeOrientOrderAtomBOOP1>(chunk_size,team_size,vector_length);
|
|
typename Kokkos::TeamPolicy<DeviceType, TagComputeOrientOrderAtomBOOP1> policy_boop1(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length);
|
|
Kokkos::parallel_for("ComputeOrientOrderAtomBOOP1",policy_boop1,*this);
|
|
}
|
|
|
|
//BOOP2
|
|
typename Kokkos::RangePolicy<DeviceType, TagComputeOrientOrderAtomBOOP2> policy_boop2(0,chunk_size);
|
|
Kokkos::parallel_for("ComputeOrientOrderAtomBOOP2",policy_boop2,*this);
|
|
|
|
chunk_offset += chunk_size;
|
|
} // end while
|
|
|
|
copymode = 0;
|
|
|
|
k_qnarray.template modify<DeviceType>();
|
|
k_qnarray.template sync<LMPHostType>();
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
template<class DeviceType>
|
|
KOKKOS_INLINE_FUNCTION
|
|
void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrderAtomNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagComputeOrientOrderAtomNeigh>::member_type& team) const
|
|
{
|
|
const int ii = team.league_rank();
|
|
const int i = d_ilist[ii + chunk_offset];
|
|
if (mask[i] & groupbit) {
|
|
const X_FLOAT xtmp = x(i,0);
|
|
const X_FLOAT ytmp = x(i,1);
|
|
const X_FLOAT ztmp = x(i,2);
|
|
const int jnum = d_numneigh[i];
|
|
|
|
// loop over list of all neighbors within force cutoff
|
|
// distsq[] = distance sq to each
|
|
// rlist[] = distance vector to each
|
|
// nearest[] = atom indices of neighbors
|
|
|
|
int ncount = 0;
|
|
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,jnum),
|
|
[&] (const int jj, int& count) {
|
|
Kokkos::single(Kokkos::PerThread(team), [&] () {
|
|
int j = d_neighbors(i,jj);
|
|
j &= NEIGHMASK;
|
|
const F_FLOAT delx = x(j,0) - xtmp;
|
|
const F_FLOAT dely = x(j,1) - ytmp;
|
|
const F_FLOAT delz = x(j,2) - ztmp;
|
|
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
|
if (rsq < cutsq)
|
|
count++;
|
|
});
|
|
},ncount);
|
|
|
|
d_ncount(ii) = ncount;
|
|
|
|
if (team.team_rank() == 0)
|
|
Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,jnum),
|
|
[&] (const int jj, int& offset, bool final) {
|
|
int j = d_neighbors(i,jj);
|
|
j &= NEIGHMASK;
|
|
const F_FLOAT delx = x(j,0) - xtmp;
|
|
const F_FLOAT dely = x(j,1) - ytmp;
|
|
const F_FLOAT delz = x(j,2) - ztmp;
|
|
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
|
if (rsq < cutsq) {
|
|
if (final) {
|
|
d_distsq(ii,offset) = rsq;
|
|
d_rlist(ii,offset,0) = delx;
|
|
d_rlist(ii,offset,1) = dely;
|
|
d_rlist(ii,offset,2) = delz;
|
|
d_nearest(ii,offset) = j;
|
|
}
|
|
offset++;
|
|
}
|
|
});
|
|
}
|
|
}
|
|
|
|
template<class DeviceType>
|
|
KOKKOS_INLINE_FUNCTION
|
|
void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrderAtomSelect3,const int& ii) const {
|
|
|
|
const int i = d_ilist[ii + chunk_offset];
|
|
const int ncount = d_ncount(ii);
|
|
|
|
// if not nnn neighbors, order parameter = 0
|
|
|
|
if ((ncount == 0) || (ncount < nnn)) {
|
|
for (int jj = 0; jj < ncol; jj++)
|
|
d_qnarray(i,jj) = 0.0;
|
|
return;
|
|
}
|
|
|
|
// if nnn > 0, use only nearest nnn neighbors
|
|
|
|
if (nnn > 0) {
|
|
select3(nnn, ncount, ii);
|
|
d_ncount(ii) = nnn;
|
|
}
|
|
}
|
|
|
|
template<class DeviceType>
|
|
KOKKOS_INLINE_FUNCTION
|
|
void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrderAtomBOOP1,const typename Kokkos::TeamPolicy<DeviceType, TagComputeOrientOrderAtomBOOP1>::member_type& team) const {
|
|
|
|
// Extract the atom number
|
|
int ii = team.team_rank() + team.team_size() * (team.league_rank() %
|
|
((chunk_size+team.team_size()-1)/team.team_size()));
|
|
if (ii >= chunk_size) return;
|
|
|
|
// Extract the neighbor number
|
|
const int jj = team.league_rank() / ((chunk_size+team.team_size()-1)/team.team_size());
|
|
const int ncount = d_ncount(ii);
|
|
if (jj >= ncount) return;
|
|
|
|
// if not nnn neighbors, order parameter = 0
|
|
|
|
if ((ncount == 0) || (ncount < nnn))
|
|
return;
|
|
|
|
calc_boop1(ncount, ii, jj);
|
|
}
|
|
|
|
template<class DeviceType>
|
|
KOKKOS_INLINE_FUNCTION
|
|
void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrderAtomBOOP2,const int& ii) const {
|
|
const int ncount = d_ncount(ii);
|
|
|
|
// if not nnn neighbors, order parameter = 0
|
|
|
|
if ((ncount == 0) || (ncount < nnn))
|
|
return;
|
|
|
|
calc_boop2(ncount, ii);
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
select3 routine from Numerical Recipes (slightly modified)
|
|
find k smallest values in array of length n
|
|
sort auxiliary arrays at same time
|
|
------------------------------------------------------------------------- */
|
|
|
|
// Use no-op do while to create single statement
|
|
|
|
#define SWAP(view,i,j) do { \
|
|
tmp = view(i); view(i) = view(j); view(j) = tmp; \
|
|
} while (0)
|
|
|
|
#define ISWAP(view,i,j) do { \
|
|
itmp = view(i); view(i) = view(j); view(j) = itmp; \
|
|
} while (0)
|
|
|
|
#define SWAP3(view,i,j) do { \
|
|
tmp = view(i,0); view(i,0) = view(j,0); view(j,0) = tmp; \
|
|
tmp = view(i,1); view(i,1) = view(j,1); view(j,1) = tmp; \
|
|
tmp = view(i,2); view(i,2) = view(j,2); view(j,2) = tmp; \
|
|
} while (0)
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
template<class DeviceType>
|
|
KOKKOS_INLINE_FUNCTION
|
|
void ComputeOrientOrderAtomKokkos<DeviceType>::select3(int k, int n, int ii) const
|
|
{
|
|
int i,ir,j,l,mid,ia,itmp;
|
|
double a,tmp,a3[3];
|
|
|
|
auto arr = Kokkos::subview(d_distsq_um, ii, Kokkos::ALL);
|
|
auto iarr = Kokkos::subview(d_nearest_um, ii, Kokkos::ALL);
|
|
auto arr3 = Kokkos::subview(d_rlist_um, ii, Kokkos::ALL, Kokkos::ALL);
|
|
|
|
l = 0;
|
|
ir = n-1;
|
|
for (;;) {
|
|
if (ir <= l+1) {
|
|
if (ir == l+1 && arr[ir] < arr[l]) {
|
|
SWAP(arr,l,ir);
|
|
ISWAP(iarr,l,ir);
|
|
SWAP3(arr3,l,ir);
|
|
}
|
|
return;
|
|
} else {
|
|
mid=((l+ir+2) >> 1) - 1;
|
|
SWAP(arr,mid,l+1);
|
|
ISWAP(iarr,mid,l+1);
|
|
SWAP3(arr3,mid,l+1);
|
|
if (arr[l] > arr[ir]) {
|
|
SWAP(arr,l,ir);
|
|
ISWAP(iarr,l,ir);
|
|
SWAP3(arr3,l,ir);
|
|
}
|
|
if (arr[l+1] > arr[ir]) {
|
|
SWAP(arr,l+1,ir);
|
|
ISWAP(iarr,l+1,ir);
|
|
SWAP3(arr3,l+1,ir);
|
|
}
|
|
if (arr[l] > arr[l+1]) {
|
|
SWAP(arr,l,l+1);
|
|
ISWAP(iarr,l,l+1);
|
|
SWAP3(arr3,l,l+1);
|
|
}
|
|
i = l+1;
|
|
j = ir;
|
|
a = arr[l+1];
|
|
ia = iarr[l+1];
|
|
a3[0] = arr3(l+1,0);
|
|
a3[1] = arr3(l+1,1);
|
|
a3[2] = arr3(l+1,2);
|
|
for (;;) {
|
|
do i++; while (arr[i] < a);
|
|
do j--; while (arr[j] > a);
|
|
if (j < i) break;
|
|
SWAP(arr,i,j);
|
|
ISWAP(iarr,i,j);
|
|
SWAP3(arr3,i,j);
|
|
}
|
|
arr[l+1] = arr[j];
|
|
arr[j] = a;
|
|
iarr[l+1] = iarr[j];
|
|
iarr[j] = ia;
|
|
arr3(l+1,0) = arr3(j,0);
|
|
arr3(l+1,1) = arr3(j,1);
|
|
arr3(l+1,2) = arr3(j,2);
|
|
arr3(j,0) = a3[0];
|
|
arr3(j,1) = a3[1];
|
|
arr3(j,2) = a3[2];
|
|
if (j+1 >= k) ir = j-1;
|
|
if (j+1 <= k) l = i;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
calculate the bond orientational order parameters
|
|
------------------------------------------------------------------------- */
|
|
|
|
template<class DeviceType>
|
|
KOKKOS_INLINE_FUNCTION
|
|
void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop1(int /*ncount*/, int ii, int ineigh) const
|
|
{
|
|
const double r0 = d_rlist(ii,ineigh,0);
|
|
const double r1 = d_rlist(ii,ineigh,1);
|
|
const double r2 = d_rlist(ii,ineigh,2);
|
|
const double rmag = sqrt(r0*r0 + r1*r1 + r2*r2);
|
|
if (rmag <= MY_EPSILON) {
|
|
return;
|
|
}
|
|
|
|
const double costheta = r2 / rmag;
|
|
SNAcomplex expphi = {r0,r1};
|
|
const double rxymag = sqrt(expphi.re*expphi.re+expphi.im*expphi.im);
|
|
if (rxymag <= MY_EPSILON) {
|
|
expphi.re = 1.0;
|
|
expphi.im = 0.0;
|
|
} else {
|
|
const double rxymaginv = 1.0/rxymag;
|
|
expphi.re *= rxymaginv;
|
|
expphi.im *= rxymaginv;
|
|
}
|
|
|
|
for (int il = 0; il < nqlist; il++) {
|
|
const int l = d_qlist[il];
|
|
|
|
// calculate spherical harmonics
|
|
// Ylm, -l <= m <= l
|
|
// sign convention: sign(Yll(0,0)) = (-1)^l
|
|
|
|
//d_qnm(ii,il,l).re += polar_prefactor(l, 0, costheta);
|
|
const double polar_pf = polar_prefactor(l, 0, costheta);
|
|
Kokkos::atomic_add(&(d_qnm(ii,il,0).re), polar_pf);
|
|
SNAcomplex expphim = {expphi.re,expphi.im};
|
|
for (int m = 1; m <= +l; m++) {
|
|
const double prefactor = polar_prefactor(l, m, costheta);
|
|
SNAcomplex ylm = {prefactor * expphim.re, prefactor * expphim.im};
|
|
Kokkos::atomic_add(&(d_qnm(ii,il,m).re), ylm.re);
|
|
Kokkos::atomic_add(&(d_qnm(ii,il,m).im), ylm.im);
|
|
// Skip calculation of qnm for m<0 due to symmetry
|
|
SNAcomplex tmp;
|
|
tmp.re = expphim.re*expphi.re - expphim.im*expphi.im;
|
|
tmp.im = expphim.re*expphi.im + expphim.im*expphi.re;
|
|
expphim.re = tmp.re;
|
|
expphim.im = tmp.im;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
calculate the bond orientational order parameters
|
|
------------------------------------------------------------------------- */
|
|
|
|
template<class DeviceType>
|
|
KOKKOS_INLINE_FUNCTION
|
|
void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) const
|
|
{
|
|
const int i = d_ilist[ii + chunk_offset];
|
|
|
|
// convert sums to averages
|
|
|
|
double facn = 1.0 / ncount;
|
|
for (int il = 0; il < nqlist; il++) {
|
|
int l = d_qlist[il];
|
|
for (int m = 0; m < l+1; m++) {
|
|
d_qnm(ii,il,m).re *= facn;
|
|
d_qnm(ii,il,m).im *= facn;
|
|
}
|
|
}
|
|
|
|
// calculate Q_l
|
|
// NOTE: optional W_l_hat and components of Q_qlcomp use these stored Q_l values
|
|
|
|
int jj = 0;
|
|
for (int il = 0; il < nqlist; il++) {
|
|
int l = d_qlist[il];
|
|
double qm_sum = d_qnm(ii,il,0).re*d_qnm(ii,il,0).re;
|
|
for (int m = 1; m < l+1; m++)
|
|
qm_sum += 2.0*(d_qnm(ii,il,m).re*d_qnm(ii,il,m).re + d_qnm(ii,il,m).im*d_qnm(ii,il,m).im);
|
|
d_qnarray(i,jj++) = d_qnormfac(il) * sqrt(qm_sum);
|
|
}
|
|
|
|
// calculate W_l
|
|
|
|
int nterms = 0;
|
|
int widx_count = 0;
|
|
if (wlflag || wlhatflag) {
|
|
for (int il = 0; il < nqlist; il++) {
|
|
int l = d_qlist[il];
|
|
double wlsum = 0.0;
|
|
for (int m1 = -l; m1 <= 0; m1++) {
|
|
for (int m2 = 0; m2 <= ((-m1)>>1); m2++) {
|
|
const int m3 = -(m1 + m2);
|
|
// Loop enforces -L<=m1<=0<=m2<=m3<=L, and m1+m2+m3=0
|
|
|
|
// For even L, W3j is invariant under permutation of
|
|
// (m1,m2,m3) and (m1,m2,m3)->(-m1,-m2,-m3). The loop
|
|
// structure enforces visiting only one member of each
|
|
// such symmetry (invariance) group.
|
|
|
|
//const int sgn = 1 - 2*(m1&1);
|
|
const int sgn = powint(-1,m1); // sgn = (-1)^m
|
|
// m1 <= 0, and Qlm[-m] = (-1)^m*conjg(Qlm[m])
|
|
SNAcomplex Q1Q2;
|
|
Q1Q2.re = (d_qnm(ii,il,-m1).re*d_qnm(ii,il,m2).re + d_qnm(ii,il,-m1).im*d_qnm(ii,il,m2).im)*sgn;
|
|
Q1Q2.im = (d_qnm(ii,il,-m1).re*d_qnm(ii,il,m2).im - d_qnm(ii,il,-m1).im*d_qnm(ii,il,m2).re)*sgn;
|
|
const double Q1Q2Q3 = Q1Q2.re*d_qnm(ii,il,m3).re - Q1Q2.im*d_qnm(ii,il,m3).im;
|
|
const double c = d_w3jlist[widx_count++];
|
|
wlsum += Q1Q2Q3*c;
|
|
}
|
|
}
|
|
d_qnarray(i,jj++) = wlsum/d_qnormfac2(il);
|
|
nterms++;
|
|
}
|
|
}
|
|
|
|
// calculate W_l_hat
|
|
|
|
if (wlhatflag) {
|
|
const int jptr = jj-nterms;
|
|
if (!wlflag) jj = jptr;
|
|
for (int il = 0; il < nqlist; il++) {
|
|
if (d_qnarray(i,il) < QEPSILON)
|
|
d_qnarray(i,jj++) = 0.0;
|
|
else {
|
|
const double qnfac = d_qnormfac(il)/d_qnarray(i,il);
|
|
d_qnarray(i,jj++) = d_qnarray(i,jptr+il) * (qnfac*qnfac*qnfac) * d_qnormfac2(il);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Calculate components of Q_l/|Q_l|, for l=qlcomp
|
|
|
|
if (qlcompflag) {
|
|
const int il = iqlcomp;
|
|
const int l = qlcomp;
|
|
if (d_qnarray(i,il) < QEPSILON)
|
|
for (int m = 0; m < 2*l+1; m++) {
|
|
d_qnarray(i,jj++) = 0.0;
|
|
d_qnarray(i,jj++) = 0.0;
|
|
}
|
|
else {
|
|
const double qnfac = d_qnormfac(il)/d_qnarray(i,il);
|
|
for (int m = -l; m < 0; m++) {
|
|
// Computed only qnm for m>=0.
|
|
// qnm[-m] = (-1)^m * conjg(qnm[m])
|
|
//const int sgn = 1 - 2*(m&1); // sgn = (-1)^m
|
|
const int sgn = powint(-1,m); // sgn = (-1)^m
|
|
d_qnarray(i,jj++) = d_qnm(ii,il,-m).re * qnfac * sgn;
|
|
d_qnarray(i,jj++) = -d_qnm(ii,il,-m).im * qnfac * sgn;
|
|
}
|
|
for (int m = 0; m < l+1; m++) {
|
|
d_qnarray(i,jj++) = d_qnm(ii,il,m).re * qnfac;
|
|
d_qnarray(i,jj++) = d_qnm(ii,il,m).im * qnfac;
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
polar prefactor for spherical harmonic Y_l^m, where
|
|
Y_l^m (theta, phi) = prefactor(l, m, cos(theta)) * exp(i*m*phi)
|
|
------------------------------------------------------------------------- */
|
|
|
|
template<class DeviceType>
|
|
KOKKOS_INLINE_FUNCTION
|
|
double ComputeOrientOrderAtomKokkos<DeviceType>::polar_prefactor(int l, int m, double costheta) const
|
|
{
|
|
const int mabs = abs(m);
|
|
|
|
double prefactor = 1.0;
|
|
for (int i=l-mabs+1; i < l+mabs+1; ++i)
|
|
prefactor *= static_cast<double>(i);
|
|
|
|
prefactor = sqrt(static_cast<double>(2*l+1)/(MY_4PI*prefactor))
|
|
* associated_legendre(l,mabs,costheta);
|
|
|
|
if ((m < 0) && (m % 2)) prefactor = -prefactor;
|
|
|
|
return prefactor;
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
associated legendre polynomial
|
|
sign convention: P(l,l) = (2l-1)!!(-sqrt(1-x^2))^l
|
|
------------------------------------------------------------------------- */
|
|
|
|
template<class DeviceType>
|
|
KOKKOS_INLINE_FUNCTION
|
|
double ComputeOrientOrderAtomKokkos<DeviceType>::associated_legendre(int l, int m, double x) const
|
|
{
|
|
if (l < m) return 0.0;
|
|
|
|
double p(1.0), pm1(0.0), pm2(0.0);
|
|
|
|
if (m != 0) {
|
|
const double msqx = -sqrt(1.0-x*x);
|
|
for (int i=1; i < m+1; ++i)
|
|
p *= static_cast<double>(2*i-1) * msqx;
|
|
}
|
|
|
|
for (int i=m+1; i < l+1; ++i) {
|
|
pm2 = pm1;
|
|
pm1 = p;
|
|
p = (static_cast<double>(2*i-1)*x*pm1
|
|
- static_cast<double>(i+m-1)*pm2) / static_cast<double>(i-m);
|
|
}
|
|
|
|
return p;
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
Initialize table of Wigner 3j symbols
|
|
------------------------------------------------------------------------- */
|
|
|
|
template<class DeviceType>
|
|
void ComputeOrientOrderAtomKokkos<DeviceType>::init_wigner3j()
|
|
{
|
|
ComputeOrientOrderAtom::init_wigner3j();
|
|
|
|
d_w3jlist = t_sna_1d("computeorientorderatom:w3jlist",widx_max);
|
|
auto h_w3jlist = Kokkos::create_mirror_view(d_w3jlist);
|
|
|
|
for (int i = 0; i< widx_max; i++)
|
|
h_w3jlist(i) = w3jlist[i];
|
|
|
|
Kokkos::deep_copy(d_w3jlist,h_w3jlist);
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
check max team size
|
|
------------------------------------------------------------------------- */
|
|
|
|
template<class DeviceType>
|
|
template<class TagStyle>
|
|
void ComputeOrientOrderAtomKokkos<DeviceType>::check_team_size_for(int inum, int &team_size, int vector_length) {
|
|
int team_size_max;
|
|
|
|
team_size_max = Kokkos::TeamPolicy<DeviceType,TagStyle>(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag());
|
|
|
|
if (team_size*vector_length > team_size_max)
|
|
team_size = team_size_max/vector_length;
|
|
}
|
|
|
|
namespace LAMMPS_NS {
|
|
template class ComputeOrientOrderAtomKokkos<LMPDeviceType>;
|
|
#ifdef LMP_KOKKOS_GPU
|
|
template class ComputeOrientOrderAtomKokkos<LMPHostType>;
|
|
#endif
|
|
}
|