Add optimized version of compute_orientorder_atom from Tomas Oppelstrup (LLNL)
This commit is contained in:
@ -13,7 +13,11 @@
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Stan Moore (SNL)
|
||||
Contributing authors: Stan Moore (SNL)
|
||||
|
||||
Tomas Oppelstrup (LLNL): Optimization which reduces the number
|
||||
of iterations in the L,m1,m2 loops (by a factor of up to 10), and
|
||||
avoids evaluation of Ylm functions of negative m
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "compute_orientorder_atom_kokkos.h"
|
||||
@ -145,7 +149,7 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::compute_peratom()
|
||||
chunk_offset = 0;
|
||||
|
||||
if (chunk_size > (int)d_ncount.extent(0)) {
|
||||
d_qnm = t_sna_3c("orientorder/atom:qnm",chunk_size,nqlist,2*qmax+1);
|
||||
d_qnm = t_sna_3c("orientorder/atom:qnm",chunk_size,nqlist,qmax+1);
|
||||
d_ncount = t_sna_1i("orientorder/atom:ncount",chunk_size);
|
||||
}
|
||||
|
||||
@ -337,7 +341,6 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrder
|
||||
calc_boop2(ncount, ii);
|
||||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
select3 routine from Numerical Recipes (slightly modified)
|
||||
find k smallest values in array of length n
|
||||
@ -471,26 +474,14 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop1(int /*ncount*/, int ii
|
||||
|
||||
//d_qnm(ii,il,l).re += polar_prefactor(l, 0, costheta);
|
||||
const double polar_pf = polar_prefactor(l, 0, costheta);
|
||||
Kokkos::atomic_add(&(d_qnm(ii,il,l).re), polar_pf);
|
||||
Kokkos::atomic_add(&(d_qnm(ii,il,0).re), polar_pf);
|
||||
SNAcomplex expphim = {expphi.re,expphi.im};
|
||||
for (int m = 1; m <= +l; m++) {
|
||||
const double prefactor = polar_prefactor(l, m, costheta);
|
||||
SNAcomplex ylm = {prefactor * expphim.re, prefactor * expphim.im};
|
||||
//d_qnm(ii,il,m+l).re += ylm.re;
|
||||
//d_qnm(ii,il,m+l).im += ylm.im;
|
||||
Kokkos::atomic_add(&(d_qnm(ii,il,m+l).re), ylm.re);
|
||||
Kokkos::atomic_add(&(d_qnm(ii,il,m+l).im), ylm.im);
|
||||
if (m & 1) {
|
||||
//d_qnm(ii,il,-m+l).re -= ylm.re;
|
||||
//d_qnm(ii,il,-m+l).im += ylm.im;
|
||||
Kokkos::atomic_add(&(d_qnm(ii,il,-m+l).re), -ylm.re);
|
||||
Kokkos::atomic_add(&(d_qnm(ii,il,-m+l).im), ylm.im);
|
||||
} else {
|
||||
//d_qnm(ii,il,-m+l).re += ylm.re;
|
||||
//d_qnm(ii,il,-m+l).im -= ylm.im;
|
||||
Kokkos::atomic_add(&(d_qnm(ii,il,-m+l).re), ylm.re);
|
||||
Kokkos::atomic_add(&(d_qnm(ii,il,-m+l).im), -ylm.im);
|
||||
}
|
||||
Kokkos::atomic_add(&(d_qnm(ii,il,m).re), ylm.re);
|
||||
Kokkos::atomic_add(&(d_qnm(ii,il,m).im), ylm.im);
|
||||
// Skip calculation of qnm for m<0 due to symmetry
|
||||
SNAcomplex tmp;
|
||||
tmp.re = expphim.re*expphi.re - expphim.im*expphi.im;
|
||||
tmp.im = expphim.re*expphi.im + expphim.im*expphi.re;
|
||||
@ -515,7 +506,7 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) co
|
||||
double facn = 1.0 / ncount;
|
||||
for (int il = 0; il < nqlist; il++) {
|
||||
int l = d_qlist[il];
|
||||
for (int m = 0; m < 2*l+1; m++) {
|
||||
for (int m = 0; m < l+1; m++) {
|
||||
d_qnm(ii,il,m).re *= facn;
|
||||
d_qnm(ii,il,m).im *= facn;
|
||||
}
|
||||
@ -528,56 +519,58 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) co
|
||||
for (int il = 0; il < nqlist; il++) {
|
||||
int l = d_qlist[il];
|
||||
double qnormfac = sqrt(MY_4PI/(2*l+1));
|
||||
double qm_sum = 0.0;
|
||||
for (int m = 0; m < 2*l+1; m++)
|
||||
qm_sum += d_qnm(ii,il,m).re*d_qnm(ii,il,m).re + d_qnm(ii,il,m).im*d_qnm(ii,il,m).im;
|
||||
double qm_sum = d_qnm(ii,il,0).re*d_qnm(ii,il,0).re;
|
||||
for (int m = 1; m < l+1; m++)
|
||||
qm_sum += 2.0*(d_qnm(ii,il,m).re*d_qnm(ii,il,m).re + d_qnm(ii,il,m).im*d_qnm(ii,il,m).im);
|
||||
d_qnarray(i,jj++) = qnormfac * sqrt(qm_sum);
|
||||
}
|
||||
|
||||
// calculate W_l
|
||||
|
||||
if (wlflag) {
|
||||
int idxcg_count = 0;
|
||||
int nterms = 0;
|
||||
int widx_count = 0;
|
||||
if (wlflag || wlhatflag) {
|
||||
for (int il = 0; il < nqlist; il++) {
|
||||
int l = d_qlist[il];
|
||||
double wlsum = 0.0;
|
||||
for (int m1 = 0; m1 < 2*l+1; m1++) {
|
||||
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
|
||||
int m = m1 + m2 - l;
|
||||
SNAcomplex qm1qm2;
|
||||
qm1qm2.re = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).re - d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).im;
|
||||
qm1qm2.im = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).im + d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).re;
|
||||
wlsum += (qm1qm2.re*d_qnm(ii,il,m).re + qm1qm2.im*d_qnm(ii,il,m).im)*d_cglist[idxcg_count];
|
||||
idxcg_count++;
|
||||
for (int m1 = -l; m1 <= 0; m1++) {
|
||||
for (int m2 = 0; m2 <= ((-m1)>>1); m2++) {
|
||||
const int m3 = -(m1 + m2);
|
||||
// Loop enforces -L<=m1<=0<=m2<=m3<=L, and m1+m2+m3=0
|
||||
|
||||
// For even L, W3j is invariant under permutation of
|
||||
// (m1,m2,m3) and (m1,m2,m3)->(-m1,-m2,-m3). The loop
|
||||
// structure enforces visiting only one member of each
|
||||
// such symmetry (invariance) group.
|
||||
|
||||
const int sgn = 1 - 2*(m1&1);
|
||||
// m1 <= 0, and Qlm[-m] = (-1)^m*conjg(Qlm[m]). sgn = (-1)^m.
|
||||
SNAcomplex Q1Q2;
|
||||
Q1Q2.re = (d_qnm(ii,il,-m1).re*d_qnm(ii,il,m2).re + d_qnm(ii,il,-m1).im*d_qnm(ii,il,m2).im)*sgn;
|
||||
Q1Q2.im = (d_qnm(ii,il,-m1).re*d_qnm(ii,il,m2).im - d_qnm(ii,il,-m1).im*d_qnm(ii,il,m2).re)*sgn;
|
||||
const double Q1Q2Q3 = Q1Q2.re*d_qnm(ii,il,m3).re - Q1Q2.im*d_qnm(ii,il,m3).im;
|
||||
const double c = d_w3jlist[widx_count++];
|
||||
wlsum += Q1Q2Q3*c;
|
||||
}
|
||||
}
|
||||
d_qnarray(i,jj++) = wlsum/sqrt(2.0*l+1.0);
|
||||
nterms++;
|
||||
}
|
||||
}
|
||||
|
||||
// calculate W_l_hat
|
||||
|
||||
if (wlhatflag) {
|
||||
int idxcg_count = 0;
|
||||
const int jptr = jj-nterms;
|
||||
if (!wlflag) jj = jptr;
|
||||
for (int il = 0; il < nqlist; il++) {
|
||||
int l = d_qlist[il];
|
||||
double wlsum = 0.0;
|
||||
for (int m1 = 0; m1 < 2*l+1; m1++) {
|
||||
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
|
||||
const int m = m1 + m2 - l;
|
||||
SNAcomplex qm1qm2;
|
||||
qm1qm2.re = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).re - d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).im;
|
||||
qm1qm2.im = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).im + d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).re;
|
||||
wlsum += (qm1qm2.re*d_qnm(ii,il,m).re + qm1qm2.im*d_qnm(ii,il,m).im)*d_cglist[idxcg_count];
|
||||
idxcg_count++;
|
||||
}
|
||||
}
|
||||
if (d_qnarray(i,il) < QEPSILON)
|
||||
d_qnarray(i,jj++) = 0.0;
|
||||
else {
|
||||
const double qnormfac = sqrt(MY_4PI/(2*l+1));
|
||||
const double qnfac = qnormfac/d_qnarray(i,il);
|
||||
d_qnarray(i,jj++) = wlsum/sqrt(2.0*l+1.0)*(qnfac*qnfac*qnfac);
|
||||
d_qnarray(i,jj++) = d_qnarray(i,jptr+il) * (qnfac*qnfac*qnfac) * sqrt(2.0*l+1.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -595,7 +588,14 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) co
|
||||
else {
|
||||
const double qnormfac = sqrt(MY_4PI/(2*l+1));
|
||||
const double qnfac = qnormfac/d_qnarray(i,il);
|
||||
for (int m = 0; m < 2*l+1; m++) {
|
||||
for (int m = -l; m < 0; m++) {
|
||||
// Computed only qnm for m>=0.
|
||||
// qnm[-m] = (-1)^m * conjg(qnm[m])
|
||||
const int sgn = 1 - 2*(m&1); // sgn = (-1)^m
|
||||
d_qnarray(i,jj++) = d_qnm(ii,il,-m).re * qnfac * sgn;
|
||||
d_qnarray(i,jj++) = -d_qnm(ii,il,-m).im * qnfac * sgn;
|
||||
}
|
||||
for (int m = 0; m < l+1; m++) {
|
||||
d_qnarray(i,jj++) = d_qnm(ii,il,m).re * qnfac;
|
||||
d_qnarray(i,jj++) = d_qnm(ii,il,m).im * qnfac;
|
||||
}
|
||||
@ -657,70 +657,21 @@ double ComputeOrientOrderAtomKokkos<DeviceType>::associated_legendre(int l, int
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
assign Clebsch-Gordan coefficients
|
||||
using the quasi-binomial formula VMK 8.2.1(3)
|
||||
specialized for case j1=j2=j=l
|
||||
Initialize table of Wigner 3j symbols
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void ComputeOrientOrderAtomKokkos<DeviceType>::init_clebsch_gordan()
|
||||
void ComputeOrientOrderAtomKokkos<DeviceType>::init_wigner3j()
|
||||
{
|
||||
double sum,dcg,sfaccg, sfac1, sfac2;
|
||||
int m, aa2, bb2, cc2;
|
||||
int ifac, idxcg_count;
|
||||
ComputeOrientOrderAtom::init_wigner3j();
|
||||
|
||||
idxcg_count = 0;
|
||||
for (int il = 0; il < nqlist; il++) {
|
||||
int l = qlist[il];
|
||||
for (int m1 = 0; m1 < 2*l+1; m1++)
|
||||
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++)
|
||||
idxcg_count++;
|
||||
}
|
||||
idxcg_max = idxcg_count;
|
||||
d_cglist = t_sna_1d("orientorder/atom:d_cglist",idxcg_max);
|
||||
auto h_cglist = Kokkos::create_mirror_view(d_cglist);
|
||||
d_w3jlist = t_sna_1d("computeorientorderatom:w3jlist",widx_max);
|
||||
auto h_w3jlist = Kokkos::create_mirror_view(d_w3jlist);
|
||||
|
||||
idxcg_count = 0;
|
||||
for (int il = 0; il < nqlist; il++) {
|
||||
int l = qlist[il];
|
||||
for (int m1 = 0; m1 < 2*l+1; m1++) {
|
||||
aa2 = m1 - l;
|
||||
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
|
||||
bb2 = m2 - l;
|
||||
m = aa2 + bb2 + l;
|
||||
for (int i = 0; i< widx_max; i++)
|
||||
h_w3jlist(i) = w3jlist[i];
|
||||
|
||||
sum = 0.0;
|
||||
for (int z = MAX(0, MAX(-aa2, bb2));
|
||||
z <= MIN(l, MIN(l - aa2, l + bb2)); z++) {
|
||||
ifac = z % 2 ? -1 : 1;
|
||||
sum += ifac /
|
||||
(factorial(z) *
|
||||
factorial(l - z) *
|
||||
factorial(l - aa2 - z) *
|
||||
factorial(l + bb2 - z) *
|
||||
factorial(aa2 + z) *
|
||||
factorial(-bb2 + z));
|
||||
}
|
||||
|
||||
cc2 = m - l;
|
||||
sfaccg = sqrt(factorial(l + aa2) *
|
||||
factorial(l - aa2) *
|
||||
factorial(l + bb2) *
|
||||
factorial(l - bb2) *
|
||||
factorial(l + cc2) *
|
||||
factorial(l - cc2) *
|
||||
(2*l + 1));
|
||||
|
||||
sfac1 = factorial(3*l + 1);
|
||||
sfac2 = factorial(l);
|
||||
dcg = sqrt(sfac2*sfac2*sfac2 / sfac1);
|
||||
|
||||
h_cglist[idxcg_count] = sum * dcg * sfaccg;
|
||||
idxcg_count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
Kokkos::deep_copy(d_cglist,h_cglist);
|
||||
Kokkos::deep_copy(d_w3jlist,h_w3jlist);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
||||
@ -127,8 +127,8 @@ class ComputeOrientOrderAtomKokkos : public ComputeOrientOrderAtom {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double associated_legendre(int, int, double) const;
|
||||
|
||||
void init_clebsch_gordan();
|
||||
t_sna_1d d_cglist; // Clebsch-Gordan coeffs
|
||||
void init_wigner3j();
|
||||
t_sna_1d d_w3jlist; // Wigner coeffs
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@ -13,8 +13,11 @@
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Aidan Thompson (SNL)
|
||||
Axel Kohlmeyer (Temple U)
|
||||
Contributing authors: Aidan Thompson (SNL), Axel Kohlmeyer (Temple U)
|
||||
|
||||
Tomas Oppelstrup (LLNL): Optimization which reduces the number
|
||||
of iterations in the L,m1,m2 loops (by a factor of up to 10), and
|
||||
avoids evaluation of Ylm functions of negative m
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "compute_orientorder_atom.h"
|
||||
@ -54,7 +57,7 @@ using namespace MathSpecial;
|
||||
ComputeOrientOrderAtom::ComputeOrientOrderAtom(LAMMPS *lmp, int narg, char **arg) :
|
||||
Compute(lmp, narg, arg),
|
||||
qlist(nullptr), distsq(nullptr), nearest(nullptr), rlist(nullptr),
|
||||
qnarray(nullptr), qnm_r(nullptr), qnm_i(nullptr), cglist(nullptr)
|
||||
qnarray(nullptr), qnm_r(nullptr), qnm_i(nullptr), w3jlist(nullptr)
|
||||
{
|
||||
if (narg < 3 ) error->all(FLERR,"Illegal compute orientorder/atom command");
|
||||
|
||||
@ -166,7 +169,7 @@ ComputeOrientOrderAtom::~ComputeOrientOrderAtom()
|
||||
memory->destroy(qlist);
|
||||
memory->destroy(qnm_r);
|
||||
memory->destroy(qnm_i);
|
||||
memory->destroy(cglist);
|
||||
memory->destroy(w3jlist);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -181,8 +184,8 @@ void ComputeOrientOrderAtom::init()
|
||||
error->all(FLERR,"Compute orientorder/atom cutoff is "
|
||||
"longer than pairwise cutoff");
|
||||
|
||||
memory->create(qnm_r,nqlist,2*qmax+1,"orientorder/atom:qnm_r");
|
||||
memory->create(qnm_i,nqlist,2*qmax+1,"orientorder/atom:qnm_i");
|
||||
memory->create(qnm_r,nqlist,qmax+1,"orientorder/atom:qnm_r");
|
||||
memory->create(qnm_i,nqlist,qmax+1,"orientorder/atom:qnm_i");
|
||||
|
||||
// need an occasional full neighbor list
|
||||
|
||||
@ -199,7 +202,7 @@ void ComputeOrientOrderAtom::init()
|
||||
if (count > 1 && comm->me == 0)
|
||||
error->warning(FLERR,"More than one compute orientorder/atom");
|
||||
|
||||
if (wlflag || wlhatflag) init_clebsch_gordan();
|
||||
if (wlflag || wlhatflag) init_wigner3j();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -424,7 +427,7 @@ void ComputeOrientOrderAtom::calc_boop(double **rlist,
|
||||
|
||||
for (int il = 0; il < nqlist; il++) {
|
||||
int l = qlist[il];
|
||||
for (int m = 0; m < 2*l+1; m++) {
|
||||
for (int m = 0; m < l+1; m++) {
|
||||
qnm_r[il][m] = 0.0;
|
||||
qnm_i[il][m] = 0.0;
|
||||
}
|
||||
@ -457,7 +460,7 @@ void ComputeOrientOrderAtom::calc_boop(double **rlist,
|
||||
// Ylm, -l <= m <= l
|
||||
// sign convention: sign(Yll(0,0)) = (-1)^l
|
||||
|
||||
qnm_r[il][l] += polar_prefactor(l, 0, costheta);
|
||||
qnm_r[il][0] += polar_prefactor(l, 0, costheta);
|
||||
double expphim_r = expphi_r;
|
||||
double expphim_i = expphi_i;
|
||||
for (int m = 1; m <= +l; m++) {
|
||||
@ -465,15 +468,9 @@ void ComputeOrientOrderAtom::calc_boop(double **rlist,
|
||||
double prefactor = polar_prefactor(l, m, costheta);
|
||||
double ylm_r = prefactor * expphim_r;
|
||||
double ylm_i = prefactor * expphim_i;
|
||||
qnm_r[il][m+l] += ylm_r;
|
||||
qnm_i[il][m+l] += ylm_i;
|
||||
if (m & 1) {
|
||||
qnm_r[il][-m+l] -= ylm_r;
|
||||
qnm_i[il][-m+l] += ylm_i;
|
||||
} else {
|
||||
qnm_r[il][-m+l] += ylm_r;
|
||||
qnm_i[il][-m+l] -= ylm_i;
|
||||
}
|
||||
qnm_r[il][m] += ylm_r;
|
||||
qnm_i[il][m] += ylm_i;
|
||||
// Skip calculation of qnm for m<0 due to symmetry
|
||||
double tmp_r = expphim_r*expphi_r - expphim_i*expphi_i;
|
||||
double tmp_i = expphim_r*expphi_i + expphim_i*expphi_r;
|
||||
expphim_r = tmp_r;
|
||||
@ -488,7 +485,7 @@ void ComputeOrientOrderAtom::calc_boop(double **rlist,
|
||||
double facn = 1.0 / ncount;
|
||||
for (int il = 0; il < nqlist; il++) {
|
||||
int l = qlist[il];
|
||||
for (int m = 0; m < 2*l+1; m++) {
|
||||
for (int m = 0; m < l+1; m++) {
|
||||
qnm_r[il][m] *= facn;
|
||||
qnm_i[il][m] *= facn;
|
||||
}
|
||||
@ -501,54 +498,57 @@ void ComputeOrientOrderAtom::calc_boop(double **rlist,
|
||||
for (int il = 0; il < nqlist; il++) {
|
||||
int l = qlist[il];
|
||||
double qnormfac = sqrt(MY_4PI/(2*l+1));
|
||||
double qm_sum = 0.0;
|
||||
for (int m = 0; m < 2*l+1; m++)
|
||||
qm_sum += qnm_r[il][m]*qnm_r[il][m] + qnm_i[il][m]*qnm_i[il][m];
|
||||
double qm_sum = qnm_r[il][0]*qnm_r[il][0];
|
||||
for (int m = 1; m < l+1; m++)
|
||||
qm_sum += 2.0*(qnm_r[il][m]*qnm_r[il][m] + qnm_i[il][m]*qnm_i[il][m]);
|
||||
qn[jj++] = qnormfac * sqrt(qm_sum);
|
||||
}
|
||||
|
||||
// calculate W_l
|
||||
|
||||
if (wlflag) {
|
||||
int idxcg_count = 0;
|
||||
int nterms = 0;
|
||||
int widx_count = 0;
|
||||
if (wlflag || wlhatflag) {
|
||||
for (int il = 0; il < nqlist; il++) {
|
||||
int l = qlist[il];
|
||||
double wlsum = 0.0;
|
||||
for (int m1 = 0; m1 < 2*l+1; m1++) {
|
||||
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
|
||||
int m = m1 + m2 - l;
|
||||
double qm1qm2_r = qnm_r[il][m1]*qnm_r[il][m2] - qnm_i[il][m1]*qnm_i[il][m2];
|
||||
double qm1qm2_i = qnm_r[il][m1]*qnm_i[il][m2] + qnm_i[il][m1]*qnm_r[il][m2];
|
||||
wlsum += (qm1qm2_r*qnm_r[il][m] + qm1qm2_i*qnm_i[il][m])*cglist[idxcg_count];
|
||||
idxcg_count++;
|
||||
for (int m1 = -l; m1 <= 0; m1++) {
|
||||
for (int m2 = 0; m2 <= ((-m1)>>1); m2++) {
|
||||
const int m3 = -(m1 + m2);
|
||||
// Loop enforces -L<=m1<=0<=m2<=m3<=L, and m1+m2+m3=0
|
||||
|
||||
// For even L, W3j is invariant under permutation of
|
||||
// (m1,m2,m3) and (m1,m2,m3)->(-m1,-m2,-m3). The loop
|
||||
// structure enforces visiting only one member of each
|
||||
// such symmetry (invariance) group.
|
||||
|
||||
const int sgn = 1 - 2*(m1&1);
|
||||
// m1 <= 0, and Qlm[-m] = (-1)^m*conjg(Qlm[m]). sgn = (-1)^m.
|
||||
const double Q1Q2_r = (qnm_r[il][-m1]*qnm_r[il][m2] + qnm_i[il][-m1]*qnm_i[il][m2])*sgn;
|
||||
const double Q1Q2_i = (qnm_r[il][-m1]*qnm_i[il][m2] - qnm_i[il][-m1]*qnm_r[il][m2])*sgn;
|
||||
const double Q1Q2Q3 = Q1Q2_r*qnm_r[il][m3] - Q1Q2_i*qnm_i[il][m3];
|
||||
const double c = w3jlist[widx_count++];
|
||||
wlsum += Q1Q2Q3*c;
|
||||
}
|
||||
}
|
||||
qn[jj++] = wlsum/sqrt(2*l+1);
|
||||
nterms++;
|
||||
}
|
||||
}
|
||||
|
||||
// calculate W_l_hat
|
||||
|
||||
if (wlhatflag) {
|
||||
int idxcg_count = 0;
|
||||
const int jptr = jj-nterms;
|
||||
if (!wlflag) jj = jptr;
|
||||
for (int il = 0; il < nqlist; il++) {
|
||||
int l = qlist[il];
|
||||
double wlsum = 0.0;
|
||||
for (int m1 = 0; m1 < 2*l+1; m1++) {
|
||||
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
|
||||
int m = m1 + m2 - l;
|
||||
double qm1qm2_r = qnm_r[il][m1]*qnm_r[il][m2] - qnm_i[il][m1]*qnm_i[il][m2];
|
||||
double qm1qm2_i = qnm_r[il][m1]*qnm_i[il][m2] + qnm_i[il][m1]*qnm_r[il][m2];
|
||||
wlsum += (qm1qm2_r*qnm_r[il][m] + qm1qm2_i*qnm_i[il][m])*cglist[idxcg_count];
|
||||
idxcg_count++;
|
||||
}
|
||||
}
|
||||
if (qn[il] < QEPSILON)
|
||||
qn[jj++] = 0.0;
|
||||
else {
|
||||
double qnormfac = sqrt(MY_4PI/(2*l+1));
|
||||
double qnfac = qnormfac/qn[il];
|
||||
qn[jj++] = wlsum/sqrt(2*l+1)*(qnfac*qnfac*qnfac);
|
||||
qn[jj++] = qn[jptr+il] * (qnfac*qnfac*qnfac) * sqrt(2*l+1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -566,7 +566,14 @@ void ComputeOrientOrderAtom::calc_boop(double **rlist,
|
||||
else {
|
||||
double qnormfac = sqrt(MY_4PI/(2*l+1));
|
||||
double qnfac = qnormfac/qn[il];
|
||||
for (int m = 0; m < 2*l+1; m++) {
|
||||
for (int m = -l; m < 0; m++) {
|
||||
// Computed only qnm for m>=0.
|
||||
// qnm[-m] = (-1)^m * conjg(qnm[m])
|
||||
const int sgn = 1 - 2*(m&1); // sgn = (-1)^m
|
||||
qn[jj++] = qnm_r[il][-m] * qnfac * sgn;
|
||||
qn[jj++] = -qnm_i[il][-m] * qnfac * sgn;
|
||||
}
|
||||
for (int m = 0; m < l+1; m++) {
|
||||
qn[jj++] = qnm_r[il][m] * qnfac;
|
||||
qn[jj++] = qnm_i[il][m] * qnfac;
|
||||
}
|
||||
@ -633,65 +640,92 @@ double ComputeOrientOrderAtom::associated_legendre(int l, int m, double x)
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
assign Clebsch-Gordan coefficients
|
||||
using the quasi-binomial formula VMK 8.2.1(3)
|
||||
specialized for case j1=j2=j=l
|
||||
Initialize table of Wigner 3j symbols
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ComputeOrientOrderAtom::init_clebsch_gordan()
|
||||
void ComputeOrientOrderAtom::init_wigner3j()
|
||||
{
|
||||
double sum,dcg,sfaccg, sfac1, sfac2;
|
||||
int m, aa2, bb2, cc2;
|
||||
int ifac, idxcg_count;
|
||||
|
||||
idxcg_count = 0;
|
||||
for (int il = 0; il < nqlist; il++) {
|
||||
int l = qlist[il];
|
||||
for (int m1 = 0; m1 < 2*l+1; m1++)
|
||||
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++)
|
||||
idxcg_count++;
|
||||
}
|
||||
idxcg_max = idxcg_count;
|
||||
memory->create(cglist, idxcg_max, "computeorientorderatom:cglist");
|
||||
int widx_count = 0;
|
||||
|
||||
idxcg_count = 0;
|
||||
for (int il = 0; il < nqlist; il++) {
|
||||
int l = qlist[il];
|
||||
for (int m1 = 0; m1 < 2*l+1; m1++) {
|
||||
aa2 = m1 - l;
|
||||
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
|
||||
bb2 = m2 - l;
|
||||
m = aa2 + bb2 + l;
|
||||
for (int il = 0; il<nqlist; il++) {
|
||||
const int l = qlist[il];
|
||||
|
||||
sum = 0.0;
|
||||
for (int z = MAX(0, MAX(-aa2, bb2));
|
||||
z <= MIN(l, MIN(l - aa2, l + bb2)); z++) {
|
||||
ifac = z % 2 ? -1 : 1;
|
||||
sum += ifac /
|
||||
(factorial(z) *
|
||||
factorial(l - z) *
|
||||
factorial(l - aa2 - z) *
|
||||
factorial(l + bb2 - z) *
|
||||
factorial(aa2 + z) *
|
||||
factorial(-bb2 + z));
|
||||
}
|
||||
|
||||
cc2 = m - l;
|
||||
sfaccg = sqrt(factorial(l + aa2) *
|
||||
factorial(l - aa2) *
|
||||
factorial(l + bb2) *
|
||||
factorial(l - bb2) *
|
||||
factorial(l + cc2) *
|
||||
factorial(l - cc2) *
|
||||
(2*l + 1));
|
||||
|
||||
sfac1 = factorial(3*l + 1);
|
||||
sfac2 = factorial(l);
|
||||
dcg = sqrt(sfac2*sfac2*sfac2 / sfac1);
|
||||
|
||||
cglist[idxcg_count] = sum * dcg * sfaccg;
|
||||
idxcg_count++;
|
||||
}
|
||||
for (int m1 = -l; m1<=0; m1++) {
|
||||
for (int m2 = 0; m2<=((-m1)>>1); m2++) {
|
||||
widx_count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
widx_max = widx_count;
|
||||
memory->create(w3jlist, widx_max, "computeorientorderatom:w3jlist");
|
||||
|
||||
widx_count = 0;
|
||||
|
||||
for (int il = 0; il<nqlist; il++) {
|
||||
const int l = qlist[il];
|
||||
|
||||
for (int m1 = -l; m1<=0; m1++) {
|
||||
for (int m2 = 0; m2<=((-m1)>>1); m2++) {
|
||||
const int m3 = -(m1 + m2);
|
||||
// Loop enforces -L<=m1<=0<=m2<=m3<=L, and m1+m2+m3=0
|
||||
|
||||
// For even L, W3j is invariant under permutation of
|
||||
// (m1,m2,m3) and (m1,m2,m3)->(-m1,-m2,-m3). The loop
|
||||
// structure enforces visiting only one member of each
|
||||
// such symmetry (invariance) group.
|
||||
|
||||
// Determine number of elements in symmetry group of (m1,m2,m3)
|
||||
// Concise determination exploiting (m1,m2,m3) loop structure.
|
||||
int pfac;
|
||||
if (m1 == 0) pfac = 1; // m1 = m2 = m3 = 0
|
||||
else if (m2 == 0 || m2 == m3) {
|
||||
// reduced group when only 3 permutations, or sign inversion
|
||||
// is equivalent to permutation
|
||||
pfac = 6;
|
||||
} else pfac = 12; // 6 permutations * 2 signs
|
||||
|
||||
w3jlist[widx_count] = w3j(l,m1,m2,m3) * pfac;
|
||||
widx_count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
double ComputeOrientOrderAtom::triangle_coeff(const int a, const int b, const int c) {
|
||||
return factorial(a+b-c)*factorial(a-b+c)*factorial(-a+b+c) / factorial(a+b+c+1);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
double ComputeOrientOrderAtom::w3j(const int lmax, const int j1, const int j2, const int j3) {
|
||||
const int a = lmax, b = lmax, c = lmax;
|
||||
const int alpha = j1, beta = j2, gamma = j3;
|
||||
struct {
|
||||
double operator() (const int a, const int b, const int c,
|
||||
const int alpha, const int beta,const int gamma,
|
||||
const int t) {
|
||||
return factorial(t)*factorial(c-b+t+alpha)*factorial(c-a+t-beta) * factorial(a+b-c-t)*factorial(a-t-alpha)*factorial(b-t+beta);
|
||||
}
|
||||
} x;
|
||||
const double
|
||||
sgn = 1 - 2*((a-b-gamma)&1),
|
||||
g = sqrt(triangle_coeff(lmax,lmax,lmax)) * sqrt(factorial(a+alpha)*factorial(a-alpha)*
|
||||
factorial(b+beta)*factorial(b-beta)*
|
||||
factorial(c+gamma)*factorial(c-gamma));
|
||||
double s = 0;
|
||||
int t = 0;
|
||||
while(c-b+t+alpha < 0 || c-a+t-beta < 0) t++;
|
||||
// ^^ t>=-j1 ^^ t>=j2
|
||||
while(1) {
|
||||
if (a+b-c-t < 0) break; // t<=lmax
|
||||
if (a-t-alpha < 0) break; // t<=lmax-j1
|
||||
if (b-t+beta < 0) break; // t<=lmax+j2
|
||||
const int m1t = 1 - 2*(t&1);
|
||||
s += m1t/x(lmax,lmax,lmax,alpha,beta,gamma,t);
|
||||
t++;
|
||||
}
|
||||
return sgn*g*s;
|
||||
}
|
||||
|
||||
@ -55,9 +55,11 @@ class ComputeOrientOrderAtom : public Compute {
|
||||
double polar_prefactor(int, int, double);
|
||||
double associated_legendre(int, int, double);
|
||||
|
||||
virtual void init_clebsch_gordan();
|
||||
double *cglist; // Clebsch-Gordan coeffs
|
||||
int idxcg_max;
|
||||
virtual void init_wigner3j();
|
||||
double triangle_coeff(const int a, const int b, const int c);
|
||||
double w3j(const int L, const int j1, const int j2, const int j3);
|
||||
double *w3jlist; // Wigner coeffs
|
||||
int widx_max;
|
||||
int chunksize;
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user