Add optimized version of compute_orientorder_atom from Tomas Oppelstrup (LLNL)

This commit is contained in:
Stan Moore
2021-10-12 07:50:05 -07:00
parent a6cde11896
commit 17316f92c7
4 changed files with 193 additions and 206 deletions

View File

@ -13,7 +13,11 @@
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Stan Moore (SNL)
Contributing authors: Stan Moore (SNL)
Tomas Oppelstrup (LLNL): Optimization which reduces the number
of iterations in the L,m1,m2 loops (by a factor of up to 10), and
avoids evaluation of Ylm functions of negative m
------------------------------------------------------------------------- */
#include "compute_orientorder_atom_kokkos.h"
@ -145,7 +149,7 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::compute_peratom()
chunk_offset = 0;
if (chunk_size > (int)d_ncount.extent(0)) {
d_qnm = t_sna_3c("orientorder/atom:qnm",chunk_size,nqlist,2*qmax+1);
d_qnm = t_sna_3c("orientorder/atom:qnm",chunk_size,nqlist,qmax+1);
d_ncount = t_sna_1i("orientorder/atom:ncount",chunk_size);
}
@ -337,7 +341,6 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrder
calc_boop2(ncount, ii);
}
/* ----------------------------------------------------------------------
select3 routine from Numerical Recipes (slightly modified)
find k smallest values in array of length n
@ -471,26 +474,14 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop1(int /*ncount*/, int ii
//d_qnm(ii,il,l).re += polar_prefactor(l, 0, costheta);
const double polar_pf = polar_prefactor(l, 0, costheta);
Kokkos::atomic_add(&(d_qnm(ii,il,l).re), polar_pf);
Kokkos::atomic_add(&(d_qnm(ii,il,0).re), polar_pf);
SNAcomplex expphim = {expphi.re,expphi.im};
for (int m = 1; m <= +l; m++) {
const double prefactor = polar_prefactor(l, m, costheta);
SNAcomplex ylm = {prefactor * expphim.re, prefactor * expphim.im};
//d_qnm(ii,il,m+l).re += ylm.re;
//d_qnm(ii,il,m+l).im += ylm.im;
Kokkos::atomic_add(&(d_qnm(ii,il,m+l).re), ylm.re);
Kokkos::atomic_add(&(d_qnm(ii,il,m+l).im), ylm.im);
if (m & 1) {
//d_qnm(ii,il,-m+l).re -= ylm.re;
//d_qnm(ii,il,-m+l).im += ylm.im;
Kokkos::atomic_add(&(d_qnm(ii,il,-m+l).re), -ylm.re);
Kokkos::atomic_add(&(d_qnm(ii,il,-m+l).im), ylm.im);
} else {
//d_qnm(ii,il,-m+l).re += ylm.re;
//d_qnm(ii,il,-m+l).im -= ylm.im;
Kokkos::atomic_add(&(d_qnm(ii,il,-m+l).re), ylm.re);
Kokkos::atomic_add(&(d_qnm(ii,il,-m+l).im), -ylm.im);
}
Kokkos::atomic_add(&(d_qnm(ii,il,m).re), ylm.re);
Kokkos::atomic_add(&(d_qnm(ii,il,m).im), ylm.im);
// Skip calculation of qnm for m<0 due to symmetry
SNAcomplex tmp;
tmp.re = expphim.re*expphi.re - expphim.im*expphi.im;
tmp.im = expphim.re*expphi.im + expphim.im*expphi.re;
@ -515,7 +506,7 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) co
double facn = 1.0 / ncount;
for (int il = 0; il < nqlist; il++) {
int l = d_qlist[il];
for (int m = 0; m < 2*l+1; m++) {
for (int m = 0; m < l+1; m++) {
d_qnm(ii,il,m).re *= facn;
d_qnm(ii,il,m).im *= facn;
}
@ -528,56 +519,58 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) co
for (int il = 0; il < nqlist; il++) {
int l = d_qlist[il];
double qnormfac = sqrt(MY_4PI/(2*l+1));
double qm_sum = 0.0;
for (int m = 0; m < 2*l+1; m++)
qm_sum += d_qnm(ii,il,m).re*d_qnm(ii,il,m).re + d_qnm(ii,il,m).im*d_qnm(ii,il,m).im;
double qm_sum = d_qnm(ii,il,0).re*d_qnm(ii,il,0).re;
for (int m = 1; m < l+1; m++)
qm_sum += 2.0*(d_qnm(ii,il,m).re*d_qnm(ii,il,m).re + d_qnm(ii,il,m).im*d_qnm(ii,il,m).im);
d_qnarray(i,jj++) = qnormfac * sqrt(qm_sum);
}
// calculate W_l
if (wlflag) {
int idxcg_count = 0;
int nterms = 0;
int widx_count = 0;
if (wlflag || wlhatflag) {
for (int il = 0; il < nqlist; il++) {
int l = d_qlist[il];
double wlsum = 0.0;
for (int m1 = 0; m1 < 2*l+1; m1++) {
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
int m = m1 + m2 - l;
SNAcomplex qm1qm2;
qm1qm2.re = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).re - d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).im;
qm1qm2.im = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).im + d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).re;
wlsum += (qm1qm2.re*d_qnm(ii,il,m).re + qm1qm2.im*d_qnm(ii,il,m).im)*d_cglist[idxcg_count];
idxcg_count++;
for (int m1 = -l; m1 <= 0; m1++) {
for (int m2 = 0; m2 <= ((-m1)>>1); m2++) {
const int m3 = -(m1 + m2);
// Loop enforces -L<=m1<=0<=m2<=m3<=L, and m1+m2+m3=0
// For even L, W3j is invariant under permutation of
// (m1,m2,m3) and (m1,m2,m3)->(-m1,-m2,-m3). The loop
// structure enforces visiting only one member of each
// such symmetry (invariance) group.
const int sgn = 1 - 2*(m1&1);
// m1 <= 0, and Qlm[-m] = (-1)^m*conjg(Qlm[m]). sgn = (-1)^m.
SNAcomplex Q1Q2;
Q1Q2.re = (d_qnm(ii,il,-m1).re*d_qnm(ii,il,m2).re + d_qnm(ii,il,-m1).im*d_qnm(ii,il,m2).im)*sgn;
Q1Q2.im = (d_qnm(ii,il,-m1).re*d_qnm(ii,il,m2).im - d_qnm(ii,il,-m1).im*d_qnm(ii,il,m2).re)*sgn;
const double Q1Q2Q3 = Q1Q2.re*d_qnm(ii,il,m3).re - Q1Q2.im*d_qnm(ii,il,m3).im;
const double c = d_w3jlist[widx_count++];
wlsum += Q1Q2Q3*c;
}
}
d_qnarray(i,jj++) = wlsum/sqrt(2.0*l+1.0);
nterms++;
}
}
// calculate W_l_hat
if (wlhatflag) {
int idxcg_count = 0;
const int jptr = jj-nterms;
if (!wlflag) jj = jptr;
for (int il = 0; il < nqlist; il++) {
int l = d_qlist[il];
double wlsum = 0.0;
for (int m1 = 0; m1 < 2*l+1; m1++) {
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
const int m = m1 + m2 - l;
SNAcomplex qm1qm2;
qm1qm2.re = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).re - d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).im;
qm1qm2.im = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).im + d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).re;
wlsum += (qm1qm2.re*d_qnm(ii,il,m).re + qm1qm2.im*d_qnm(ii,il,m).im)*d_cglist[idxcg_count];
idxcg_count++;
}
}
if (d_qnarray(i,il) < QEPSILON)
d_qnarray(i,jj++) = 0.0;
else {
const double qnormfac = sqrt(MY_4PI/(2*l+1));
const double qnfac = qnormfac/d_qnarray(i,il);
d_qnarray(i,jj++) = wlsum/sqrt(2.0*l+1.0)*(qnfac*qnfac*qnfac);
d_qnarray(i,jj++) = d_qnarray(i,jptr+il) * (qnfac*qnfac*qnfac) * sqrt(2.0*l+1.0);
}
}
}
@ -595,7 +588,14 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) co
else {
const double qnormfac = sqrt(MY_4PI/(2*l+1));
const double qnfac = qnormfac/d_qnarray(i,il);
for (int m = 0; m < 2*l+1; m++) {
for (int m = -l; m < 0; m++) {
// Computed only qnm for m>=0.
// qnm[-m] = (-1)^m * conjg(qnm[m])
const int sgn = 1 - 2*(m&1); // sgn = (-1)^m
d_qnarray(i,jj++) = d_qnm(ii,il,-m).re * qnfac * sgn;
d_qnarray(i,jj++) = -d_qnm(ii,il,-m).im * qnfac * sgn;
}
for (int m = 0; m < l+1; m++) {
d_qnarray(i,jj++) = d_qnm(ii,il,m).re * qnfac;
d_qnarray(i,jj++) = d_qnm(ii,il,m).im * qnfac;
}
@ -657,70 +657,21 @@ double ComputeOrientOrderAtomKokkos<DeviceType>::associated_legendre(int l, int
}
/* ----------------------------------------------------------------------
assign Clebsch-Gordan coefficients
using the quasi-binomial formula VMK 8.2.1(3)
specialized for case j1=j2=j=l
Initialize table of Wigner 3j symbols
------------------------------------------------------------------------- */
template<class DeviceType>
void ComputeOrientOrderAtomKokkos<DeviceType>::init_clebsch_gordan()
void ComputeOrientOrderAtomKokkos<DeviceType>::init_wigner3j()
{
double sum,dcg,sfaccg, sfac1, sfac2;
int m, aa2, bb2, cc2;
int ifac, idxcg_count;
ComputeOrientOrderAtom::init_wigner3j();
idxcg_count = 0;
for (int il = 0; il < nqlist; il++) {
int l = qlist[il];
for (int m1 = 0; m1 < 2*l+1; m1++)
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++)
idxcg_count++;
}
idxcg_max = idxcg_count;
d_cglist = t_sna_1d("orientorder/atom:d_cglist",idxcg_max);
auto h_cglist = Kokkos::create_mirror_view(d_cglist);
d_w3jlist = t_sna_1d("computeorientorderatom:w3jlist",widx_max);
auto h_w3jlist = Kokkos::create_mirror_view(d_w3jlist);
idxcg_count = 0;
for (int il = 0; il < nqlist; il++) {
int l = qlist[il];
for (int m1 = 0; m1 < 2*l+1; m1++) {
aa2 = m1 - l;
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
bb2 = m2 - l;
m = aa2 + bb2 + l;
for (int i = 0; i< widx_max; i++)
h_w3jlist(i) = w3jlist[i];
sum = 0.0;
for (int z = MAX(0, MAX(-aa2, bb2));
z <= MIN(l, MIN(l - aa2, l + bb2)); z++) {
ifac = z % 2 ? -1 : 1;
sum += ifac /
(factorial(z) *
factorial(l - z) *
factorial(l - aa2 - z) *
factorial(l + bb2 - z) *
factorial(aa2 + z) *
factorial(-bb2 + z));
}
cc2 = m - l;
sfaccg = sqrt(factorial(l + aa2) *
factorial(l - aa2) *
factorial(l + bb2) *
factorial(l - bb2) *
factorial(l + cc2) *
factorial(l - cc2) *
(2*l + 1));
sfac1 = factorial(3*l + 1);
sfac2 = factorial(l);
dcg = sqrt(sfac2*sfac2*sfac2 / sfac1);
h_cglist[idxcg_count] = sum * dcg * sfaccg;
idxcg_count++;
}
}
}
Kokkos::deep_copy(d_cglist,h_cglist);
Kokkos::deep_copy(d_w3jlist,h_w3jlist);
}
/* ----------------------------------------------------------------------

View File

@ -127,8 +127,8 @@ class ComputeOrientOrderAtomKokkos : public ComputeOrientOrderAtom {
KOKKOS_INLINE_FUNCTION
double associated_legendre(int, int, double) const;
void init_clebsch_gordan();
t_sna_1d d_cglist; // Clebsch-Gordan coeffs
void init_wigner3j();
t_sna_1d d_w3jlist; // Wigner coeffs
};
}

View File

@ -13,8 +13,11 @@
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Aidan Thompson (SNL)
Axel Kohlmeyer (Temple U)
Contributing authors: Aidan Thompson (SNL), Axel Kohlmeyer (Temple U)
Tomas Oppelstrup (LLNL): Optimization which reduces the number
of iterations in the L,m1,m2 loops (by a factor of up to 10), and
avoids evaluation of Ylm functions of negative m
------------------------------------------------------------------------- */
#include "compute_orientorder_atom.h"
@ -54,7 +57,7 @@ using namespace MathSpecial;
ComputeOrientOrderAtom::ComputeOrientOrderAtom(LAMMPS *lmp, int narg, char **arg) :
Compute(lmp, narg, arg),
qlist(nullptr), distsq(nullptr), nearest(nullptr), rlist(nullptr),
qnarray(nullptr), qnm_r(nullptr), qnm_i(nullptr), cglist(nullptr)
qnarray(nullptr), qnm_r(nullptr), qnm_i(nullptr), w3jlist(nullptr)
{
if (narg < 3 ) error->all(FLERR,"Illegal compute orientorder/atom command");
@ -166,7 +169,7 @@ ComputeOrientOrderAtom::~ComputeOrientOrderAtom()
memory->destroy(qlist);
memory->destroy(qnm_r);
memory->destroy(qnm_i);
memory->destroy(cglist);
memory->destroy(w3jlist);
}
/* ---------------------------------------------------------------------- */
@ -181,8 +184,8 @@ void ComputeOrientOrderAtom::init()
error->all(FLERR,"Compute orientorder/atom cutoff is "
"longer than pairwise cutoff");
memory->create(qnm_r,nqlist,2*qmax+1,"orientorder/atom:qnm_r");
memory->create(qnm_i,nqlist,2*qmax+1,"orientorder/atom:qnm_i");
memory->create(qnm_r,nqlist,qmax+1,"orientorder/atom:qnm_r");
memory->create(qnm_i,nqlist,qmax+1,"orientorder/atom:qnm_i");
// need an occasional full neighbor list
@ -199,7 +202,7 @@ void ComputeOrientOrderAtom::init()
if (count > 1 && comm->me == 0)
error->warning(FLERR,"More than one compute orientorder/atom");
if (wlflag || wlhatflag) init_clebsch_gordan();
if (wlflag || wlhatflag) init_wigner3j();
}
/* ---------------------------------------------------------------------- */
@ -424,7 +427,7 @@ void ComputeOrientOrderAtom::calc_boop(double **rlist,
for (int il = 0; il < nqlist; il++) {
int l = qlist[il];
for (int m = 0; m < 2*l+1; m++) {
for (int m = 0; m < l+1; m++) {
qnm_r[il][m] = 0.0;
qnm_i[il][m] = 0.0;
}
@ -457,7 +460,7 @@ void ComputeOrientOrderAtom::calc_boop(double **rlist,
// Ylm, -l <= m <= l
// sign convention: sign(Yll(0,0)) = (-1)^l
qnm_r[il][l] += polar_prefactor(l, 0, costheta);
qnm_r[il][0] += polar_prefactor(l, 0, costheta);
double expphim_r = expphi_r;
double expphim_i = expphi_i;
for (int m = 1; m <= +l; m++) {
@ -465,15 +468,9 @@ void ComputeOrientOrderAtom::calc_boop(double **rlist,
double prefactor = polar_prefactor(l, m, costheta);
double ylm_r = prefactor * expphim_r;
double ylm_i = prefactor * expphim_i;
qnm_r[il][m+l] += ylm_r;
qnm_i[il][m+l] += ylm_i;
if (m & 1) {
qnm_r[il][-m+l] -= ylm_r;
qnm_i[il][-m+l] += ylm_i;
} else {
qnm_r[il][-m+l] += ylm_r;
qnm_i[il][-m+l] -= ylm_i;
}
qnm_r[il][m] += ylm_r;
qnm_i[il][m] += ylm_i;
// Skip calculation of qnm for m<0 due to symmetry
double tmp_r = expphim_r*expphi_r - expphim_i*expphi_i;
double tmp_i = expphim_r*expphi_i + expphim_i*expphi_r;
expphim_r = tmp_r;
@ -488,7 +485,7 @@ void ComputeOrientOrderAtom::calc_boop(double **rlist,
double facn = 1.0 / ncount;
for (int il = 0; il < nqlist; il++) {
int l = qlist[il];
for (int m = 0; m < 2*l+1; m++) {
for (int m = 0; m < l+1; m++) {
qnm_r[il][m] *= facn;
qnm_i[il][m] *= facn;
}
@ -501,54 +498,57 @@ void ComputeOrientOrderAtom::calc_boop(double **rlist,
for (int il = 0; il < nqlist; il++) {
int l = qlist[il];
double qnormfac = sqrt(MY_4PI/(2*l+1));
double qm_sum = 0.0;
for (int m = 0; m < 2*l+1; m++)
qm_sum += qnm_r[il][m]*qnm_r[il][m] + qnm_i[il][m]*qnm_i[il][m];
double qm_sum = qnm_r[il][0]*qnm_r[il][0];
for (int m = 1; m < l+1; m++)
qm_sum += 2.0*(qnm_r[il][m]*qnm_r[il][m] + qnm_i[il][m]*qnm_i[il][m]);
qn[jj++] = qnormfac * sqrt(qm_sum);
}
// calculate W_l
if (wlflag) {
int idxcg_count = 0;
int nterms = 0;
int widx_count = 0;
if (wlflag || wlhatflag) {
for (int il = 0; il < nqlist; il++) {
int l = qlist[il];
double wlsum = 0.0;
for (int m1 = 0; m1 < 2*l+1; m1++) {
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
int m = m1 + m2 - l;
double qm1qm2_r = qnm_r[il][m1]*qnm_r[il][m2] - qnm_i[il][m1]*qnm_i[il][m2];
double qm1qm2_i = qnm_r[il][m1]*qnm_i[il][m2] + qnm_i[il][m1]*qnm_r[il][m2];
wlsum += (qm1qm2_r*qnm_r[il][m] + qm1qm2_i*qnm_i[il][m])*cglist[idxcg_count];
idxcg_count++;
for (int m1 = -l; m1 <= 0; m1++) {
for (int m2 = 0; m2 <= ((-m1)>>1); m2++) {
const int m3 = -(m1 + m2);
// Loop enforces -L<=m1<=0<=m2<=m3<=L, and m1+m2+m3=0
// For even L, W3j is invariant under permutation of
// (m1,m2,m3) and (m1,m2,m3)->(-m1,-m2,-m3). The loop
// structure enforces visiting only one member of each
// such symmetry (invariance) group.
const int sgn = 1 - 2*(m1&1);
// m1 <= 0, and Qlm[-m] = (-1)^m*conjg(Qlm[m]). sgn = (-1)^m.
const double Q1Q2_r = (qnm_r[il][-m1]*qnm_r[il][m2] + qnm_i[il][-m1]*qnm_i[il][m2])*sgn;
const double Q1Q2_i = (qnm_r[il][-m1]*qnm_i[il][m2] - qnm_i[il][-m1]*qnm_r[il][m2])*sgn;
const double Q1Q2Q3 = Q1Q2_r*qnm_r[il][m3] - Q1Q2_i*qnm_i[il][m3];
const double c = w3jlist[widx_count++];
wlsum += Q1Q2Q3*c;
}
}
qn[jj++] = wlsum/sqrt(2*l+1);
nterms++;
}
}
// calculate W_l_hat
if (wlhatflag) {
int idxcg_count = 0;
const int jptr = jj-nterms;
if (!wlflag) jj = jptr;
for (int il = 0; il < nqlist; il++) {
int l = qlist[il];
double wlsum = 0.0;
for (int m1 = 0; m1 < 2*l+1; m1++) {
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
int m = m1 + m2 - l;
double qm1qm2_r = qnm_r[il][m1]*qnm_r[il][m2] - qnm_i[il][m1]*qnm_i[il][m2];
double qm1qm2_i = qnm_r[il][m1]*qnm_i[il][m2] + qnm_i[il][m1]*qnm_r[il][m2];
wlsum += (qm1qm2_r*qnm_r[il][m] + qm1qm2_i*qnm_i[il][m])*cglist[idxcg_count];
idxcg_count++;
}
}
if (qn[il] < QEPSILON)
qn[jj++] = 0.0;
else {
double qnormfac = sqrt(MY_4PI/(2*l+1));
double qnfac = qnormfac/qn[il];
qn[jj++] = wlsum/sqrt(2*l+1)*(qnfac*qnfac*qnfac);
qn[jj++] = qn[jptr+il] * (qnfac*qnfac*qnfac) * sqrt(2*l+1);
}
}
}
@ -566,7 +566,14 @@ void ComputeOrientOrderAtom::calc_boop(double **rlist,
else {
double qnormfac = sqrt(MY_4PI/(2*l+1));
double qnfac = qnormfac/qn[il];
for (int m = 0; m < 2*l+1; m++) {
for (int m = -l; m < 0; m++) {
// Computed only qnm for m>=0.
// qnm[-m] = (-1)^m * conjg(qnm[m])
const int sgn = 1 - 2*(m&1); // sgn = (-1)^m
qn[jj++] = qnm_r[il][-m] * qnfac * sgn;
qn[jj++] = -qnm_i[il][-m] * qnfac * sgn;
}
for (int m = 0; m < l+1; m++) {
qn[jj++] = qnm_r[il][m] * qnfac;
qn[jj++] = qnm_i[il][m] * qnfac;
}
@ -633,65 +640,92 @@ double ComputeOrientOrderAtom::associated_legendre(int l, int m, double x)
}
/* ----------------------------------------------------------------------
assign Clebsch-Gordan coefficients
using the quasi-binomial formula VMK 8.2.1(3)
specialized for case j1=j2=j=l
Initialize table of Wigner 3j symbols
------------------------------------------------------------------------- */
void ComputeOrientOrderAtom::init_clebsch_gordan()
void ComputeOrientOrderAtom::init_wigner3j()
{
double sum,dcg,sfaccg, sfac1, sfac2;
int m, aa2, bb2, cc2;
int ifac, idxcg_count;
idxcg_count = 0;
for (int il = 0; il < nqlist; il++) {
int l = qlist[il];
for (int m1 = 0; m1 < 2*l+1; m1++)
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++)
idxcg_count++;
}
idxcg_max = idxcg_count;
memory->create(cglist, idxcg_max, "computeorientorderatom:cglist");
int widx_count = 0;
idxcg_count = 0;
for (int il = 0; il < nqlist; il++) {
int l = qlist[il];
for (int m1 = 0; m1 < 2*l+1; m1++) {
aa2 = m1 - l;
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
bb2 = m2 - l;
m = aa2 + bb2 + l;
for (int il = 0; il<nqlist; il++) {
const int l = qlist[il];
sum = 0.0;
for (int z = MAX(0, MAX(-aa2, bb2));
z <= MIN(l, MIN(l - aa2, l + bb2)); z++) {
ifac = z % 2 ? -1 : 1;
sum += ifac /
(factorial(z) *
factorial(l - z) *
factorial(l - aa2 - z) *
factorial(l + bb2 - z) *
factorial(aa2 + z) *
factorial(-bb2 + z));
}
cc2 = m - l;
sfaccg = sqrt(factorial(l + aa2) *
factorial(l - aa2) *
factorial(l + bb2) *
factorial(l - bb2) *
factorial(l + cc2) *
factorial(l - cc2) *
(2*l + 1));
sfac1 = factorial(3*l + 1);
sfac2 = factorial(l);
dcg = sqrt(sfac2*sfac2*sfac2 / sfac1);
cglist[idxcg_count] = sum * dcg * sfaccg;
idxcg_count++;
}
for (int m1 = -l; m1<=0; m1++) {
for (int m2 = 0; m2<=((-m1)>>1); m2++) {
widx_count++;
}
}
}
widx_max = widx_count;
memory->create(w3jlist, widx_max, "computeorientorderatom:w3jlist");
widx_count = 0;
for (int il = 0; il<nqlist; il++) {
const int l = qlist[il];
for (int m1 = -l; m1<=0; m1++) {
for (int m2 = 0; m2<=((-m1)>>1); m2++) {
const int m3 = -(m1 + m2);
// Loop enforces -L<=m1<=0<=m2<=m3<=L, and m1+m2+m3=0
// For even L, W3j is invariant under permutation of
// (m1,m2,m3) and (m1,m2,m3)->(-m1,-m2,-m3). The loop
// structure enforces visiting only one member of each
// such symmetry (invariance) group.
// Determine number of elements in symmetry group of (m1,m2,m3)
// Concise determination exploiting (m1,m2,m3) loop structure.
int pfac;
if (m1 == 0) pfac = 1; // m1 = m2 = m3 = 0
else if (m2 == 0 || m2 == m3) {
// reduced group when only 3 permutations, or sign inversion
// is equivalent to permutation
pfac = 6;
} else pfac = 12; // 6 permutations * 2 signs
w3jlist[widx_count] = w3j(l,m1,m2,m3) * pfac;
widx_count++;
}
}
}
}
/* ---------------------------------------------------------------------- */
double ComputeOrientOrderAtom::triangle_coeff(const int a, const int b, const int c) {
return factorial(a+b-c)*factorial(a-b+c)*factorial(-a+b+c) / factorial(a+b+c+1);
}
/* ---------------------------------------------------------------------- */
double ComputeOrientOrderAtom::w3j(const int lmax, const int j1, const int j2, const int j3) {
const int a = lmax, b = lmax, c = lmax;
const int alpha = j1, beta = j2, gamma = j3;
struct {
double operator() (const int a, const int b, const int c,
const int alpha, const int beta,const int gamma,
const int t) {
return factorial(t)*factorial(c-b+t+alpha)*factorial(c-a+t-beta) * factorial(a+b-c-t)*factorial(a-t-alpha)*factorial(b-t+beta);
}
} x;
const double
sgn = 1 - 2*((a-b-gamma)&1),
g = sqrt(triangle_coeff(lmax,lmax,lmax)) * sqrt(factorial(a+alpha)*factorial(a-alpha)*
factorial(b+beta)*factorial(b-beta)*
factorial(c+gamma)*factorial(c-gamma));
double s = 0;
int t = 0;
while(c-b+t+alpha < 0 || c-a+t-beta < 0) t++;
// ^^ t>=-j1 ^^ t>=j2
while(1) {
if (a+b-c-t < 0) break; // t<=lmax
if (a-t-alpha < 0) break; // t<=lmax-j1
if (b-t+beta < 0) break; // t<=lmax+j2
const int m1t = 1 - 2*(t&1);
s += m1t/x(lmax,lmax,lmax,alpha,beta,gamma,t);
t++;
}
return sgn*g*s;
}

View File

@ -55,9 +55,11 @@ class ComputeOrientOrderAtom : public Compute {
double polar_prefactor(int, int, double);
double associated_legendre(int, int, double);
virtual void init_clebsch_gordan();
double *cglist; // Clebsch-Gordan coeffs
int idxcg_max;
virtual void init_wigner3j();
double triangle_coeff(const int a, const int b, const int c);
double w3j(const int L, const int j1, const int j2, const int j3);
double *w3jlist; // Wigner coeffs
int widx_max;
int chunksize;
};