Files
lammps/src/INTEL/sna_intel.cpp
2023-09-14 17:11:51 -04:00

1506 lines
49 KiB
C++

// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: W. Michael Brown, Intel
------------------------------------------------------------------------- */
#if defined(__AVX512F__)
#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
#include "sna_intel.h"
#include "comm.h"
#include "error.h"
#include "math_const.h"
#include "math_special.h"
#include "memory.h"
#include <cmath>
using namespace std;
using namespace LAMMPS_NS;
using namespace MathConst;
using namespace MathSpecial;
using namespace ip_simd;
/* ----------------------------------------------------------------------
this implementation is based on the method outlined
in Bartok[1], using formulae from VMK[2].
for the Clebsch-Gordan coefficients, we
convert the VMK half-integral labels
a, b, c, alpha, beta, gamma
to array offsets j1, j2, j, m1, m2, m
using the following relations:
j1 = 2*a
j2 = 2*b
j = 2*c
m1 = alpha+a 2*alpha = 2*m1 - j1
m2 = beta+b or 2*beta = 2*m2 - j2
m = gamma+c 2*gamma = 2*m - j
in this way:
-a <= alpha <= a
-b <= beta <= b
-c <= gamma <= c
becomes:
0 <= m1 <= j1
0 <= m2 <= j2
0 <= m <= j
and the requirement that
a+b+c be integral implies that
j1+j2+j must be even.
The requirement that:
gamma = alpha+beta
becomes:
2*m - j = 2*m1 - j1 + 2*m2 - j2
Similarly, for the Wigner U-functions U(J,m,m') we
convert the half-integral labels J,m,m' to
array offsets j,ma,mb:
j = 2*J
ma = J+m
mb = J+m'
so that:
0 <= j <= 2*Jmax
0 <= ma, mb <= j.
For the bispectrum components B(J1,J2,J) we convert to:
j1 = 2*J1
j2 = 2*J2
j = 2*J
and the requirement:
|J1-J2| <= J <= J1+J2, for j1+j2+j integral
becomes:
|j1-j2| <= j <= j1+j2, for j1+j2+j even integer
or
j = |j1-j2|, |j1-j2|+2,...,j1+j2-2,j1+j2
[1] Albert Bartok-Partay, "Gaussian Approximation..."
Doctoral Thesis, Cambridge University, (2009)
[2] D. A. Varshalovich, A. N. Moskalev, and V. K. Khersonskii,
"Quantum Theory of Angular Momentum," World Scientific (1988)
------------------------------------------------------------------------- */
SNAIntel::SNAIntel(LAMMPS* lmp, double rfac0_in, int twojmax_in,
double rmin0_in, int switch_flag_in, int bzero_flag_in,
int chem_flag_in, int bnorm_flag_in, int wselfall_flag_in,
int nelements_in, int switch_inner_flag_in) : Pointers(lmp)
{
wself = 1.0;
rfac0 = rfac0_in;
rmin0 = rmin0_in;
switch_flag = switch_flag_in;
switch_inner_flag = switch_inner_flag_in;
bzero_flag = bzero_flag_in;
chem_flag = chem_flag_in;
bnorm_flag = bnorm_flag_in;
wselfall_flag = wselfall_flag_in;
if (bnorm_flag != chem_flag)
lmp->error->warning(FLERR, "bnormflag and chemflag are not equal."
"This is probably not what you intended");
if (chem_flag)
nelements = nelements_in;
else
nelements = 1;
twojmax = twojmax_in;
compute_ncoeff();
rij = nullptr;
inside = nullptr;
wj = nullptr;
rcutij = nullptr;
sinnerij = nullptr;
dinnerij = nullptr;
element = nullptr;
nmax = 0;
idxz = nullptr;
idxb = nullptr;
ulist_r_ij = nullptr;
ulist_i_ij = nullptr;
build_indexlist();
create_twojmax_arrays();
if (bzero_flag) {
double www = wself*wself*wself;
for (int j = 0; j <= twojmax; j++)
if (bnorm_flag)
bzero[j] = www;
else
bzero[j] = www*(j+1);
}
}
/* ---------------------------------------------------------------------- */
SNAIntel::~SNAIntel()
{
memory->destroy(rij);
memory->destroy(inside);
memory->destroy(wj);
memory->destroy(rcutij);
memory->destroy(sinnerij);
memory->destroy(dinnerij);
if (chem_flag) memory->destroy(element);
memory->destroy(ulist_r_ij);
memory->destroy(ulist_i_ij);
delete[] idxz;
delete[] idxb;
destroy_twojmax_arrays();
}
void SNAIntel::build_indexlist()
{
// index list for cglist
int jdim = twojmax + 1;
memory->create(idxcg_block, jdim, jdim, jdim,
"sna:idxcg_block");
int idxcg_count = 0;
for (int j1 = 0; j1 <= twojmax; j1++)
for (int j2 = 0; j2 <= j1; j2++)
for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) {
idxcg_block[j1][j2][j] = idxcg_count;
for (int m1 = 0; m1 <= j1; m1++)
for (int m2 = 0; m2 <= j2; m2++)
idxcg_count++;
}
idxcg_max = idxcg_count;
// index list for uarray
// need to include both halves
memory->create(idxu_block, jdim,
"sna:idxu_block");
int idxu_count = 0;
for (int j = 0; j <= twojmax; j++) {
idxu_block[j] = idxu_count;
for (int mb = 0; mb <= j; mb++)
for (int ma = 0; ma <= j; ma++)
idxu_count++;
}
idxu_max = idxu_count;
// index list for beta and B
int idxb_count = 0;
for (int j1 = 0; j1 <= twojmax; j1++)
for (int j2 = 0; j2 <= j1; j2++)
for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2)
if (j >= j1) idxb_count++;
idxb_max = idxb_count;
idxb = new SNA_BINDICES[idxb_max];
idxb_count = 0;
for (int j1 = 0; j1 <= twojmax; j1++)
for (int j2 = 0; j2 <= j1; j2++)
for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2)
if (j >= j1) {
idxb[idxb_count].j1 = j1;
idxb[idxb_count].j2 = j2;
idxb[idxb_count].j = j;
idxb_count++;
}
// reverse index list for beta and b
memory->create(idxb_block, jdim, jdim, jdim,
"sna:idxb_block");
idxb_count = 0;
for (int j1 = 0; j1 <= twojmax; j1++)
for (int j2 = 0; j2 <= j1; j2++)
for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) {
if (j >= j1) {
idxb_block[j1][j2][j] = idxb_count;
idxb_count++;
}
}
// index list for zlist
int idxz_count = 0;
for (int j1 = 0; j1 <= twojmax; j1++)
for (int j2 = 0; j2 <= j1; j2++)
for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2)
for (int mb = 0; 2*mb <= j; mb++)
for (int ma = 0; ma <= j; ma++)
idxz_count++;
idxz_max = idxz_count;
idxz = new SNA_ZINDICES[idxz_max];
memory->create(idxz_block, jdim, jdim, jdim,
"sna:idxz_block");
idxz_count = 0;
for (int j1 = 0; j1 <= twojmax; j1++)
for (int j2 = 0; j2 <= j1; j2++)
for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) {
idxz_block[j1][j2][j] = idxz_count;
// find right beta[jjb] entry
// multiply and divide by j+1 factors
// account for multiplicity of 1, 2, or 3
for (int mb = 0; 2*mb <= j; mb++)
for (int ma = 0; ma <= j; ma++) {
idxz[idxz_count].j1 = j1;
idxz[idxz_count].j2 = j2;
idxz[idxz_count].j = j;
idxz[idxz_count].ma1min = MAX(0, (2 * ma - j - j2 + j1) / 2);
idxz[idxz_count].ma2max = (2 * ma - j - (2 * idxz[idxz_count].ma1min - j1) + j2) / 2;
idxz[idxz_count].na = MIN(j1, (2 * ma - j + j2 + j1) / 2) - idxz[idxz_count].ma1min + 1;
idxz[idxz_count].mb1min = MAX(0, (2 * mb - j - j2 + j1) / 2);
idxz[idxz_count].mb2max = (2 * mb - j - (2 * idxz[idxz_count].mb1min - j1) + j2) / 2;
idxz[idxz_count].nb = MIN(j1, (2 * mb - j + j2 + j1) / 2) - idxz[idxz_count].mb1min + 1;
// apply to z(j1,j2,j,ma,mb) to unique element of y(j)
const int jju = idxu_block[j] + (j+1)*mb + ma;
idxz[idxz_count].jju = jju;
idxz_count++;
}
}
}
/* ---------------------------------------------------------------------- */
void SNAIntel::init()
{
init_clebsch_gordan();
// print_clebsch_gordan();
init_rootpqarray();
}
void SNAIntel::grow_rij(int newnmax)
{
if (newnmax <= nmax) return;
nmax = newnmax;
memory->destroy(rij);
memory->destroy(inside);
memory->destroy(wj);
memory->destroy(rcutij);
memory->destroy(sinnerij);
memory->destroy(dinnerij);
if (chem_flag) memory->destroy(element);
memory->destroy(ulist_r_ij);
memory->destroy(ulist_i_ij);
memory->create(rij, nmax, 3, "pair:rij");
memory->create(inside, nmax, "pair:inside");
memory->create(wj, nmax, "pair:wj");
memory->create(rcutij, nmax, "pair:rcutij");
memory->create(sinnerij, nmax, "pair:sinnerij");
memory->create(dinnerij, nmax, "pair:dinnerij");
if (chem_flag) memory->create(element, nmax, "sna:element");
memory->create(ulist_r_ij, nmax, idxu_max, "sna:ulist_ij");
memory->create(ulist_i_ij, nmax, idxu_max, "sna:ulist_ij");
}
/* ----------------------------------------------------------------------
compute Ui by summing over neighbors j
------------------------------------------------------------------------- */
void SNAIntel::compute_ui(const SNA_IVEC &jnum, const SNA_IVEC &ielem,
const int max_jnum)
{
// utot(j,ma,mb) = 0 for all j,ma,ma
// utot(j,ma,ma) = 1 for all j,ma
// for j in neighbors of i:
// compute r0 = (x,y,z,z0)
// utot(j,ma,mb) += u(r0;j,ma,mb) for all j,ma,mb
zero_uarraytot(ielem);
for (int j = 0; j < max_jnum; j++) {
const SNA_DVEC x = rij[j][0];
const SNA_DVEC y = rij[j][1];
const SNA_DVEC z = rij[j][2];
const SNA_DVEC rcut = rcutij[j];
const SNA_DVEC rsq = x * x + y * y + z * z;
const SNA_DVEC r = SIMD_sqrt(rsq);
const SNA_DVEC rscale0 = SIMD_rcp(rcut - rmin0) * rfac0 * MY_PI;
const SNA_DVEC theta0 = (r - rmin0) * rscale0;
const SNA_DVEC z0 = r * SIMD_rcp(SIMD_tan(theta0));
compute_uarray(x, y, z, z0, r, j, jnum);
add_uarraytot(r, j, jnum);
}
}
/* ----------------------------------------------------------------------
pick out right beta value
------------------------------------------------------------------------- */
double SNAIntel::choose_beta(const int j, const int j1, const int j2,
const int elem1, const int elem2, const int elem3,
int &itriple)
{
double bfactor;
if (j >= j1) {
const int jjb = idxb_block[j1][j2][j];
itriple = ((elem1 * nelements + elem2) * nelements + elem3) *
idxb_max + jjb;
if (j1 == j) {
if (j2 == j)
bfactor = 3.0;
else
bfactor = 2.0;
} else
bfactor = 1.0;
} else if (j >= j2) {
const int jjb = idxb_block[j][j2][j1];
itriple = ((elem3 * nelements + elem2) * nelements + elem1) *
idxb_max + jjb;
if (j2 == j)
bfactor = 2.0;
else
bfactor = 1.0;
} else {
const int jjb = idxb_block[j2][j][j1];
itriple = ((elem2 * nelements + elem3) * nelements + elem1) *
idxb_max + jjb;
bfactor = 1.0;
}
if (!bnorm_flag && j1 > j)
bfactor *= (1.0 + j1) / (1.0 + j);
return bfactor;
}
/* ----------------------------------------------------------------------
compute Yi from Ui without storing Zi, looping over zlist indices
------------------------------------------------------------------------- */
template <int COMPUTE_YI>
void SNAIntel::compute_zi_or_yi(const SNA_DVEC* beta)
{
if (COMPUTE_YI) {
memset(ylist_r,0,idxu_max*nelements*sizeof(SNA_DVEC));
memset(ylist_i,0,idxu_max*nelements*sizeof(SNA_DVEC));
}
double *zlist_rp = (double *)zlist_r;
double *zlist_ip = (double *)zlist_i;
int zlist_i = 0;
for (int elem1 = 0; elem1 < nelements; elem1++)
for (int elem2 = 0; elem2 < nelements; elem2++) {
for (int jjz = 0; jjz < idxz_max; jjz++) {
const int j1 = idxz[jjz].j1;
const int j2 = idxz[jjz].j2;
const int j = idxz[jjz].j;
const int ma1min = idxz[jjz].ma1min;
const int ma2max = idxz[jjz].ma2max;
const int na = idxz[jjz].na;
const int mb1min = idxz[jjz].mb1min;
const int mb2max = idxz[jjz].mb2max;
const int nb = idxz[jjz].nb;
const double *cgblock = cglist + idxcg_block[j1][j2][j];
SNA_DVEC ztmp_r = 0.0;
SNA_DVEC ztmp_i = 0.0;
const double *u_r = (double *)ulisttot_r;
const double *u_i = (double *)ulisttot_i;
int jju1 = elem1 * idxu_max + idxu_block[j1] + (j1 + 1) * mb1min;
int jju2 = elem2 * idxu_max + idxu_block[j2] + (j2 + 1) * mb2max;
jju1 *= vector_width();
jju2 *= vector_width();
int icgb = mb1min * (j2 + 1) + mb2max;
for (int ib = 0; ib < nb; ib++) {
SNA_DVEC suma1_r = 0.0;
SNA_DVEC suma1_i = 0.0;
int ma1 = ma1min * vector_width();
int ma2 = ma2max * vector_width();
int icga = ma1min * (j2 + 1) + ma2max;
for (int ia = 0; ia < na; ia++) {
const SNA_DVEC u1_r = SIMD_load(u_r + jju1 + ma1);
const SNA_DVEC u2_r = SIMD_load(u_r + jju2 + ma2);
const SNA_DVEC u1_i = SIMD_load(u_i + jju1 + ma1);
const SNA_DVEC u2_i = SIMD_load(u_i + jju2 + ma2);
suma1_r += (u1_r*u2_r - u1_i*u2_i) * cgblock[icga];
suma1_i += (u1_r*u2_i + u1_i*u2_r) * cgblock[icga];
ma1+= vector_width();
ma2-= vector_width();
icga += j2;
} // end loop over ia
ztmp_r += suma1_r * cgblock[icgb];
ztmp_i += suma1_i * cgblock[icgb];
jju1 += (j1 + 1) * vector_width();
jju2 -= (j2 + 1) * vector_width();
icgb += j2;
} // end loop over ib
// apply to z(j1,j2,j,ma,mb) to unique element of y(j)
// find right y_list[jju] and beta[jjb] entries
// multiply and divide by j+1 factors
// account for multiplicity of 1, 2, or 3
if (bnorm_flag) {
ztmp_i *= SIMD_rcp(SIMD_set(static_cast<double>(j+1)));
ztmp_r *= SIMD_rcp(SIMD_set(static_cast<double>(j+1)));
}
if (COMPUTE_YI) {
int jju = idxz[jjz].jju;
for (int elem3 = 0; elem3 < nelements; elem3++) {
int itriple;
double bfactor = choose_beta(j, j1, j2, elem1, elem2, elem3,
itriple);
const SNA_DVEC betaj = beta[itriple] * bfactor;
const int i = elem3 * idxu_max + jju;
SIMD_store(&(ylist_r[i]), SIMD_load(ylist_r + i) + betaj * ztmp_r);
SIMD_store(&(ylist_i[i]), SIMD_load(ylist_i + i) + betaj * ztmp_i);
}
} else {
SIMD_store(zlist_rp + zlist_i, ztmp_r);
SIMD_store(zlist_ip + zlist_i, ztmp_i);
zlist_i += vector_width();
}
}// end loop over jjz
}
}
/* ----------------------------------------------------------------------
compute Yi from Zi
------------------------------------------------------------------------- */
void SNAIntel::compute_yi_from_zi(const SNA_DVEC* beta)
{
memset(ylist_r,0,idxu_max*nelements*sizeof(SNA_DVEC));
memset(ylist_i,0,idxu_max*nelements*sizeof(SNA_DVEC));
double *zlist_rp = (double *)zlist_r;
double *zlist_ip = (double *)zlist_i;
int zlist_i = 0;
for (int elem1 = 0; elem1 < nelements; elem1++)
for (int elem2 = 0; elem2 < nelements; elem2++) {
for (int jjz = 0; jjz < idxz_max; jjz++) {
const int j1 = idxz[jjz].j1;
const int j2 = idxz[jjz].j2;
const int j = idxz[jjz].j;
const SNA_DVEC ztmp_r = SIMD_load(zlist_rp + zlist_i);
const SNA_DVEC ztmp_i = SIMD_load(zlist_ip + zlist_i);
zlist_i += vector_width();
int jju = idxz[jjz].jju;
for (int elem3 = 0; elem3 < nelements; elem3++) {
int itriple;
double bfactor = choose_beta(j, j1, j2, elem1, elem2, elem3,
itriple);
const SNA_DVEC betaj = beta[itriple] * bfactor;
const int i = elem3 * idxu_max + jju;
SIMD_store(&(ylist_r[i]), SIMD_load(ylist_r + i) + betaj * ztmp_r);
SIMD_store(&(ylist_i[i]), SIMD_load(ylist_i + i) + betaj * ztmp_i);
}
} // end loop over jjz
}
}
/* ----------------------------------------------------------------------
compute dEidRj
------------------------------------------------------------------------- */
void SNAIntel::compute_deidrj_e(const int jj, const SNA_IVEC &jnum,
SNA_DVEC* dedr)
{
double *ylist_rp = (double *)ylist_r;
double *ylist_ip = (double *)ylist_i;
double *dulist_rp = (double *)(dulist_r[0]);
double *dulist_ip = (double *)(dulist_i[0]);
for (int k = 0; k < 3; k++)
dedr[k] = SIMD_set(0.0);
SNA_IVEC jelem;
if (chem_flag) jelem = SIMD_load(element + jj);
else jelem = SIMD256_set(0);
SIMD_mask m(jj < jnum);
for (int j = 0; j <= twojmax; j++) {
int jju = idxu_block[j] * vector_width();
int jju3 = jju * 3;
SNA_IVEC i = jelem*idxu_max*vector_width() + jju + SIMD256_count();
for (int mb = 0; 2*mb < j; mb++)
for (int ma = 0; ma <= j; ma++) {
SNA_DVEC jjjmambyarray_r = SIMD_gather(m, ylist_rp, i);
SNA_DVEC jjjmambyarray_i = SIMD_gather(m, ylist_ip, i);
for (int k = 0; k < 3; k++) {
SNA_DVEC du_r = SIMD_load(dulist_rp + jju3);
SNA_DVEC du_i = SIMD_load(dulist_ip + jju3);
SNA_DVEC du = du_r * jjjmambyarray_r + du_i * jjjmambyarray_i;
dedr[k] = SIMD_add(m, dedr[k], du);
jju3 += vector_width();
}
i = i + vector_width();
}
if (j%2 == 0) {
int mb = j / 2;
for (int ma = 0; ma < mb; ma++) {
SNA_DVEC jjjmambyarray_r = SIMD_gather(m, ylist_rp, i);
SNA_DVEC jjjmambyarray_i = SIMD_gather(m, ylist_ip, i);
for (int k = 0; k < 3; k++) {
SNA_DVEC du_r = SIMD_load(dulist_rp + jju3);
SNA_DVEC du_i = SIMD_load(dulist_ip + jju3);
SNA_DVEC du = du_r * jjjmambyarray_r + du_i * jjjmambyarray_i;
dedr[k] = SIMD_add(m, dedr[k], du);
jju3 += vector_width();
}
i = i + vector_width();
}
SNA_DVEC jjjmambyarray_r = SIMD_gather(m, ylist_rp, i);
SNA_DVEC jjjmambyarray_i = SIMD_gather(m, ylist_ip, i);
for (int k = 0; k < 3; k++) {
SNA_DVEC du_r = SIMD_load(dulist_rp + jju3);
SNA_DVEC du_i = SIMD_load(dulist_ip + jju3);
SNA_DVEC du = du_r * jjjmambyarray_r + du_i * jjjmambyarray_i;
dedr[k] = SIMD_fma(m, SIMD_set(0.5), du, dedr[k]);
jju3 += vector_width();
}
} // if j%2
} // for j
for (int k = 0; k < 3; k++)
dedr[k] = dedr[k] * 2.0;
}
/* ----------------------------------------------------------------------
compute dEidRj
------------------------------------------------------------------------- */
void SNAIntel::compute_deidrj(const int jj, const SNA_IVEC &jnum,
SNA_DVEC* dedr)
{
double *ylist_rp = (double *)ylist_r;
double *ylist_ip = (double *)ylist_i;
double *dulist_rp = (double *)(dulist_r[0]);
double *dulist_ip = (double *)(dulist_i[0]);
for (int k = 0; k < 3; k++)
dedr[k] = SIMD_set(0.0);
SIMD_mask m(jj < jnum);
for (int j = 0; j <= twojmax; j++) {
int jju = idxu_block[j] * vector_width();
int jju3 = jju * 3;
for (int mb = 0; 2*mb < j; mb++)
for (int ma = 0; ma <= j; ma++) {
SNA_DVEC jjjmambyarray_r = SIMD_load(ylist_rp + jju);
SNA_DVEC jjjmambyarray_i = SIMD_load(ylist_ip + jju);
for (int k = 0; k < 3; k++) {
SNA_DVEC du_r = SIMD_load(dulist_rp + jju3);
SNA_DVEC du_i = SIMD_load(dulist_ip + jju3);
SNA_DVEC du = du_r * jjjmambyarray_r + du_i * jjjmambyarray_i;
dedr[k] = SIMD_add(m, dedr[k], du);
jju3 += vector_width();
}
jju += vector_width();
}
if (j%2 == 0) {
int mb = j / 2;
for (int ma = 0; ma < mb; ma++) {
SNA_DVEC jjjmambyarray_r = SIMD_load(ylist_rp + jju);
SNA_DVEC jjjmambyarray_i = SIMD_load(ylist_ip + jju);
for (int k = 0; k < 3; k++) {
SNA_DVEC du_r = SIMD_load(dulist_rp + jju3);
SNA_DVEC du_i = SIMD_load(dulist_ip + jju3);
SNA_DVEC du = du_r * jjjmambyarray_r + du_i * jjjmambyarray_i;
dedr[k] = SIMD_add(m, dedr[k], du);
jju3 += vector_width();
}
jju += vector_width();
}
SNA_DVEC jjjmambyarray_r = SIMD_load(ylist_rp + jju);
SNA_DVEC jjjmambyarray_i = SIMD_load(ylist_ip + jju);
for (int k = 0; k < 3; k++) {
SNA_DVEC du_r = SIMD_load(dulist_rp + jju3);
SNA_DVEC du_i = SIMD_load(dulist_ip + jju3);
SNA_DVEC du = du_r * jjjmambyarray_r + du_i * jjjmambyarray_i;
dedr[k] = SIMD_fma(m, SIMD_set(0.5), du, dedr[k]);
jju3 += vector_width();
}
} // if j%2
} // for j
for (int k = 0; k < 3; k++)
dedr[k] = dedr[k] * 2.0;
}
/* ----------------------------------------------------------------------
compute Bi by summing conj(Ui)*Zi
------------------------------------------------------------------------- */
void SNAIntel::compute_bi(const SNA_IVEC &ielem) {
// for j1 = 0,...,twojmax
// for j2 = 0,twojmax
// for j = |j1-j2|,Min(twojmax,j1+j2),2
// b(j1,j2,j) = 0
// for mb = 0,...,jmid
// for ma = 0,...,j
// b(j1,j2,j) +=
// 2*Conj(u(j,ma,mb))*z(j1,j2,j,ma,mb)
double *ulisttot_rp = (double *)ulisttot_r;
double *ulisttot_ip = (double *)ulisttot_i;
double *blistp = (double *)blist;
int itriple = 0;
int idouble = 0;
for (int elem1 = 0; elem1 < nelements; elem1++)
for (int elem2 = 0; elem2 < nelements; elem2++) {
double *zlist_rp = (double *)(zlist_r + idouble*idxz_max);
double *zlist_ip = (double *)(zlist_i + idouble*idxz_max);
for (int elem3 = 0; elem3 < nelements; elem3++) {
for (int jjb = 0; jjb < idxb_max; jjb++) {
const int j1 = idxb[jjb].j1;
const int j2 = idxb[jjb].j2;
const int j = idxb[jjb].j;
int jjz = idxz_block[j1][j2][j] * vector_width();
int jju = (elem3 * idxu_max + idxu_block[j]) * vector_width();
SNA_DVEC sumzu(0.0);
for (int mb = 0; 2 * mb < j; mb++)
for (int ma = 0; ma <= j; ma++) {
const SNA_DVEC utot_r = SIMD_load(ulisttot_rp + jju);
const SNA_DVEC utot_i = SIMD_load(ulisttot_ip + jju);
const SNA_DVEC z_r = SIMD_load(zlist_rp + jjz);
const SNA_DVEC z_i = SIMD_load(zlist_ip + jjz);
sumzu = sumzu + utot_r * z_r + utot_i * z_i;
jjz += vector_width();
jju += vector_width();
} // end loop over ma, mb
// For j even, handle middle column
if (j % 2 == 0) {
int mb = j / 2;
for (int ma = 0; ma < mb; ma++) {
const SNA_DVEC utot_r = SIMD_load(ulisttot_rp + jju);
const SNA_DVEC utot_i = SIMD_load(ulisttot_ip + jju);
const SNA_DVEC z_r = SIMD_load(zlist_rp + jjz);
const SNA_DVEC z_i = SIMD_load(zlist_ip + jjz);
sumzu = sumzu + utot_r * z_r + utot_i * z_i;
jjz += vector_width();
jju += vector_width();
}
const SNA_DVEC utot_r = SIMD_load(ulisttot_rp + jju);
const SNA_DVEC utot_i = SIMD_load(ulisttot_ip + jju);
const SNA_DVEC z_r = SIMD_load(zlist_rp + jjz);
const SNA_DVEC z_i = SIMD_load(zlist_ip + jjz);
sumzu = sumzu + (utot_r * z_r + utot_i * z_i) * 0.5;
} // end if jeven
SIMD_store(blistp + (itriple*idxb_max+jjb) * vector_width(),
sumzu * 2.0);
}
itriple++;
}
idouble++;
}
// apply bzero shift
if (bzero_flag) {
if (!wselfall_flag) {
SNA_IVEC itriplev = (ielem*nelements+ielem)*nelements+ielem;
for (int jjb = 0; jjb < idxb_max; jjb++) {
const int j = idxb[jjb].j;
SNA_IVEC i = (itriplev*idxb_max+jjb) * vector_width() + SIMD256_count();
SIMD_scatter(blistp, i, SIMD_gather(blistp, i) - bzero[j]);
} // end loop over JJ
} else {
int itriple = 0;
for (int elem1 = 0; elem1 < nelements; elem1++)
for (int elem2 = 0; elem2 < nelements; elem2++) {
for (int elem3 = 0; elem3 < nelements; elem3++) {
for (int jjb = 0; jjb < idxb_max; jjb++) {
const int j = idxb[jjb].j;
int i = (itriple*idxb_max+jjb) * vector_width();
SIMD_store(blistp + i, SIMD_load(blistp + i) - bzero[j]);
} // end loop over JJ
itriple++;
} // end loop over elem3
} // end loop over elem1,elem2
}
}
}
/* ----------------------------------------------------------------------
calculate derivative of Ui w.r.t. atom j
------------------------------------------------------------------------- */
void SNAIntel::compute_duidrj(const int jj, const SNA_IVEC &jnum)
{
const SNA_DVEC x = rij[jj][0];
const SNA_DVEC y = rij[jj][1];
const SNA_DVEC z = rij[jj][2];
const SNA_DVEC rcut = rcutij[jj];
const SNA_DVEC rsq = x * x + y * y + z * z;
const SNA_DVEC r = SIMD_sqrt(rsq);
const SNA_DVEC rscale0 = SIMD_rcp(rcut - rmin0) * rfac0 * MY_PI;
const SNA_DVEC theta0 = (r - rmin0) * rscale0;
const SNA_DVEC z0 = r * SIMD_rcp(SIMD_tan(theta0));
const SNA_DVEC dz0dr = z0 * SIMD_rcp(r) - (r*rscale0) * (rsq + z0 * z0) *
SIMD_rcp(rsq);
compute_duarray(x, y, z, z0, r, dz0dr, wj[jj], rcut, jj, jnum);
}
/* ---------------------------------------------------------------------- */
void SNAIntel::zero_uarraytot(const SNA_IVEC &ielem)
{
double *ulisttot_rp = (double *)ulisttot_r;
double *ulisttot_ip = (double *)ulisttot_i;
for (int jelem = 0; jelem < nelements; jelem++)
for (int j = 0; j <= twojmax; j++) {
int jju = (jelem * idxu_max + idxu_block[j]) * vector_width();
for (int mb = 0; mb <= j; mb++) {
for (int ma = 0; ma <= j; ma++) {
SIMD_store(ulisttot_rp + jju, SIMD_set(0.0));
SIMD_store(ulisttot_ip + jju, SIMD_set(0.0));
// utot(j,ma,ma) = wself, sometimes
if (ma == mb) {
if (wselfall_flag || nelements == 1)
SIMD_store(ulisttot_rp + jju, SIMD_set(wself));
else {
SIMD_mask m(ielem == jelem);
SIMD_store(ulisttot_rp + jju,
SIMD_zero_masked(~m, SIMD_set(wself)));
}
}
jju += vector_width();
}
}
}
}
/* ----------------------------------------------------------------------
add Wigner U-functions for one neighbor to the total
------------------------------------------------------------------------- */
void SNAIntel::add_uarraytot(const SNA_DVEC &r, const int jj,
const SNA_IVEC &jnum)
{
SNA_DVEC sfac = compute_sfac(r, rcutij[jj], sinnerij[jj], dinnerij[jj]);
sfac *= wj[jj];
double *ulisttot_rp = (double *)ulisttot_r;
double *ulisttot_ip = (double *)ulisttot_i;
const double* ulist_r = (double *)(ulist_r_ij[jj]);
const double* ulist_i = (double *)(ulist_i_ij[jj]);
SIMD_mask m(jj < jnum);
if (chem_flag && nelements > 1) {
SNA_IVEC jelem = SIMD_load(element+jj);
for (int j = 0; j <= twojmax; j++) {
int jju = idxu_block[j] * vector_width();
SNA_IVEC i = jelem*idxu_max*vector_width() + jju + SIMD256_count();
for (int mb = 0; mb <= j; mb++)
for (int ma = 0; ma <= j; ma++) {
SNA_DVEC utot_r = SIMD_gather(m, ulisttot_rp, i);
SNA_DVEC utot_i = SIMD_gather(m, ulisttot_ip, i);
utot_r = SIMD_fma(m, sfac, SIMD_load(ulist_r + jju), utot_r);
utot_i = SIMD_fma(m, sfac, SIMD_load(ulist_i + jju), utot_i);
SIMD_scatter(m, ulisttot_rp, i, utot_r);
SIMD_scatter(m, ulisttot_ip, i, utot_i);
jju += vector_width();
i = i + vector_width();
}
}
} else {
for (int j = 0; j <= twojmax; j++) {
int jju = idxu_block[j] * vector_width();
for (int mb = 0; mb <= j; mb++)
for (int ma = 0; ma <= j; ma++) {
SNA_DVEC utot_r = SIMD_load(ulisttot_rp + jju);
SNA_DVEC utot_i = SIMD_load(ulisttot_ip + jju);
utot_r = SIMD_fma(m, sfac, SIMD_load(ulist_r + jju), utot_r);
utot_i = SIMD_fma(m, sfac, SIMD_load(ulist_i + jju), utot_i);
SIMD_store(ulisttot_rp + jju, utot_r);
SIMD_store(ulisttot_ip + jju, utot_i);
jju += vector_width();
}
}
}
}
/* ----------------------------------------------------------------------
compute Wigner U-functions for one neighbor
------------------------------------------------------------------------- */
void SNAIntel::compute_uarray(const SNA_DVEC &x, const SNA_DVEC &y,
const SNA_DVEC &z, const SNA_DVEC &z0,
const SNA_DVEC &r, const int jj,
const SNA_IVEC &jnum)
{
// compute Cayley-Klein parameters for unit quaternion
const SNA_DVEC r0inv = SIMD_invsqrt(r * r + z0 * z0);
const SNA_DVEC a_r = z0 * r0inv;
const SNA_DVEC a_i = -z * r0inv;
const SNA_DVEC b_r = y * r0inv;
const SNA_DVEC b_i = -x * r0inv;
// VMK Section 4.8.2
double *ulist_rp = (double *)(ulist_r_ij[jj]);
double *ulist_ip = (double *)(ulist_i_ij[jj]);
SIMD_store(ulist_rp, SIMD_set(1.0));
SIMD_store(ulist_ip, SIMD_set(0.0));
for (int j = 1; j <= twojmax; j++) {
int jju = idxu_block[j] * vector_width();
int jjup = idxu_block[j-1] * vector_width();
// fill in left side of matrix layer from previous layer
for (int mb = 0; 2*mb <= j; mb++) {
SIMD_store(ulist_rp + jju, SIMD_set(0.0));
SIMD_store(ulist_ip + jju, SIMD_set(0.0));
for (int ma = 0; ma < j; ma++) {
double rootpq = rootpqarray[j - ma][j - mb];
SNA_DVEC u_r = SIMD_load(ulist_rp + jju);
SNA_DVEC u_i = SIMD_load(ulist_ip + jju);
const SNA_DVEC up_r = SIMD_load(ulist_rp + jjup);
const SNA_DVEC up_i = SIMD_load(ulist_ip + jjup);
SNA_DVEC u_ro, u_io;
u_ro = a_r * up_r + a_i * up_i;
u_r = SIMD_fma(SIMD_set(rootpq), u_ro, u_r);
SIMD_store(ulist_rp + jju, u_r);
u_io = a_r * up_i - a_i * up_r;
u_i = SIMD_fma(SIMD_set(rootpq), u_io, u_i);
SIMD_store(ulist_ip + jju, u_i);
jju += vector_width();
rootpq = -rootpqarray[ma + 1][j - mb];
u_r = (b_r * up_r + b_i * up_i) * rootpq;
SIMD_store(ulist_rp + jju, u_r);
u_i = (b_r * up_i - b_i * up_r) * rootpq;
SIMD_store(ulist_ip + jju, u_i);
jjup += vector_width();
}
jju += vector_width();
}
// copy left side to right side with inversion symmetry VMK 4.4(2)
// u[ma-j][mb-j] = (-1)^(ma-mb)*Conj([u[ma][mb])
jju = idxu_block[j];
jjup = (jju+(j+1)*(j+1)-1) * vector_width();
jju *= vector_width();
int mbpar = 1;
for (int mb = 0; 2*mb <= j; mb++) {
int mapar = mbpar;
for (int ma = 0; ma <= j; ma++) {
if (mapar == 1) {
SIMD_store(ulist_rp + jjup, SIMD_load(ulist_rp + jju));
SIMD_store(ulist_ip + jjup, -SIMD_load(ulist_ip + jju));
} else {
SIMD_store(ulist_rp + jjup, -SIMD_load(ulist_rp + jju));
SIMD_store(ulist_ip + jjup, SIMD_load(ulist_ip + jju));
}
mapar = -mapar;
jju += vector_width();
jjup -= vector_width();
}
mbpar = -mbpar;
}
}
}
/* ----------------------------------------------------------------------
Compute derivatives of Wigner U-functions for one neighbor
see comments in compute_uarray()
------------------------------------------------------------------------- */
void SNAIntel::compute_duarray(const SNA_DVEC &x, const SNA_DVEC &y,
const SNA_DVEC &z, const SNA_DVEC &z0,
const SNA_DVEC &r, const SNA_DVEC &dz0dr,
const SNA_DVEC &wj, const SNA_DVEC &rcut,
const int jj, const SNA_IVEC &jnum)
{
const SNA_DVEC rinv = SIMD_rcp(r);
const SNA_DVEC r0inv = SIMD_invsqrt(r * r + z0 * z0);
SNA_DVEC up[3];
up[0] = x * rinv;
up[1] = y * rinv;
up[2] = z * rinv;
const SNA_DVEC a_r = z0 * r0inv;
const SNA_DVEC a_i = -z * r0inv;
const SNA_DVEC b_r = y * r0inv;
const SNA_DVEC b_i = -x * r0inv;
const SNA_DVEC dr0invdr = -SIMD_pow(r0inv, 3.0) * (r + z0 * dz0dr);
SNA_DVEC dr0inv[3], da_r[3], da_i[3];
for (int k = 0; k < 3; k++) {
dr0inv[k] = dr0invdr * up[k];
da_r[k] = dz0dr * up[k] * r0inv + z0 * dr0inv[k];
da_i[k] = -z * dr0inv[k];
}
da_i[2] += -r0inv;
double *ulist_rp = (double *)(ulist_r_ij[jj]);
double *ulist_ip = (double *)(ulist_i_ij[jj]);
double *dulist_rp = (double *)(dulist_r[0]);
double *dulist_ip = (double *)(dulist_i[0]);
SNA_DVEC db_r[3], db_i[3];
for (int k = 0; k < 3; k++) {
SIMD_store(dulist_rp + k * vector_width(), SIMD_set(0.0));
SIMD_store(dulist_ip + k * vector_width(), SIMD_set(0.0));
db_r[k] = y * dr0inv[k];
db_i[k] = -x * dr0inv[k];
}
db_i[0] -= r0inv;
db_r[1] += r0inv;
for (int j = 1; j <= twojmax; j++) {
int jju3 = idxu_block[j] * 3 * vector_width();
int jjup = idxu_block[j-1] * vector_width();
int jjup3 = jjup * 3;
for (int mb = 0; 2*mb <= j; mb++) {
for (int k = 0; k < 3; k++) {
SIMD_store(dulist_rp + jju3 + k * vector_width(), SIMD_set(0.0));
SIMD_store(dulist_ip + jju3 + k * vector_width(), SIMD_set(0.0));
}
for (int ma = 0; ma < j; ma++) {
const double rootpq = rootpqarray[j - ma][j - mb];
const double mrootpq = -rootpqarray[ma + 1][j - mb];
const SNA_DVEC up_r = SIMD_load(ulist_rp + jjup);
const SNA_DVEC up_i = SIMD_load(ulist_ip + jjup);
for (int k = 0; k < 3; k++) {
SNA_DVEC du_r = SIMD_load(dulist_rp + jju3);
SNA_DVEC du_i = SIMD_load(dulist_ip + jju3);
const SNA_DVEC dup_r = SIMD_load(dulist_rp + jjup3);
const SNA_DVEC dup_i = SIMD_load(dulist_ip + jjup3);
SNA_DVEC du_ro, du_io;
du_ro = (da_r[k]*up_r + da_i[k]*up_i + a_r*dup_r + a_i*dup_i);
du_r = SIMD_fma(SIMD_set(rootpq), du_ro, du_r);
SIMD_store(dulist_rp + jju3, du_r);
du_io = (da_r[k]*up_i - da_i[k]*up_r + a_r*dup_i - a_i*dup_r);
du_i = SIMD_fma(SIMD_set(rootpq), du_io, du_i);
SIMD_store(dulist_ip + jju3, du_i);
du_r = (db_r[k]*up_r + db_i[k]*up_i + b_r*dup_r + b_i*dup_i);
SIMD_store(dulist_rp + jju3 + 3 * vector_width(), du_r * mrootpq);
du_i = (db_r[k]*up_i - db_i[k]*up_r + b_r*dup_i - b_i*dup_r);
SIMD_store(dulist_ip + jju3 + 3 * vector_width(), du_i * mrootpq);
jju3 += vector_width();
jjup3 += vector_width();
}
jjup += vector_width();
} // for ma
jju3 += 3 * vector_width();
} // for mb
// copy left side to right side with inversion symmetry VMK 4.4(2)
// u[ma-j][mb-j] = (-1)^(ma-mb)*Conj([u[ma][mb])
SNA_DVEC *du_r_p = dulist_r[0];
SNA_DVEC *du_i_p = dulist_i[0];
int jju = idxu_block[j];
jjup = (jju+(j+1)*(j+1)-1) * 3 * vector_width();
jju *= 3 * vector_width();
int mbpar = 1;
for (int mb = 0; 2*mb <= j; mb++) {
int mapar = mbpar;
for (int ma = 0; ma <= j; ma++) {
if (mapar == 1) {
for (int k = 0; k < 3; k++) {
SIMD_store(dulist_rp + jjup, SIMD_load(dulist_rp + jju));
SIMD_store(dulist_ip + jjup, -SIMD_load(dulist_ip + jju));
jju += vector_width();
jjup += vector_width();
}
} else {
for (int k = 0; k < 3; k++) {
SIMD_store(dulist_rp + jjup, -SIMD_load(dulist_rp + jju));
SIMD_store(dulist_ip + jjup, SIMD_load(dulist_ip + jju));
jju += vector_width();
jjup += vector_width();
}
}
mapar = -mapar;
jjup -= 6 * vector_width();
} // for ma
mbpar = -mbpar;
} // for mb
} // for j
SNA_DVEC dsfac;
SNA_DVEC sfac = compute_sfac_dsfac(r, rcut, sinnerij[jj], dinnerij[jj],
dsfac);
sfac = sfac * wj;
dsfac = dsfac * wj;
for (int j = 0; j <= twojmax; j++) {
int jju = idxu_block[j] * vector_width();
int jju3 = jju * 3;
for (int mb = 0; 2*mb <= j; mb++)
for (int ma = 0; ma <= j; ma++) {
const SNA_DVEC ur_dsfac = dsfac * SIMD_load(ulist_rp + jju);
const SNA_DVEC ui_dsfac = dsfac * SIMD_load(ulist_ip + jju);
jju += vector_width();
for (int k = 0; k < 3; k++) {
SNA_DVEC du_r = ur_dsfac * up[k] + sfac * SIMD_load(dulist_rp+jju3);
SIMD_store(dulist_rp + jju3, du_r);
SNA_DVEC du_i = ui_dsfac * up[k] + sfac * SIMD_load(dulist_ip+jju3);
SIMD_store(dulist_ip + jju3, du_i);
jju3 += vector_width();
}
}
}
}
/* ----------------------------------------------------------------------
memory usage of arrays
------------------------------------------------------------------------- */
double SNAIntel::memory_usage()
{
int jdimpq = twojmax + 2;
int jdim = twojmax + 1;
double bytes;
bytes = 0;
bytes += (double)jdimpq*jdimpq * sizeof(double); // pqarray
bytes += (double)idxcg_max * sizeof(double); // cglist
bytes += (double)nmax * idxu_max * sizeof(SNA_DVEC) * 2; // ulist_ij
bytes += (double)idxu_max * nelements * sizeof(SNA_DVEC) * 2; // ulisttot
bytes += (double)idxu_max * 3 * sizeof(SNA_DVEC) * 2; // dulist
bytes += (double)idxz_max * ndoubles * sizeof(SNA_DVEC) * 2; // zlist
bytes += (double)idxb_max * ntriples * sizeof(SNA_DVEC); // blist
bytes += (double)idxb_max * ntriples * 3 * sizeof(double); // dblist
bytes += (double)idxu_max * nelements * sizeof(SNA_DVEC) * 2; // ylist
bytes += (double)jdim * jdim * jdim * sizeof(int); // idxcg_block
bytes += (double)jdim * sizeof(int); // idxu_block
bytes += (double)jdim * jdim * jdim * sizeof(int); // idxz_block
bytes += (double)jdim * jdim * jdim * sizeof(int); // idxb_block
bytes += (double)idxz_max * sizeof(SNA_ZINDICES); // idxz
bytes += (double)idxb_max * sizeof(SNA_BINDICES); // idxb
if (bzero_flag)
bytes += (double)jdim * sizeof(double); // bzero
bytes += (double)nmax * 3 * sizeof(SNA_DVEC); // rij
bytes += (double)nmax * sizeof(SNA_IVEC); // inside
bytes += (double)nmax * sizeof(SNA_DVEC); // wj
bytes += (double)nmax * sizeof(SNA_DVEC); // rcutij
bytes += (double)nmax * sizeof(SNA_DVEC); // sinnerij
bytes += (double)nmax * sizeof(SNA_DVEC); // dinnerij
if (chem_flag) bytes += (double)nmax * sizeof(SNA_IVEC); // element
return bytes;
}
/* ---------------------------------------------------------------------- */
void SNAIntel::create_twojmax_arrays()
{
int jdimpq = twojmax + 2;
memory->create(rootpqarray, jdimpq, jdimpq,
"sna:rootpqarray");
memory->create(cglist, idxcg_max, "sna:cglist");
memory->create(ulisttot_r, idxu_max*nelements, "sna:ulisttot");
memory->create(ulisttot_i, idxu_max*nelements, "sna:ulisttot");
memory->create(dulist_r, idxu_max, 3, "sna:dulist");
memory->create(dulist_i, idxu_max, 3, "sna:dulist");
memory->create(zlist_r, idxz_max*ndoubles, "sna:zlist");
memory->create(zlist_i, idxz_max*ndoubles, "sna:zlist");
memory->create(blist, idxb_max*ntriples, "sna:blist");
memory->create(dblist, idxb_max*ntriples, 3, "sna:dblist");
memory->create(ylist_r, idxu_max*nelements, "sna:ylist");
memory->create(ylist_i, idxu_max*nelements, "sna:ylist");
if (bzero_flag)
memory->create(bzero, twojmax+1,"sna:bzero");
else
bzero = nullptr;
}
/* ---------------------------------------------------------------------- */
void SNAIntel::destroy_twojmax_arrays()
{
memory->destroy(rootpqarray);
memory->destroy(cglist);
memory->destroy(ulisttot_r);
memory->destroy(ulisttot_i);
memory->destroy(dulist_r);
memory->destroy(dulist_i);
memory->destroy(zlist_r);
memory->destroy(zlist_i);
memory->destroy(blist);
memory->destroy(dblist);
memory->destroy(ylist_r);
memory->destroy(ylist_i);
memory->destroy(idxcg_block);
memory->destroy(idxu_block);
memory->destroy(idxz_block);
memory->destroy(idxb_block);
if (bzero_flag)
memory->destroy(bzero);
}
/* ----------------------------------------------------------------------
the function delta given by VMK Eq. 8.2(1)
------------------------------------------------------------------------- */
double SNAIntel::deltacg(int j1, int j2, int j)
{
double sfaccg = factorial((j1 + j2 + j) / 2 + 1);
return sqrt(factorial((j1 + j2 - j) / 2) *
factorial((j1 - j2 + j) / 2) *
factorial((-j1 + j2 + j) / 2) / sfaccg);
}
/* ----------------------------------------------------------------------
assign Clebsch-Gordan coefficients using
the quasi-binomial formula VMK 8.2.1(3)
------------------------------------------------------------------------- */
void SNAIntel::init_clebsch_gordan()
{
double sum,dcg,sfaccg;
int m, aa2, bb2, cc2;
int ifac;
int idxcg_count = 0;
for (int j1 = 0; j1 <= twojmax; j1++)
for (int j2 = 0; j2 <= j1; j2++)
for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) {
for (int m1 = 0; m1 <= j1; m1++) {
aa2 = 2 * m1 - j1;
for (int m2 = 0; m2 <= j2; m2++) {
// -c <= cc <= c
bb2 = 2 * m2 - j2;
m = (aa2 + bb2 + j) / 2;
if (m < 0 || m > j) {
cglist[idxcg_count] = 0.0;
idxcg_count++;
continue;
}
sum = 0.0;
for (int z = MAX(0, MAX(-(j - j2 + aa2)
/ 2, -(j - j1 - bb2) / 2));
z <= MIN((j1 + j2 - j) / 2,
MIN((j1 - aa2) / 2, (j2 + bb2) / 2));
z++) {
ifac = z % 2 ? -1 : 1;
sum += ifac /
(factorial(z) *
factorial((j1 + j2 - j) / 2 - z) *
factorial((j1 - aa2) / 2 - z) *
factorial((j2 + bb2) / 2 - z) *
factorial((j - j2 + aa2) / 2 + z) *
factorial((j - j1 - bb2) / 2 + z));
}
cc2 = 2 * m - j;
dcg = deltacg(j1, j2, j);
sfaccg = sqrt(factorial((j1 + aa2) / 2) *
factorial((j1 - aa2) / 2) *
factorial((j2 + bb2) / 2) *
factorial((j2 - bb2) / 2) *
factorial((j + cc2) / 2) *
factorial((j - cc2) / 2) *
(j + 1));
cglist[idxcg_count] = sum * dcg * sfaccg;
idxcg_count++;
}
}
}
}
/* ----------------------------------------------------------------------
print out values of Clebsch-Gordan coefficients
format and notation follows VMK Table 8.11
------------------------------------------------------------------------- */
void SNAIntel::print_clebsch_gordan()
{
if (comm->me) return;
int aa2, bb2, cc2;
for (int j = 0; j <= twojmax; j += 1) {
printf("c = %g\n",j/2.0);
printf("a alpha b beta C_{a alpha b beta}^{c alpha+beta}\n");
for (int j1 = 0; j1 <= twojmax; j1++)
for (int j2 = 0; j2 <= j1; j2++)
if (j1-j2 <= j && j1+j2 >= j && (j1+j2+j)%2 == 0) {
int idxcg_count = idxcg_block[j1][j2][j];
for (int m1 = 0; m1 <= j1; m1++) {
aa2 = 2*m1-j1;
for (int m2 = 0; m2 <= j2; m2++) {
bb2 = 2*m2-j2;
double cgtmp = cglist[idxcg_count];
cc2 = aa2+bb2;
if (cc2 >= -j && cc2 <= j)
if (j1 != j2 || (aa2 > bb2 && aa2 >= -bb2) || (aa2 == bb2 && aa2 >= 0))
printf("%4g %4g %4g %4g %10.6g\n",
j1/2.0,aa2/2.0,j2/2.0,bb2/2.0,cgtmp);
idxcg_count++;
}
}
}
}
}
/* ----------------------------------------------------------------------
pre-compute table of sqrt[p/m2], p, q = 1,twojmax
the p = 0, q = 0 entries are allocated and skipped for convenience.
------------------------------------------------------------------------- */
void SNAIntel::init_rootpqarray()
{
for (int p = 1; p <= twojmax; p++)
for (int q = 1; q <= twojmax; q++)
rootpqarray[p][q] = sqrt(static_cast<double>(p)/q);
}
/* ---------------------------------------------------------------------- */
void SNAIntel::compute_ncoeff()
{
int ncount;
ncount = 0;
for (int j1 = 0; j1 <= twojmax; j1++)
for (int j2 = 0; j2 <= j1; j2++)
for (int j = j1 - j2;
j <= MIN(twojmax, j1 + j2); j += 2)
if (j >= j1) ncount++;
ndoubles = nelements*nelements;
ntriples = nelements*nelements*nelements;
if (chem_flag)
ncoeff = ncount*ntriples;
else
ncoeff = ncount;
}
/* ---------------------------------------------------------------------- */
double SNAIntel::compute_sfac(double r, double rcut, double sinner, double dinner)
{
double sfac;
// calculate sfac = sfac_outer
if (switch_flag == 0) sfac = 1.0;
else if (r <= rmin0) sfac = 1.0;
else if (r > rcut) sfac = 0.0;
else {
double rcutfac = MY_PI / (rcut - rmin0);
sfac = 0.5 * (cos((r - rmin0) * rcutfac) + 1.0);
}
// calculate sfac *= sfac_inner, rarely visited
if (switch_inner_flag == 1 && r < sinner + dinner) {
if (r > sinner - dinner) {
double rcutfac = MY_PI2 / dinner;
sfac *= 0.5 * (1.0 - cos(MY_PI2 + (r - sinner) * rcutfac));
} else sfac = 0.0;
}
return sfac;
}
/* ---------------------------------------------------------------------- */
SNA_DVEC SNAIntel::compute_sfac(const SNA_DVEC &r, const SNA_DVEC &rcut,
const SNA_DVEC &sinner, const SNA_DVEC &dinner)
{
// calculate sfac = sfac_outer
// if (switch_flag == 0 || r <= rmin0)
SNA_DVEC sfac = SIMD_set(1.0);
if (switch_flag != 0) {
// r <= rcut && r > rmin0
const SIMD_mask i(r > rmin0);
const SIMD_mask m(r <= rcut);
const SNA_DVEC rcutfac = SIMD_rcp(rcut - rmin0) * MY_PI;
const SNA_DVEC sfac_m = (SIMD_cos((r - rmin0) * rcutfac) + 1.0) * 0.5;
sfac = SIMD_set(sfac, m & i, sfac_m);
// (r > rcut) && (r> rmin0)
sfac = SIMD_zero_masked(m | i, sfac);
}
// calculate sfac *= sfac_inner, rarely visited
if (switch_inner_flag == 1) {
const SIMD_mask m(r < sinner + dinner);
// if any(m)
const SIMD_mask i(r > sinner - dinner);
const SNA_DVEC rcutfac = SIMD_rcp(dinner) * MY_PI2;
const SNA_DVEC sfac_m = (SIMD_set(1.0) - SIMD_cos((r-sinner) * rcutfac +
MY_PI2)) * 0.5;
sfac = SIMD_set(sfac, m & i, sfac_m);
sfac = SIMD_zero_masked((~m) | i, sfac);
}
return sfac;
}
/* ---------------------------------------------------------------------- */
SNA_DVEC SNAIntel::compute_sfac_dsfac(const SNA_DVEC & r,
const SNA_DVEC & rcut,
const SNA_DVEC & sinner,
const SNA_DVEC & dinner,
SNA_DVEC &dsfac)
{
// calculate sfac = sfac_outer
// if (switch_flag == 0 || r <= rmin0)
SNA_DVEC sfac = SIMD_set(1.0);
dsfac = SIMD_set(0.0);
if (switch_flag != 0) {
// r <= rcut && r > rmin0
const SIMD_mask i(r > rmin0);
const SIMD_mask m(r <= rcut);
const SNA_DVEC rcutfac = SIMD_rcp(rcut - rmin0) * MY_PI;
const SNA_DVEC trig_arg = (r - rmin0) * rcutfac;
const SNA_DVEC sfac_m = (SIMD_cos(trig_arg) + 1.0) * 0.5;
const SNA_DVEC dsfac_m = SIMD_sin(trig_arg) * rcutfac * -0.5;
sfac = SIMD_set(sfac, m & i, sfac_m);
dsfac = SIMD_set(dsfac, m & i, dsfac_m);
// (r > rcut) && (r> rmin0)
sfac = SIMD_zero_masked(m | i, sfac);
}
// calculate sfac *= sfac_inner, rarely visited
if (switch_inner_flag == 1) {
const SIMD_mask m(r < sinner + dinner);
const SIMD_mask i(r > sinner - dinner);
if (any(m & i)) {
const SNA_DVEC rcutfac = SIMD_rcp(dinner) * MY_PI2;
const SNA_DVEC trig_arg = (r - sinner) * rcutfac + MY_PI2;
const SNA_DVEC sfac_inner = (SIMD_set(1.0) - SIMD_cos(trig_arg)) * 0.5;
const SNA_DVEC dsfac_inner = rcutfac * 0.5 * SIMD_sin(trig_arg);
dsfac = SIMD_set(dsfac, m & i, dsfac * sfac_inner +
sfac * dsfac_inner);
sfac = SIMD_set(sfac, m & i, sfac_inner);
}
sfac = SIMD_zero_masked((~m) | i, sfac);
dsfac = SIMD_zero_masked((~m) | i, dsfac);
}
return sfac;
}
template void SNAIntel::compute_zi_or_yi<1>(const SNA_DVEC *);
template void SNAIntel::compute_zi_or_yi<0>(const SNA_DVEC *);
#endif
#endif