// clang-format off /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator https://www.lammps.org/, Sandia National Laboratories LAMMPS development team: developers@lammps.org Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing authors: W. Michael Brown, Intel ------------------------------------------------------------------------- */ #if defined(__AVX512F__) #if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER) #include "sna_intel.h" #include "comm.h" #include "error.h" #include "math_const.h" #include "math_special.h" #include "memory.h" #include using namespace std; using namespace LAMMPS_NS; using namespace MathConst; using namespace MathSpecial; using namespace ip_simd; /* ---------------------------------------------------------------------- this implementation is based on the method outlined in Bartok[1], using formulae from VMK[2]. for the Clebsch-Gordan coefficients, we convert the VMK half-integral labels a, b, c, alpha, beta, gamma to array offsets j1, j2, j, m1, m2, m using the following relations: j1 = 2*a j2 = 2*b j = 2*c m1 = alpha+a 2*alpha = 2*m1 - j1 m2 = beta+b or 2*beta = 2*m2 - j2 m = gamma+c 2*gamma = 2*m - j in this way: -a <= alpha <= a -b <= beta <= b -c <= gamma <= c becomes: 0 <= m1 <= j1 0 <= m2 <= j2 0 <= m <= j and the requirement that a+b+c be integral implies that j1+j2+j must be even. The requirement that: gamma = alpha+beta becomes: 2*m - j = 2*m1 - j1 + 2*m2 - j2 Similarly, for the Wigner U-functions U(J,m,m') we convert the half-integral labels J,m,m' to array offsets j,ma,mb: j = 2*J ma = J+m mb = J+m' so that: 0 <= j <= 2*Jmax 0 <= ma, mb <= j. For the bispectrum components B(J1,J2,J) we convert to: j1 = 2*J1 j2 = 2*J2 j = 2*J and the requirement: |J1-J2| <= J <= J1+J2, for j1+j2+j integral becomes: |j1-j2| <= j <= j1+j2, for j1+j2+j even integer or j = |j1-j2|, |j1-j2|+2,...,j1+j2-2,j1+j2 [1] Albert Bartok-Partay, "Gaussian Approximation..." Doctoral Thesis, Cambridge University, (2009) [2] D. A. Varshalovich, A. N. Moskalev, and V. K. Khersonskii, "Quantum Theory of Angular Momentum," World Scientific (1988) ------------------------------------------------------------------------- */ SNAIntel::SNAIntel(LAMMPS* lmp, double rfac0_in, int twojmax_in, double rmin0_in, int switch_flag_in, int bzero_flag_in, int chem_flag_in, int bnorm_flag_in, int wselfall_flag_in, int nelements_in, int switch_inner_flag_in) : Pointers(lmp) { wself = 1.0; rfac0 = rfac0_in; rmin0 = rmin0_in; switch_flag = switch_flag_in; switch_inner_flag = switch_inner_flag_in; bzero_flag = bzero_flag_in; chem_flag = chem_flag_in; bnorm_flag = bnorm_flag_in; wselfall_flag = wselfall_flag_in; if (bnorm_flag != chem_flag) lmp->error->warning(FLERR, "bnormflag and chemflag are not equal." "This is probably not what you intended"); if (chem_flag) nelements = nelements_in; else nelements = 1; twojmax = twojmax_in; compute_ncoeff(); rij = nullptr; inside = nullptr; wj = nullptr; rcutij = nullptr; sinnerij = nullptr; dinnerij = nullptr; element = nullptr; nmax = 0; idxz = nullptr; idxb = nullptr; ulist_r_ij = nullptr; ulist_i_ij = nullptr; build_indexlist(); create_twojmax_arrays(); if (bzero_flag) { double www = wself*wself*wself; for (int j = 0; j <= twojmax; j++) if (bnorm_flag) bzero[j] = www; else bzero[j] = www*(j+1); } } /* ---------------------------------------------------------------------- */ SNAIntel::~SNAIntel() { memory->destroy(rij); memory->destroy(inside); memory->destroy(wj); memory->destroy(rcutij); memory->destroy(sinnerij); memory->destroy(dinnerij); if (chem_flag) memory->destroy(element); memory->destroy(ulist_r_ij); memory->destroy(ulist_i_ij); delete[] idxz; delete[] idxb; destroy_twojmax_arrays(); } void SNAIntel::build_indexlist() { // index list for cglist int jdim = twojmax + 1; memory->create(idxcg_block, jdim, jdim, jdim, "sna:idxcg_block"); int idxcg_count = 0; for (int j1 = 0; j1 <= twojmax; j1++) for (int j2 = 0; j2 <= j1; j2++) for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { idxcg_block[j1][j2][j] = idxcg_count; for (int m1 = 0; m1 <= j1; m1++) for (int m2 = 0; m2 <= j2; m2++) idxcg_count++; } idxcg_max = idxcg_count; // index list for uarray // need to include both halves memory->create(idxu_block, jdim, "sna:idxu_block"); int idxu_count = 0; for (int j = 0; j <= twojmax; j++) { idxu_block[j] = idxu_count; for (int mb = 0; mb <= j; mb++) for (int ma = 0; ma <= j; ma++) idxu_count++; } idxu_max = idxu_count; // index list for beta and B int idxb_count = 0; for (int j1 = 0; j1 <= twojmax; j1++) for (int j2 = 0; j2 <= j1; j2++) for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) if (j >= j1) idxb_count++; idxb_max = idxb_count; idxb = new SNA_BINDICES[idxb_max]; idxb_count = 0; for (int j1 = 0; j1 <= twojmax; j1++) for (int j2 = 0; j2 <= j1; j2++) for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) if (j >= j1) { idxb[idxb_count].j1 = j1; idxb[idxb_count].j2 = j2; idxb[idxb_count].j = j; idxb_count++; } // reverse index list for beta and b memory->create(idxb_block, jdim, jdim, jdim, "sna:idxb_block"); idxb_count = 0; for (int j1 = 0; j1 <= twojmax; j1++) for (int j2 = 0; j2 <= j1; j2++) for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { if (j >= j1) { idxb_block[j1][j2][j] = idxb_count; idxb_count++; } } // index list for zlist int idxz_count = 0; for (int j1 = 0; j1 <= twojmax; j1++) for (int j2 = 0; j2 <= j1; j2++) for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) for (int mb = 0; 2*mb <= j; mb++) for (int ma = 0; ma <= j; ma++) idxz_count++; idxz_max = idxz_count; idxz = new SNA_ZINDICES[idxz_max]; memory->create(idxz_block, jdim, jdim, jdim, "sna:idxz_block"); idxz_count = 0; for (int j1 = 0; j1 <= twojmax; j1++) for (int j2 = 0; j2 <= j1; j2++) for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { idxz_block[j1][j2][j] = idxz_count; // find right beta[jjb] entry // multiply and divide by j+1 factors // account for multiplicity of 1, 2, or 3 for (int mb = 0; 2*mb <= j; mb++) for (int ma = 0; ma <= j; ma++) { idxz[idxz_count].j1 = j1; idxz[idxz_count].j2 = j2; idxz[idxz_count].j = j; idxz[idxz_count].ma1min = MAX(0, (2 * ma - j - j2 + j1) / 2); idxz[idxz_count].ma2max = (2 * ma - j - (2 * idxz[idxz_count].ma1min - j1) + j2) / 2; idxz[idxz_count].na = MIN(j1, (2 * ma - j + j2 + j1) / 2) - idxz[idxz_count].ma1min + 1; idxz[idxz_count].mb1min = MAX(0, (2 * mb - j - j2 + j1) / 2); idxz[idxz_count].mb2max = (2 * mb - j - (2 * idxz[idxz_count].mb1min - j1) + j2) / 2; idxz[idxz_count].nb = MIN(j1, (2 * mb - j + j2 + j1) / 2) - idxz[idxz_count].mb1min + 1; // apply to z(j1,j2,j,ma,mb) to unique element of y(j) const int jju = idxu_block[j] + (j+1)*mb + ma; idxz[idxz_count].jju = jju; idxz_count++; } } } /* ---------------------------------------------------------------------- */ void SNAIntel::init() { init_clebsch_gordan(); // print_clebsch_gordan(); init_rootpqarray(); } void SNAIntel::grow_rij(int newnmax) { if (newnmax <= nmax) return; nmax = newnmax; memory->destroy(rij); memory->destroy(inside); memory->destroy(wj); memory->destroy(rcutij); memory->destroy(sinnerij); memory->destroy(dinnerij); if (chem_flag) memory->destroy(element); memory->destroy(ulist_r_ij); memory->destroy(ulist_i_ij); memory->create(rij, nmax, 3, "pair:rij"); memory->create(inside, nmax, "pair:inside"); memory->create(wj, nmax, "pair:wj"); memory->create(rcutij, nmax, "pair:rcutij"); memory->create(sinnerij, nmax, "pair:sinnerij"); memory->create(dinnerij, nmax, "pair:dinnerij"); if (chem_flag) memory->create(element, nmax, "sna:element"); memory->create(ulist_r_ij, nmax, idxu_max, "sna:ulist_ij"); memory->create(ulist_i_ij, nmax, idxu_max, "sna:ulist_ij"); } /* ---------------------------------------------------------------------- compute Ui by summing over neighbors j ------------------------------------------------------------------------- */ void SNAIntel::compute_ui(const SNA_IVEC &jnum, const SNA_IVEC &ielem, const int max_jnum) { // utot(j,ma,mb) = 0 for all j,ma,ma // utot(j,ma,ma) = 1 for all j,ma // for j in neighbors of i: // compute r0 = (x,y,z,z0) // utot(j,ma,mb) += u(r0;j,ma,mb) for all j,ma,mb zero_uarraytot(ielem); for (int j = 0; j < max_jnum; j++) { const SNA_DVEC x = rij[j][0]; const SNA_DVEC y = rij[j][1]; const SNA_DVEC z = rij[j][2]; const SNA_DVEC rcut = rcutij[j]; const SNA_DVEC rsq = x * x + y * y + z * z; const SNA_DVEC r = SIMD_sqrt(rsq); const SNA_DVEC rscale0 = SIMD_rcp(rcut - rmin0) * rfac0 * MY_PI; const SNA_DVEC theta0 = (r - rmin0) * rscale0; const SNA_DVEC z0 = r * SIMD_rcp(SIMD_tan(theta0)); compute_uarray(x, y, z, z0, r, j, jnum); add_uarraytot(r, j, jnum); } } /* ---------------------------------------------------------------------- pick out right beta value ------------------------------------------------------------------------- */ double SNAIntel::choose_beta(const int j, const int j1, const int j2, const int elem1, const int elem2, const int elem3, int &itriple) { double bfactor; if (j >= j1) { const int jjb = idxb_block[j1][j2][j]; itriple = ((elem1 * nelements + elem2) * nelements + elem3) * idxb_max + jjb; if (j1 == j) { if (j2 == j) bfactor = 3.0; else bfactor = 2.0; } else bfactor = 1.0; } else if (j >= j2) { const int jjb = idxb_block[j][j2][j1]; itriple = ((elem3 * nelements + elem2) * nelements + elem1) * idxb_max + jjb; if (j2 == j) bfactor = 2.0; else bfactor = 1.0; } else { const int jjb = idxb_block[j2][j][j1]; itriple = ((elem2 * nelements + elem3) * nelements + elem1) * idxb_max + jjb; bfactor = 1.0; } if (!bnorm_flag && j1 > j) bfactor *= (1.0 + j1) / (1.0 + j); return bfactor; } /* ---------------------------------------------------------------------- compute Yi from Ui without storing Zi, looping over zlist indices ------------------------------------------------------------------------- */ template void SNAIntel::compute_zi_or_yi(const SNA_DVEC* beta) { if (COMPUTE_YI) { memset(ylist_r,0,idxu_max*nelements*sizeof(SNA_DVEC)); memset(ylist_i,0,idxu_max*nelements*sizeof(SNA_DVEC)); } double *zlist_rp = (double *)zlist_r; double *zlist_ip = (double *)zlist_i; int zlist_i = 0; for (int elem1 = 0; elem1 < nelements; elem1++) for (int elem2 = 0; elem2 < nelements; elem2++) { for (int jjz = 0; jjz < idxz_max; jjz++) { const int j1 = idxz[jjz].j1; const int j2 = idxz[jjz].j2; const int j = idxz[jjz].j; const int ma1min = idxz[jjz].ma1min; const int ma2max = idxz[jjz].ma2max; const int na = idxz[jjz].na; const int mb1min = idxz[jjz].mb1min; const int mb2max = idxz[jjz].mb2max; const int nb = idxz[jjz].nb; const double *cgblock = cglist + idxcg_block[j1][j2][j]; SNA_DVEC ztmp_r = 0.0; SNA_DVEC ztmp_i = 0.0; const double *u_r = (double *)ulisttot_r; const double *u_i = (double *)ulisttot_i; int jju1 = elem1 * idxu_max + idxu_block[j1] + (j1 + 1) * mb1min; int jju2 = elem2 * idxu_max + idxu_block[j2] + (j2 + 1) * mb2max; jju1 *= vector_width(); jju2 *= vector_width(); int icgb = mb1min * (j2 + 1) + mb2max; for (int ib = 0; ib < nb; ib++) { SNA_DVEC suma1_r = 0.0; SNA_DVEC suma1_i = 0.0; int ma1 = ma1min * vector_width(); int ma2 = ma2max * vector_width(); int icga = ma1min * (j2 + 1) + ma2max; for (int ia = 0; ia < na; ia++) { const SNA_DVEC u1_r = SIMD_load(u_r + jju1 + ma1); const SNA_DVEC u2_r = SIMD_load(u_r + jju2 + ma2); const SNA_DVEC u1_i = SIMD_load(u_i + jju1 + ma1); const SNA_DVEC u2_i = SIMD_load(u_i + jju2 + ma2); suma1_r += (u1_r*u2_r - u1_i*u2_i) * cgblock[icga]; suma1_i += (u1_r*u2_i + u1_i*u2_r) * cgblock[icga]; ma1+= vector_width(); ma2-= vector_width(); icga += j2; } // end loop over ia ztmp_r += suma1_r * cgblock[icgb]; ztmp_i += suma1_i * cgblock[icgb]; jju1 += (j1 + 1) * vector_width(); jju2 -= (j2 + 1) * vector_width(); icgb += j2; } // end loop over ib // apply to z(j1,j2,j,ma,mb) to unique element of y(j) // find right y_list[jju] and beta[jjb] entries // multiply and divide by j+1 factors // account for multiplicity of 1, 2, or 3 if (bnorm_flag) { ztmp_i *= SIMD_rcp(SIMD_set(static_cast(j+1))); ztmp_r *= SIMD_rcp(SIMD_set(static_cast(j+1))); } if (COMPUTE_YI) { int jju = idxz[jjz].jju; for (int elem3 = 0; elem3 < nelements; elem3++) { int itriple; double bfactor = choose_beta(j, j1, j2, elem1, elem2, elem3, itriple); const SNA_DVEC betaj = beta[itriple] * bfactor; const int i = elem3 * idxu_max + jju; SIMD_store(&(ylist_r[i]), SIMD_load(ylist_r + i) + betaj * ztmp_r); SIMD_store(&(ylist_i[i]), SIMD_load(ylist_i + i) + betaj * ztmp_i); } } else { SIMD_store(zlist_rp + zlist_i, ztmp_r); SIMD_store(zlist_ip + zlist_i, ztmp_i); zlist_i += vector_width(); } }// end loop over jjz } } /* ---------------------------------------------------------------------- compute Yi from Zi ------------------------------------------------------------------------- */ void SNAIntel::compute_yi_from_zi(const SNA_DVEC* beta) { memset(ylist_r,0,idxu_max*nelements*sizeof(SNA_DVEC)); memset(ylist_i,0,idxu_max*nelements*sizeof(SNA_DVEC)); double *zlist_rp = (double *)zlist_r; double *zlist_ip = (double *)zlist_i; int zlist_i = 0; for (int elem1 = 0; elem1 < nelements; elem1++) for (int elem2 = 0; elem2 < nelements; elem2++) { for (int jjz = 0; jjz < idxz_max; jjz++) { const int j1 = idxz[jjz].j1; const int j2 = idxz[jjz].j2; const int j = idxz[jjz].j; const SNA_DVEC ztmp_r = SIMD_load(zlist_rp + zlist_i); const SNA_DVEC ztmp_i = SIMD_load(zlist_ip + zlist_i); zlist_i += vector_width(); int jju = idxz[jjz].jju; for (int elem3 = 0; elem3 < nelements; elem3++) { int itriple; double bfactor = choose_beta(j, j1, j2, elem1, elem2, elem3, itriple); const SNA_DVEC betaj = beta[itriple] * bfactor; const int i = elem3 * idxu_max + jju; SIMD_store(&(ylist_r[i]), SIMD_load(ylist_r + i) + betaj * ztmp_r); SIMD_store(&(ylist_i[i]), SIMD_load(ylist_i + i) + betaj * ztmp_i); } } // end loop over jjz } } /* ---------------------------------------------------------------------- compute dEidRj ------------------------------------------------------------------------- */ void SNAIntel::compute_deidrj_e(const int jj, const SNA_IVEC &jnum, SNA_DVEC* dedr) { double *ylist_rp = (double *)ylist_r; double *ylist_ip = (double *)ylist_i; double *dulist_rp = (double *)(dulist_r[0]); double *dulist_ip = (double *)(dulist_i[0]); for (int k = 0; k < 3; k++) dedr[k] = SIMD_set(0.0); SNA_IVEC jelem; if (chem_flag) jelem = SIMD_load(element + jj); else jelem = SIMD256_set(0); SIMD_mask m(jj < jnum); for (int j = 0; j <= twojmax; j++) { int jju = idxu_block[j] * vector_width(); int jju3 = jju * 3; SNA_IVEC i = jelem*idxu_max*vector_width() + jju + SIMD256_count(); for (int mb = 0; 2*mb < j; mb++) for (int ma = 0; ma <= j; ma++) { SNA_DVEC jjjmambyarray_r = SIMD_gather(m, ylist_rp, i); SNA_DVEC jjjmambyarray_i = SIMD_gather(m, ylist_ip, i); for (int k = 0; k < 3; k++) { SNA_DVEC du_r = SIMD_load(dulist_rp + jju3); SNA_DVEC du_i = SIMD_load(dulist_ip + jju3); SNA_DVEC du = du_r * jjjmambyarray_r + du_i * jjjmambyarray_i; dedr[k] = SIMD_add(m, dedr[k], du); jju3 += vector_width(); } i = i + vector_width(); } if (j%2 == 0) { int mb = j / 2; for (int ma = 0; ma < mb; ma++) { SNA_DVEC jjjmambyarray_r = SIMD_gather(m, ylist_rp, i); SNA_DVEC jjjmambyarray_i = SIMD_gather(m, ylist_ip, i); for (int k = 0; k < 3; k++) { SNA_DVEC du_r = SIMD_load(dulist_rp + jju3); SNA_DVEC du_i = SIMD_load(dulist_ip + jju3); SNA_DVEC du = du_r * jjjmambyarray_r + du_i * jjjmambyarray_i; dedr[k] = SIMD_add(m, dedr[k], du); jju3 += vector_width(); } i = i + vector_width(); } SNA_DVEC jjjmambyarray_r = SIMD_gather(m, ylist_rp, i); SNA_DVEC jjjmambyarray_i = SIMD_gather(m, ylist_ip, i); for (int k = 0; k < 3; k++) { SNA_DVEC du_r = SIMD_load(dulist_rp + jju3); SNA_DVEC du_i = SIMD_load(dulist_ip + jju3); SNA_DVEC du = du_r * jjjmambyarray_r + du_i * jjjmambyarray_i; dedr[k] = SIMD_fma(m, SIMD_set(0.5), du, dedr[k]); jju3 += vector_width(); } } // if j%2 } // for j for (int k = 0; k < 3; k++) dedr[k] = dedr[k] * 2.0; } /* ---------------------------------------------------------------------- compute dEidRj ------------------------------------------------------------------------- */ void SNAIntel::compute_deidrj(const int jj, const SNA_IVEC &jnum, SNA_DVEC* dedr) { double *ylist_rp = (double *)ylist_r; double *ylist_ip = (double *)ylist_i; double *dulist_rp = (double *)(dulist_r[0]); double *dulist_ip = (double *)(dulist_i[0]); for (int k = 0; k < 3; k++) dedr[k] = SIMD_set(0.0); SIMD_mask m(jj < jnum); for (int j = 0; j <= twojmax; j++) { int jju = idxu_block[j] * vector_width(); int jju3 = jju * 3; for (int mb = 0; 2*mb < j; mb++) for (int ma = 0; ma <= j; ma++) { SNA_DVEC jjjmambyarray_r = SIMD_load(ylist_rp + jju); SNA_DVEC jjjmambyarray_i = SIMD_load(ylist_ip + jju); for (int k = 0; k < 3; k++) { SNA_DVEC du_r = SIMD_load(dulist_rp + jju3); SNA_DVEC du_i = SIMD_load(dulist_ip + jju3); SNA_DVEC du = du_r * jjjmambyarray_r + du_i * jjjmambyarray_i; dedr[k] = SIMD_add(m, dedr[k], du); jju3 += vector_width(); } jju += vector_width(); } if (j%2 == 0) { int mb = j / 2; for (int ma = 0; ma < mb; ma++) { SNA_DVEC jjjmambyarray_r = SIMD_load(ylist_rp + jju); SNA_DVEC jjjmambyarray_i = SIMD_load(ylist_ip + jju); for (int k = 0; k < 3; k++) { SNA_DVEC du_r = SIMD_load(dulist_rp + jju3); SNA_DVEC du_i = SIMD_load(dulist_ip + jju3); SNA_DVEC du = du_r * jjjmambyarray_r + du_i * jjjmambyarray_i; dedr[k] = SIMD_add(m, dedr[k], du); jju3 += vector_width(); } jju += vector_width(); } SNA_DVEC jjjmambyarray_r = SIMD_load(ylist_rp + jju); SNA_DVEC jjjmambyarray_i = SIMD_load(ylist_ip + jju); for (int k = 0; k < 3; k++) { SNA_DVEC du_r = SIMD_load(dulist_rp + jju3); SNA_DVEC du_i = SIMD_load(dulist_ip + jju3); SNA_DVEC du = du_r * jjjmambyarray_r + du_i * jjjmambyarray_i; dedr[k] = SIMD_fma(m, SIMD_set(0.5), du, dedr[k]); jju3 += vector_width(); } } // if j%2 } // for j for (int k = 0; k < 3; k++) dedr[k] = dedr[k] * 2.0; } /* ---------------------------------------------------------------------- compute Bi by summing conj(Ui)*Zi ------------------------------------------------------------------------- */ void SNAIntel::compute_bi(const SNA_IVEC &ielem) { // for j1 = 0,...,twojmax // for j2 = 0,twojmax // for j = |j1-j2|,Min(twojmax,j1+j2),2 // b(j1,j2,j) = 0 // for mb = 0,...,jmid // for ma = 0,...,j // b(j1,j2,j) += // 2*Conj(u(j,ma,mb))*z(j1,j2,j,ma,mb) double *ulisttot_rp = (double *)ulisttot_r; double *ulisttot_ip = (double *)ulisttot_i; double *blistp = (double *)blist; int itriple = 0; int idouble = 0; for (int elem1 = 0; elem1 < nelements; elem1++) for (int elem2 = 0; elem2 < nelements; elem2++) { double *zlist_rp = (double *)(zlist_r + idouble*idxz_max); double *zlist_ip = (double *)(zlist_i + idouble*idxz_max); for (int elem3 = 0; elem3 < nelements; elem3++) { for (int jjb = 0; jjb < idxb_max; jjb++) { const int j1 = idxb[jjb].j1; const int j2 = idxb[jjb].j2; const int j = idxb[jjb].j; int jjz = idxz_block[j1][j2][j] * vector_width(); int jju = (elem3 * idxu_max + idxu_block[j]) * vector_width(); SNA_DVEC sumzu(0.0); for (int mb = 0; 2 * mb < j; mb++) for (int ma = 0; ma <= j; ma++) { const SNA_DVEC utot_r = SIMD_load(ulisttot_rp + jju); const SNA_DVEC utot_i = SIMD_load(ulisttot_ip + jju); const SNA_DVEC z_r = SIMD_load(zlist_rp + jjz); const SNA_DVEC z_i = SIMD_load(zlist_ip + jjz); sumzu = sumzu + utot_r * z_r + utot_i * z_i; jjz += vector_width(); jju += vector_width(); } // end loop over ma, mb // For j even, handle middle column if (j % 2 == 0) { int mb = j / 2; for (int ma = 0; ma < mb; ma++) { const SNA_DVEC utot_r = SIMD_load(ulisttot_rp + jju); const SNA_DVEC utot_i = SIMD_load(ulisttot_ip + jju); const SNA_DVEC z_r = SIMD_load(zlist_rp + jjz); const SNA_DVEC z_i = SIMD_load(zlist_ip + jjz); sumzu = sumzu + utot_r * z_r + utot_i * z_i; jjz += vector_width(); jju += vector_width(); } const SNA_DVEC utot_r = SIMD_load(ulisttot_rp + jju); const SNA_DVEC utot_i = SIMD_load(ulisttot_ip + jju); const SNA_DVEC z_r = SIMD_load(zlist_rp + jjz); const SNA_DVEC z_i = SIMD_load(zlist_ip + jjz); sumzu = sumzu + (utot_r * z_r + utot_i * z_i) * 0.5; } // end if jeven SIMD_store(blistp + (itriple*idxb_max+jjb) * vector_width(), sumzu * 2.0); } itriple++; } idouble++; } // apply bzero shift if (bzero_flag) { if (!wselfall_flag) { SNA_IVEC itriplev = (ielem*nelements+ielem)*nelements+ielem; for (int jjb = 0; jjb < idxb_max; jjb++) { const int j = idxb[jjb].j; SNA_IVEC i = (itriplev*idxb_max+jjb) * vector_width() + SIMD256_count(); SIMD_scatter(blistp, i, SIMD_gather(blistp, i) - bzero[j]); } // end loop over JJ } else { int itriple = 0; for (int elem1 = 0; elem1 < nelements; elem1++) for (int elem2 = 0; elem2 < nelements; elem2++) { for (int elem3 = 0; elem3 < nelements; elem3++) { for (int jjb = 0; jjb < idxb_max; jjb++) { const int j = idxb[jjb].j; int i = (itriple*idxb_max+jjb) * vector_width(); SIMD_store(blistp + i, SIMD_load(blistp + i) - bzero[j]); } // end loop over JJ itriple++; } // end loop over elem3 } // end loop over elem1,elem2 } } } /* ---------------------------------------------------------------------- calculate derivative of Ui w.r.t. atom j ------------------------------------------------------------------------- */ void SNAIntel::compute_duidrj(const int jj, const SNA_IVEC &jnum) { const SNA_DVEC x = rij[jj][0]; const SNA_DVEC y = rij[jj][1]; const SNA_DVEC z = rij[jj][2]; const SNA_DVEC rcut = rcutij[jj]; const SNA_DVEC rsq = x * x + y * y + z * z; const SNA_DVEC r = SIMD_sqrt(rsq); const SNA_DVEC rscale0 = SIMD_rcp(rcut - rmin0) * rfac0 * MY_PI; const SNA_DVEC theta0 = (r - rmin0) * rscale0; const SNA_DVEC z0 = r * SIMD_rcp(SIMD_tan(theta0)); const SNA_DVEC dz0dr = z0 * SIMD_rcp(r) - (r*rscale0) * (rsq + z0 * z0) * SIMD_rcp(rsq); compute_duarray(x, y, z, z0, r, dz0dr, wj[jj], rcut, jj, jnum); } /* ---------------------------------------------------------------------- */ void SNAIntel::zero_uarraytot(const SNA_IVEC &ielem) { double *ulisttot_rp = (double *)ulisttot_r; double *ulisttot_ip = (double *)ulisttot_i; for (int jelem = 0; jelem < nelements; jelem++) for (int j = 0; j <= twojmax; j++) { int jju = (jelem * idxu_max + idxu_block[j]) * vector_width(); for (int mb = 0; mb <= j; mb++) { for (int ma = 0; ma <= j; ma++) { SIMD_store(ulisttot_rp + jju, SIMD_set(0.0)); SIMD_store(ulisttot_ip + jju, SIMD_set(0.0)); // utot(j,ma,ma) = wself, sometimes if (ma == mb) { if (wselfall_flag || nelements == 1) SIMD_store(ulisttot_rp + jju, SIMD_set(wself)); else { SIMD_mask m(ielem == jelem); SIMD_store(ulisttot_rp + jju, SIMD_zero_masked(~m, SIMD_set(wself))); } } jju += vector_width(); } } } } /* ---------------------------------------------------------------------- add Wigner U-functions for one neighbor to the total ------------------------------------------------------------------------- */ void SNAIntel::add_uarraytot(const SNA_DVEC &r, const int jj, const SNA_IVEC &jnum) { SNA_DVEC sfac = compute_sfac(r, rcutij[jj], sinnerij[jj], dinnerij[jj]); sfac *= wj[jj]; double *ulisttot_rp = (double *)ulisttot_r; double *ulisttot_ip = (double *)ulisttot_i; const double* ulist_r = (double *)(ulist_r_ij[jj]); const double* ulist_i = (double *)(ulist_i_ij[jj]); SIMD_mask m(jj < jnum); if (chem_flag && nelements > 1) { SNA_IVEC jelem = SIMD_load(element+jj); for (int j = 0; j <= twojmax; j++) { int jju = idxu_block[j] * vector_width(); SNA_IVEC i = jelem*idxu_max*vector_width() + jju + SIMD256_count(); for (int mb = 0; mb <= j; mb++) for (int ma = 0; ma <= j; ma++) { SNA_DVEC utot_r = SIMD_gather(m, ulisttot_rp, i); SNA_DVEC utot_i = SIMD_gather(m, ulisttot_ip, i); utot_r = SIMD_fma(m, sfac, SIMD_load(ulist_r + jju), utot_r); utot_i = SIMD_fma(m, sfac, SIMD_load(ulist_i + jju), utot_i); SIMD_scatter(m, ulisttot_rp, i, utot_r); SIMD_scatter(m, ulisttot_ip, i, utot_i); jju += vector_width(); i = i + vector_width(); } } } else { for (int j = 0; j <= twojmax; j++) { int jju = idxu_block[j] * vector_width(); for (int mb = 0; mb <= j; mb++) for (int ma = 0; ma <= j; ma++) { SNA_DVEC utot_r = SIMD_load(ulisttot_rp + jju); SNA_DVEC utot_i = SIMD_load(ulisttot_ip + jju); utot_r = SIMD_fma(m, sfac, SIMD_load(ulist_r + jju), utot_r); utot_i = SIMD_fma(m, sfac, SIMD_load(ulist_i + jju), utot_i); SIMD_store(ulisttot_rp + jju, utot_r); SIMD_store(ulisttot_ip + jju, utot_i); jju += vector_width(); } } } } /* ---------------------------------------------------------------------- compute Wigner U-functions for one neighbor ------------------------------------------------------------------------- */ void SNAIntel::compute_uarray(const SNA_DVEC &x, const SNA_DVEC &y, const SNA_DVEC &z, const SNA_DVEC &z0, const SNA_DVEC &r, const int jj, const SNA_IVEC &jnum) { // compute Cayley-Klein parameters for unit quaternion const SNA_DVEC r0inv = SIMD_invsqrt(r * r + z0 * z0); const SNA_DVEC a_r = z0 * r0inv; const SNA_DVEC a_i = -z * r0inv; const SNA_DVEC b_r = y * r0inv; const SNA_DVEC b_i = -x * r0inv; // VMK Section 4.8.2 double *ulist_rp = (double *)(ulist_r_ij[jj]); double *ulist_ip = (double *)(ulist_i_ij[jj]); SIMD_store(ulist_rp, SIMD_set(1.0)); SIMD_store(ulist_ip, SIMD_set(0.0)); for (int j = 1; j <= twojmax; j++) { int jju = idxu_block[j] * vector_width(); int jjup = idxu_block[j-1] * vector_width(); // fill in left side of matrix layer from previous layer for (int mb = 0; 2*mb <= j; mb++) { SIMD_store(ulist_rp + jju, SIMD_set(0.0)); SIMD_store(ulist_ip + jju, SIMD_set(0.0)); for (int ma = 0; ma < j; ma++) { double rootpq = rootpqarray[j - ma][j - mb]; SNA_DVEC u_r = SIMD_load(ulist_rp + jju); SNA_DVEC u_i = SIMD_load(ulist_ip + jju); const SNA_DVEC up_r = SIMD_load(ulist_rp + jjup); const SNA_DVEC up_i = SIMD_load(ulist_ip + jjup); SNA_DVEC u_ro, u_io; u_ro = a_r * up_r + a_i * up_i; u_r = SIMD_fma(SIMD_set(rootpq), u_ro, u_r); SIMD_store(ulist_rp + jju, u_r); u_io = a_r * up_i - a_i * up_r; u_i = SIMD_fma(SIMD_set(rootpq), u_io, u_i); SIMD_store(ulist_ip + jju, u_i); jju += vector_width(); rootpq = -rootpqarray[ma + 1][j - mb]; u_r = (b_r * up_r + b_i * up_i) * rootpq; SIMD_store(ulist_rp + jju, u_r); u_i = (b_r * up_i - b_i * up_r) * rootpq; SIMD_store(ulist_ip + jju, u_i); jjup += vector_width(); } jju += vector_width(); } // copy left side to right side with inversion symmetry VMK 4.4(2) // u[ma-j][mb-j] = (-1)^(ma-mb)*Conj([u[ma][mb]) jju = idxu_block[j]; jjup = (jju+(j+1)*(j+1)-1) * vector_width(); jju *= vector_width(); int mbpar = 1; for (int mb = 0; 2*mb <= j; mb++) { int mapar = mbpar; for (int ma = 0; ma <= j; ma++) { if (mapar == 1) { SIMD_store(ulist_rp + jjup, SIMD_load(ulist_rp + jju)); SIMD_store(ulist_ip + jjup, -SIMD_load(ulist_ip + jju)); } else { SIMD_store(ulist_rp + jjup, -SIMD_load(ulist_rp + jju)); SIMD_store(ulist_ip + jjup, SIMD_load(ulist_ip + jju)); } mapar = -mapar; jju += vector_width(); jjup -= vector_width(); } mbpar = -mbpar; } } } /* ---------------------------------------------------------------------- Compute derivatives of Wigner U-functions for one neighbor see comments in compute_uarray() ------------------------------------------------------------------------- */ void SNAIntel::compute_duarray(const SNA_DVEC &x, const SNA_DVEC &y, const SNA_DVEC &z, const SNA_DVEC &z0, const SNA_DVEC &r, const SNA_DVEC &dz0dr, const SNA_DVEC &wj, const SNA_DVEC &rcut, const int jj, const SNA_IVEC &jnum) { const SNA_DVEC rinv = SIMD_rcp(r); const SNA_DVEC r0inv = SIMD_invsqrt(r * r + z0 * z0); SNA_DVEC up[3]; up[0] = x * rinv; up[1] = y * rinv; up[2] = z * rinv; const SNA_DVEC a_r = z0 * r0inv; const SNA_DVEC a_i = -z * r0inv; const SNA_DVEC b_r = y * r0inv; const SNA_DVEC b_i = -x * r0inv; const SNA_DVEC dr0invdr = -SIMD_pow(r0inv, 3.0) * (r + z0 * dz0dr); SNA_DVEC dr0inv[3], da_r[3], da_i[3]; for (int k = 0; k < 3; k++) { dr0inv[k] = dr0invdr * up[k]; da_r[k] = dz0dr * up[k] * r0inv + z0 * dr0inv[k]; da_i[k] = -z * dr0inv[k]; } da_i[2] += -r0inv; double *ulist_rp = (double *)(ulist_r_ij[jj]); double *ulist_ip = (double *)(ulist_i_ij[jj]); double *dulist_rp = (double *)(dulist_r[0]); double *dulist_ip = (double *)(dulist_i[0]); SNA_DVEC db_r[3], db_i[3]; for (int k = 0; k < 3; k++) { SIMD_store(dulist_rp + k * vector_width(), SIMD_set(0.0)); SIMD_store(dulist_ip + k * vector_width(), SIMD_set(0.0)); db_r[k] = y * dr0inv[k]; db_i[k] = -x * dr0inv[k]; } db_i[0] -= r0inv; db_r[1] += r0inv; for (int j = 1; j <= twojmax; j++) { int jju3 = idxu_block[j] * 3 * vector_width(); int jjup = idxu_block[j-1] * vector_width(); int jjup3 = jjup * 3; for (int mb = 0; 2*mb <= j; mb++) { for (int k = 0; k < 3; k++) { SIMD_store(dulist_rp + jju3 + k * vector_width(), SIMD_set(0.0)); SIMD_store(dulist_ip + jju3 + k * vector_width(), SIMD_set(0.0)); } for (int ma = 0; ma < j; ma++) { const double rootpq = rootpqarray[j - ma][j - mb]; const double mrootpq = -rootpqarray[ma + 1][j - mb]; const SNA_DVEC up_r = SIMD_load(ulist_rp + jjup); const SNA_DVEC up_i = SIMD_load(ulist_ip + jjup); for (int k = 0; k < 3; k++) { SNA_DVEC du_r = SIMD_load(dulist_rp + jju3); SNA_DVEC du_i = SIMD_load(dulist_ip + jju3); const SNA_DVEC dup_r = SIMD_load(dulist_rp + jjup3); const SNA_DVEC dup_i = SIMD_load(dulist_ip + jjup3); SNA_DVEC du_ro, du_io; du_ro = (da_r[k]*up_r + da_i[k]*up_i + a_r*dup_r + a_i*dup_i); du_r = SIMD_fma(SIMD_set(rootpq), du_ro, du_r); SIMD_store(dulist_rp + jju3, du_r); du_io = (da_r[k]*up_i - da_i[k]*up_r + a_r*dup_i - a_i*dup_r); du_i = SIMD_fma(SIMD_set(rootpq), du_io, du_i); SIMD_store(dulist_ip + jju3, du_i); du_r = (db_r[k]*up_r + db_i[k]*up_i + b_r*dup_r + b_i*dup_i); SIMD_store(dulist_rp + jju3 + 3 * vector_width(), du_r * mrootpq); du_i = (db_r[k]*up_i - db_i[k]*up_r + b_r*dup_i - b_i*dup_r); SIMD_store(dulist_ip + jju3 + 3 * vector_width(), du_i * mrootpq); jju3 += vector_width(); jjup3 += vector_width(); } jjup += vector_width(); } // for ma jju3 += 3 * vector_width(); } // for mb // copy left side to right side with inversion symmetry VMK 4.4(2) // u[ma-j][mb-j] = (-1)^(ma-mb)*Conj([u[ma][mb]) SNA_DVEC *du_r_p = dulist_r[0]; SNA_DVEC *du_i_p = dulist_i[0]; int jju = idxu_block[j]; jjup = (jju+(j+1)*(j+1)-1) * 3 * vector_width(); jju *= 3 * vector_width(); int mbpar = 1; for (int mb = 0; 2*mb <= j; mb++) { int mapar = mbpar; for (int ma = 0; ma <= j; ma++) { if (mapar == 1) { for (int k = 0; k < 3; k++) { SIMD_store(dulist_rp + jjup, SIMD_load(dulist_rp + jju)); SIMD_store(dulist_ip + jjup, -SIMD_load(dulist_ip + jju)); jju += vector_width(); jjup += vector_width(); } } else { for (int k = 0; k < 3; k++) { SIMD_store(dulist_rp + jjup, -SIMD_load(dulist_rp + jju)); SIMD_store(dulist_ip + jjup, SIMD_load(dulist_ip + jju)); jju += vector_width(); jjup += vector_width(); } } mapar = -mapar; jjup -= 6 * vector_width(); } // for ma mbpar = -mbpar; } // for mb } // for j SNA_DVEC dsfac; SNA_DVEC sfac = compute_sfac_dsfac(r, rcut, sinnerij[jj], dinnerij[jj], dsfac); sfac = sfac * wj; dsfac = dsfac * wj; for (int j = 0; j <= twojmax; j++) { int jju = idxu_block[j] * vector_width(); int jju3 = jju * 3; for (int mb = 0; 2*mb <= j; mb++) for (int ma = 0; ma <= j; ma++) { const SNA_DVEC ur_dsfac = dsfac * SIMD_load(ulist_rp + jju); const SNA_DVEC ui_dsfac = dsfac * SIMD_load(ulist_ip + jju); jju += vector_width(); for (int k = 0; k < 3; k++) { SNA_DVEC du_r = ur_dsfac * up[k] + sfac * SIMD_load(dulist_rp+jju3); SIMD_store(dulist_rp + jju3, du_r); SNA_DVEC du_i = ui_dsfac * up[k] + sfac * SIMD_load(dulist_ip+jju3); SIMD_store(dulist_ip + jju3, du_i); jju3 += vector_width(); } } } } /* ---------------------------------------------------------------------- memory usage of arrays ------------------------------------------------------------------------- */ double SNAIntel::memory_usage() { int jdimpq = twojmax + 2; int jdim = twojmax + 1; double bytes; bytes = 0; bytes += (double)jdimpq*jdimpq * sizeof(double); // pqarray bytes += (double)idxcg_max * sizeof(double); // cglist bytes += (double)nmax * idxu_max * sizeof(SNA_DVEC) * 2; // ulist_ij bytes += (double)idxu_max * nelements * sizeof(SNA_DVEC) * 2; // ulisttot bytes += (double)idxu_max * 3 * sizeof(SNA_DVEC) * 2; // dulist bytes += (double)idxz_max * ndoubles * sizeof(SNA_DVEC) * 2; // zlist bytes += (double)idxb_max * ntriples * sizeof(SNA_DVEC); // blist bytes += (double)idxb_max * ntriples * 3 * sizeof(double); // dblist bytes += (double)idxu_max * nelements * sizeof(SNA_DVEC) * 2; // ylist bytes += (double)jdim * jdim * jdim * sizeof(int); // idxcg_block bytes += (double)jdim * sizeof(int); // idxu_block bytes += (double)jdim * jdim * jdim * sizeof(int); // idxz_block bytes += (double)jdim * jdim * jdim * sizeof(int); // idxb_block bytes += (double)idxz_max * sizeof(SNA_ZINDICES); // idxz bytes += (double)idxb_max * sizeof(SNA_BINDICES); // idxb if (bzero_flag) bytes += (double)jdim * sizeof(double); // bzero bytes += (double)nmax * 3 * sizeof(SNA_DVEC); // rij bytes += (double)nmax * sizeof(SNA_IVEC); // inside bytes += (double)nmax * sizeof(SNA_DVEC); // wj bytes += (double)nmax * sizeof(SNA_DVEC); // rcutij bytes += (double)nmax * sizeof(SNA_DVEC); // sinnerij bytes += (double)nmax * sizeof(SNA_DVEC); // dinnerij if (chem_flag) bytes += (double)nmax * sizeof(SNA_IVEC); // element return bytes; } /* ---------------------------------------------------------------------- */ void SNAIntel::create_twojmax_arrays() { int jdimpq = twojmax + 2; memory->create(rootpqarray, jdimpq, jdimpq, "sna:rootpqarray"); memory->create(cglist, idxcg_max, "sna:cglist"); memory->create(ulisttot_r, idxu_max*nelements, "sna:ulisttot"); memory->create(ulisttot_i, idxu_max*nelements, "sna:ulisttot"); memory->create(dulist_r, idxu_max, 3, "sna:dulist"); memory->create(dulist_i, idxu_max, 3, "sna:dulist"); memory->create(zlist_r, idxz_max*ndoubles, "sna:zlist"); memory->create(zlist_i, idxz_max*ndoubles, "sna:zlist"); memory->create(blist, idxb_max*ntriples, "sna:blist"); memory->create(dblist, idxb_max*ntriples, 3, "sna:dblist"); memory->create(ylist_r, idxu_max*nelements, "sna:ylist"); memory->create(ylist_i, idxu_max*nelements, "sna:ylist"); if (bzero_flag) memory->create(bzero, twojmax+1,"sna:bzero"); else bzero = nullptr; } /* ---------------------------------------------------------------------- */ void SNAIntel::destroy_twojmax_arrays() { memory->destroy(rootpqarray); memory->destroy(cglist); memory->destroy(ulisttot_r); memory->destroy(ulisttot_i); memory->destroy(dulist_r); memory->destroy(dulist_i); memory->destroy(zlist_r); memory->destroy(zlist_i); memory->destroy(blist); memory->destroy(dblist); memory->destroy(ylist_r); memory->destroy(ylist_i); memory->destroy(idxcg_block); memory->destroy(idxu_block); memory->destroy(idxz_block); memory->destroy(idxb_block); if (bzero_flag) memory->destroy(bzero); } /* ---------------------------------------------------------------------- the function delta given by VMK Eq. 8.2(1) ------------------------------------------------------------------------- */ double SNAIntel::deltacg(int j1, int j2, int j) { double sfaccg = factorial((j1 + j2 + j) / 2 + 1); return sqrt(factorial((j1 + j2 - j) / 2) * factorial((j1 - j2 + j) / 2) * factorial((-j1 + j2 + j) / 2) / sfaccg); } /* ---------------------------------------------------------------------- assign Clebsch-Gordan coefficients using the quasi-binomial formula VMK 8.2.1(3) ------------------------------------------------------------------------- */ void SNAIntel::init_clebsch_gordan() { double sum,dcg,sfaccg; int m, aa2, bb2, cc2; int ifac; int idxcg_count = 0; for (int j1 = 0; j1 <= twojmax; j1++) for (int j2 = 0; j2 <= j1; j2++) for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { for (int m1 = 0; m1 <= j1; m1++) { aa2 = 2 * m1 - j1; for (int m2 = 0; m2 <= j2; m2++) { // -c <= cc <= c bb2 = 2 * m2 - j2; m = (aa2 + bb2 + j) / 2; if (m < 0 || m > j) { cglist[idxcg_count] = 0.0; idxcg_count++; continue; } sum = 0.0; for (int z = MAX(0, MAX(-(j - j2 + aa2) / 2, -(j - j1 - bb2) / 2)); z <= MIN((j1 + j2 - j) / 2, MIN((j1 - aa2) / 2, (j2 + bb2) / 2)); z++) { ifac = z % 2 ? -1 : 1; sum += ifac / (factorial(z) * factorial((j1 + j2 - j) / 2 - z) * factorial((j1 - aa2) / 2 - z) * factorial((j2 + bb2) / 2 - z) * factorial((j - j2 + aa2) / 2 + z) * factorial((j - j1 - bb2) / 2 + z)); } cc2 = 2 * m - j; dcg = deltacg(j1, j2, j); sfaccg = sqrt(factorial((j1 + aa2) / 2) * factorial((j1 - aa2) / 2) * factorial((j2 + bb2) / 2) * factorial((j2 - bb2) / 2) * factorial((j + cc2) / 2) * factorial((j - cc2) / 2) * (j + 1)); cglist[idxcg_count] = sum * dcg * sfaccg; idxcg_count++; } } } } /* ---------------------------------------------------------------------- print out values of Clebsch-Gordan coefficients format and notation follows VMK Table 8.11 ------------------------------------------------------------------------- */ void SNAIntel::print_clebsch_gordan() { if (comm->me) return; int aa2, bb2, cc2; for (int j = 0; j <= twojmax; j += 1) { printf("c = %g\n",j/2.0); printf("a alpha b beta C_{a alpha b beta}^{c alpha+beta}\n"); for (int j1 = 0; j1 <= twojmax; j1++) for (int j2 = 0; j2 <= j1; j2++) if (j1-j2 <= j && j1+j2 >= j && (j1+j2+j)%2 == 0) { int idxcg_count = idxcg_block[j1][j2][j]; for (int m1 = 0; m1 <= j1; m1++) { aa2 = 2*m1-j1; for (int m2 = 0; m2 <= j2; m2++) { bb2 = 2*m2-j2; double cgtmp = cglist[idxcg_count]; cc2 = aa2+bb2; if (cc2 >= -j && cc2 <= j) if (j1 != j2 || (aa2 > bb2 && aa2 >= -bb2) || (aa2 == bb2 && aa2 >= 0)) printf("%4g %4g %4g %4g %10.6g\n", j1/2.0,aa2/2.0,j2/2.0,bb2/2.0,cgtmp); idxcg_count++; } } } } } /* ---------------------------------------------------------------------- pre-compute table of sqrt[p/m2], p, q = 1,twojmax the p = 0, q = 0 entries are allocated and skipped for convenience. ------------------------------------------------------------------------- */ void SNAIntel::init_rootpqarray() { for (int p = 1; p <= twojmax; p++) for (int q = 1; q <= twojmax; q++) rootpqarray[p][q] = sqrt(static_cast(p)/q); } /* ---------------------------------------------------------------------- */ void SNAIntel::compute_ncoeff() { int ncount; ncount = 0; for (int j1 = 0; j1 <= twojmax; j1++) for (int j2 = 0; j2 <= j1; j2++) for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) if (j >= j1) ncount++; ndoubles = nelements*nelements; ntriples = nelements*nelements*nelements; if (chem_flag) ncoeff = ncount*ntriples; else ncoeff = ncount; } /* ---------------------------------------------------------------------- */ double SNAIntel::compute_sfac(double r, double rcut, double sinner, double dinner) { double sfac; // calculate sfac = sfac_outer if (switch_flag == 0) sfac = 1.0; else if (r <= rmin0) sfac = 1.0; else if (r > rcut) sfac = 0.0; else { double rcutfac = MY_PI / (rcut - rmin0); sfac = 0.5 * (cos((r - rmin0) * rcutfac) + 1.0); } // calculate sfac *= sfac_inner, rarely visited if (switch_inner_flag == 1 && r < sinner + dinner) { if (r > sinner - dinner) { double rcutfac = MY_PI2 / dinner; sfac *= 0.5 * (1.0 - cos(MY_PI2 + (r - sinner) * rcutfac)); } else sfac = 0.0; } return sfac; } /* ---------------------------------------------------------------------- */ SNA_DVEC SNAIntel::compute_sfac(const SNA_DVEC &r, const SNA_DVEC &rcut, const SNA_DVEC &sinner, const SNA_DVEC &dinner) { // calculate sfac = sfac_outer // if (switch_flag == 0 || r <= rmin0) SNA_DVEC sfac = SIMD_set(1.0); if (switch_flag != 0) { // r <= rcut && r > rmin0 const SIMD_mask i(r > rmin0); const SIMD_mask m(r <= rcut); const SNA_DVEC rcutfac = SIMD_rcp(rcut - rmin0) * MY_PI; const SNA_DVEC sfac_m = (SIMD_cos((r - rmin0) * rcutfac) + 1.0) * 0.5; sfac = SIMD_set(sfac, m & i, sfac_m); // (r > rcut) && (r> rmin0) sfac = SIMD_zero_masked(m | i, sfac); } // calculate sfac *= sfac_inner, rarely visited if (switch_inner_flag == 1) { const SIMD_mask m(r < sinner + dinner); // if any(m) const SIMD_mask i(r > sinner - dinner); const SNA_DVEC rcutfac = SIMD_rcp(dinner) * MY_PI2; const SNA_DVEC sfac_m = (SIMD_set(1.0) - SIMD_cos((r-sinner) * rcutfac + MY_PI2)) * 0.5; sfac = SIMD_set(sfac, m & i, sfac_m); sfac = SIMD_zero_masked((~m) | i, sfac); } return sfac; } /* ---------------------------------------------------------------------- */ SNA_DVEC SNAIntel::compute_sfac_dsfac(const SNA_DVEC & r, const SNA_DVEC & rcut, const SNA_DVEC & sinner, const SNA_DVEC & dinner, SNA_DVEC &dsfac) { // calculate sfac = sfac_outer // if (switch_flag == 0 || r <= rmin0) SNA_DVEC sfac = SIMD_set(1.0); dsfac = SIMD_set(0.0); if (switch_flag != 0) { // r <= rcut && r > rmin0 const SIMD_mask i(r > rmin0); const SIMD_mask m(r <= rcut); const SNA_DVEC rcutfac = SIMD_rcp(rcut - rmin0) * MY_PI; const SNA_DVEC trig_arg = (r - rmin0) * rcutfac; const SNA_DVEC sfac_m = (SIMD_cos(trig_arg) + 1.0) * 0.5; const SNA_DVEC dsfac_m = SIMD_sin(trig_arg) * rcutfac * -0.5; sfac = SIMD_set(sfac, m & i, sfac_m); dsfac = SIMD_set(dsfac, m & i, dsfac_m); // (r > rcut) && (r> rmin0) sfac = SIMD_zero_masked(m | i, sfac); } // calculate sfac *= sfac_inner, rarely visited if (switch_inner_flag == 1) { const SIMD_mask m(r < sinner + dinner); const SIMD_mask i(r > sinner - dinner); if (any(m & i)) { const SNA_DVEC rcutfac = SIMD_rcp(dinner) * MY_PI2; const SNA_DVEC trig_arg = (r - sinner) * rcutfac + MY_PI2; const SNA_DVEC sfac_inner = (SIMD_set(1.0) - SIMD_cos(trig_arg)) * 0.5; const SNA_DVEC dsfac_inner = rcutfac * 0.5 * SIMD_sin(trig_arg); dsfac = SIMD_set(dsfac, m & i, dsfac * sfac_inner + sfac * dsfac_inner); sfac = SIMD_set(sfac, m & i, sfac_inner); } sfac = SIMD_zero_masked((~m) | i, sfac); dsfac = SIMD_zero_masked((~m) | i, dsfac); } return sfac; } template void SNAIntel::compute_zi_or_yi<1>(const SNA_DVEC *); template void SNAIntel::compute_zi_or_yi<0>(const SNA_DVEC *); #endif #endif