/* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing authors: Aidan Thompson, Christian Trott, SNL ------------------------------------------------------------------------- */ #include "sna.h" #include #include "math_const.h" #include "math_extra.h" #include #include #include "openmp_snap.h" #include "memory.h" #include "error.h" #include "comm.h" #include "atom.h" using namespace std; using namespace LAMMPS_NS; using namespace MathConst; /* ---------------------------------------------------------------------- this implementation is based on the method outlined in Bartok[1], using formulae from VMK[2]. for the Clebsch-Gordan coefficients, we convert the VMK half-integral labels a, b, c, alpha, beta, gamma to array offsets j1, j2, j, m1, m2, m using the following relations: j1 = 2*a j2 = 2*b j = 2*c m1 = alpha+a 2*alpha = 2*m1 - j1 m2 = beta+b or 2*beta = 2*m2 - j2 m = gamma+c 2*gamma = 2*m - j in this way: -a <= alpha <= a -b <= beta <= b -c <= gamma <= c becomes: 0 <= m1 <= j1 0 <= m2 <= j2 0 <= m <= j and the requirement that a+b+c be integral implies that j1+j2+j must be even. The requirement that: gamma = alpha+beta becomes: 2*m - j = 2*m1 - j1 + 2*m2 - j2 Similarly, for the Wigner U-functions U(J,m,m') we convert the half-integral labels J,m,m' to array offsets j,ma,mb: j = 2*J ma = J+m mb = J+m' so that: 0 <= j <= 2*Jmax 0 <= ma, mb <= j. For the bispectrum components B(J1,J2,J) we convert to: j1 = 2*J1 j2 = 2*J2 j = 2*J and the requirement: |J1-J2| <= J <= J1+J2, for j1+j2+j integral becomes: |j1-j2| <= j <= j1+j2, for j1+j2+j even integer or j = |j1-j2|, |j1-j2|+2,...,j1+j2-2,j1+j2 [1] Albert Bartok-Partay, "Gaussian Approximation..." Doctoral Thesis, Cambrindge University, (2009) [2] D. A. Varshalovich, A. N. Moskalev, and V. K. Khersonskii, "Quantum Theory of Angular Momentum," World Scientific (1988) ------------------------------------------------------------------------- */ SNA::SNA(LAMMPS* lmp, double rfac0_in, int twojmax_in, int diagonalstyle_in, int use_shared_arrays_in, double rmin0_in, int switch_flag_in) : Pointers(lmp) { wself = 1.0; use_shared_arrays = use_shared_arrays_in; rfac0 = rfac0_in; rmin0 = rmin0_in; switch_flag = switch_flag_in; twojmax = twojmax_in; diagonalstyle = diagonalstyle_in; ncoeff = compute_ncoeff(); create_twojmax_arrays(); bvec = NULL; dbvec = NULL; memory->create(bvec, ncoeff, "pair:bvec"); memory->create(dbvec, ncoeff, 3, "pair:dbvec"); rij = NULL; inside = NULL; wj = NULL; rcutij = NULL; nmax = 0; idxj = NULL; #ifdef TIMING_INFO timers = new double[20]; for(int i = 0; i < 20; i++) timers[i] = 0; print = 0; counter = 0; #endif build_indexlist(); } /* ---------------------------------------------------------------------- */ SNA::~SNA() { if(!use_shared_arrays) { destroy_twojmax_arrays(); memory->destroy(rij); memory->destroy(inside); memory->destroy(wj); memory->destroy(rcutij); memory->destroy(bvec); memory->destroy(dbvec); } delete[] idxj; } void SNA::build_indexlist() { if(diagonalstyle == 0) { int idxj_count = 0; for(int j1 = 0; j1 <= twojmax; j1++) for(int j2 = 0; j2 <= j1; j2++) for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) idxj_count++; // indexList can be changed here idxj = new SNA_LOOPINDICES[idxj_count]; idxj_max = idxj_count; idxj_count = 0; for(int j1 = 0; j1 <= twojmax; j1++) for(int j2 = 0; j2 <= j1; j2++) for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { idxj[idxj_count].j1 = j1; idxj[idxj_count].j2 = j2; idxj[idxj_count].j = j; idxj_count++; } } if(diagonalstyle == 1) { int idxj_count = 0; for(int j1 = 0; j1 <= twojmax; j1++) for(int j = 0; j <= MIN(twojmax, 2 * j1); j += 2) { idxj_count++; } // indexList can be changed here idxj = new SNA_LOOPINDICES[idxj_count]; idxj_max = idxj_count; idxj_count = 0; for(int j1 = 0; j1 <= twojmax; j1++) for(int j = 0; j <= MIN(twojmax, 2 * j1); j += 2) { idxj[idxj_count].j1 = j1; idxj[idxj_count].j2 = j1; idxj[idxj_count].j = j; idxj_count++; } } if(diagonalstyle == 2) { int idxj_count = 0; for(int j1 = 0; j1 <= twojmax; j1++) { idxj_count++; } // indexList can be changed here idxj = new SNA_LOOPINDICES[idxj_count]; idxj_max = idxj_count; idxj_count = 0; for(int j1 = 0; j1 <= twojmax; j1++) { idxj[idxj_count].j1 = j1; idxj[idxj_count].j2 = j1; idxj[idxj_count].j = j1; idxj_count++; } } if(diagonalstyle == 3) { int idxj_count = 0; for(int j1 = 0; j1 <= twojmax; j1++) for(int j2 = 0; j2 <= j1; j2++) for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) if (j >= j1) idxj_count++; // indexList can be changed here idxj = new SNA_LOOPINDICES[idxj_count]; idxj_max = idxj_count; idxj_count = 0; for(int j1 = 0; j1 <= twojmax; j1++) for(int j2 = 0; j2 <= j1; j2++) for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) if (j >= j1) { idxj[idxj_count].j1 = j1; idxj[idxj_count].j2 = j2; idxj[idxj_count].j = j; idxj_count++; } } } /* ---------------------------------------------------------------------- */ void SNA::init() { init_clebsch_gordan(); init_rootpqarray(); } void SNA::grow_rij(int newnmax) { if(newnmax <= nmax) return; nmax = newnmax; if(!use_shared_arrays) { memory->destroy(rij); memory->destroy(inside); memory->destroy(wj); memory->destroy(rcutij); memory->create(rij, nmax, 3, "pair:rij"); memory->create(inside, nmax, "pair:inside"); memory->create(wj, nmax, "pair:wj"); memory->create(rcutij, nmax, "pair:rcutij"); } } /* ---------------------------------------------------------------------- compute Ui by summing over neighbors j ------------------------------------------------------------------------- */ void SNA::compute_ui(int jnum) { double rsq, r, x, y, z, z0, theta0; // utot(j,ma,mb) = 0 for all j,ma,ma // utot(j,ma,ma) = 1 for all j,ma // for j in neighbors of i: // compute r0 = (x,y,z,z0) // utot(j,ma,mb) += u(r0;j,ma,mb) for all j,ma,mb zero_uarraytot(); addself_uarraytot(wself); #ifdef TIMING_INFO clock_gettime(CLOCK_REALTIME, &starttime); #endif for(int j = 0; j < jnum; j++) { x = rij[j][0]; y = rij[j][1]; z = rij[j][2]; rsq = x * x + y * y + z * z; r = sqrt(rsq); theta0 = (r - rmin0) * rfac0 * MY_PI / (rcutij[j] - rmin0); // theta0 = (r - rmin0) * rscale0; z0 = r / tan(theta0); compute_uarray(x, y, z, z0, r); add_uarraytot(r, wj[j], rcutij[j]); } #ifdef TIMING_INFO clock_gettime(CLOCK_REALTIME, &endtime); timers[0] += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); #endif } void SNA::compute_ui_omp(int jnum, int sub_threads) { double rsq, r, x, y, z, z0, theta0; // utot(j,ma,mb) = 0 for all j,ma,ma // utot(j,ma,ma) = 1 for all j,ma // for j in neighbors of i: // compute r0 = (x,y,z,z0) // utot(j,ma,mb) += u(r0;j,ma,mb) for all j,ma,mb zero_uarraytot(); addself_uarraytot(wself); for(int j = 0; j < jnum; j++) { x = rij[j][0]; y = rij[j][1]; z = rij[j][2]; rsq = x * x + y * y + z * z; r = sqrt(rsq); theta0 = (r - rmin0) * rfac0 * MY_PI / (rcutij[j] - rmin0); // theta0 = (r - rmin0) * rscale0; z0 = r / tan(theta0); omp_set_num_threads(sub_threads); #if defined(_OPENMP) #pragma omp parallel shared(x,y,z,z0,r,sub_threads) default(none) #endif { compute_uarray_omp(x, y, z, z0, r, sub_threads); } add_uarraytot(r, wj[j], rcutij[j]); } } /* ---------------------------------------------------------------------- compute Zi by summing over products of Ui ------------------------------------------------------------------------- */ void SNA::compute_zi() { // for j1 = 0,...,twojmax // for j2 = 0,twojmax // for j = |j1-j2|,Min(twojmax,j1+j2),2 // for ma = 0,...,j // for mb = 0,...,jmid // z(j1,j2,j,ma,mb) = 0 // for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2) // sumb1 = 0 // ma2 = ma-ma1+(j1+j2-j)/2; // for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2) // mb2 = mb-mb1+(j1+j2-j)/2; // sumb1 += cg(j1,mb1,j2,mb2,j) * // u(j1,ma1,mb1) * u(j2,ma2,mb2) // z(j1,j2,j,ma,mb) += sumb1*cg(j1,ma1,j2,ma2,j) #ifdef TIMING_INFO clock_gettime(CLOCK_REALTIME, &starttime); #endif // compute_dbidrj() requires full j1/j2/j chunk of z elements // use zarray j1/j2 symmetry for(int j1 = 0; j1 <= twojmax; j1++) for(int j2 = 0; j2 <= j1; j2++) { for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { double sumb1_r, sumb1_i; int ma2, mb2; for(int mb = 0; 2*mb <= j; mb++) for(int ma = 0; ma <= j; ma++) { zarray_r[j1][j2][j][ma][mb] = 0.0; zarray_i[j1][j2][j][ma][mb] = 0.0; for(int ma1 = MAX(0, (2 * ma - j - j2 + j1) / 2); ma1 <= MIN(j1, (2 * ma - j + j2 + j1) / 2); ma1++) { sumb1_r = 0.0; sumb1_i = 0.0; ma2 = (2 * ma - j - (2 * ma1 - j1) + j2) / 2; for(int mb1 = MAX(0, (2 * mb - j - j2 + j1) / 2); mb1 <= MIN(j1, (2 * mb - j + j2 + j1) / 2); mb1++) { mb2 = (2 * mb - j - (2 * mb1 - j1) + j2) / 2; sumb1_r += cgarray[j1][j2][j][mb1][mb2] * (uarraytot_r[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2] - uarraytot_i[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2]); sumb1_i += cgarray[j1][j2][j][mb1][mb2] * (uarraytot_r[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2] + uarraytot_i[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2]); } // end loop over mb1 zarray_r[j1][j2][j][ma][mb] += sumb1_r * cgarray[j1][j2][j][ma1][ma2]; zarray_i[j1][j2][j][ma][mb] += sumb1_i * cgarray[j1][j2][j][ma1][ma2]; } // end loop over ma1 } // end loop over ma, mb } // end loop over j } // end loop over j1, j2 #ifdef TIMING_INFO clock_gettime(CLOCK_REALTIME, &endtime); timers[1] += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); #endif } void SNA::compute_zi_omp(int sub_threads) { // for j1 = 0,...,twojmax // for j2 = 0,twojmax // for j = |j1-j2|,Min(twojmax,j1+j2),2 // for ma = 0,...,j // for mb = 0,...,j // z(j1,j2,j,ma,mb) = 0 // for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2) // sumb1 = 0 // ma2 = ma-ma1+(j1+j2-j)/2; // for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2) // mb2 = mb-mb1+(j1+j2-j)/2; // sumb1 += cg(j1,mb1,j2,mb2,j) * // u(j1,ma1,mb1) * u(j2,ma2,mb2) // z(j1,j2,j,ma,mb) += sumb1*cg(j1,ma1,j2,ma2,j) if(omp_in_parallel()) omp_set_num_threads(sub_threads); // compute_dbidrj() requires full j1/j2/j chunk of z elements // use zarray j1/j2 symmetry #if defined(_OPENMP) #pragma omp parallel for schedule(auto) default(none) #endif for(int j1 = 0; j1 <= twojmax; j1++) for(int j2 = 0; j2 <= j1; j2++) for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { double sumb1_r, sumb1_i; int ma2, mb2; for(int ma = 0; ma <= j; ma++) for(int mb = 0; mb <= j; mb++) { zarray_r[j1][j2][j][ma][mb] = 0.0; zarray_i[j1][j2][j][ma][mb] = 0.0; for(int ma1 = MAX(0, (2 * ma - j - j2 + j1) / 2); ma1 <= MIN(j1, (2 * ma - j + j2 + j1) / 2); ma1++) { sumb1_r = 0.0; sumb1_i = 0.0; ma2 = (2 * ma - j - (2 * ma1 - j1) + j2) / 2; for(int mb1 = MAX(0, (2 * mb - j - j2 + j1) / 2); mb1 <= MIN(j1, (2 * mb - j + j2 + j1) / 2); mb1++) { mb2 = (2 * mb - j - (2 * mb1 - j1) + j2) / 2; sumb1_r += cgarray[j1][j2][j][mb1][mb2] * (uarraytot_r[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2] - uarraytot_i[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2]); sumb1_i += cgarray[j1][j2][j][mb1][mb2] * (uarraytot_r[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2] + uarraytot_i[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2]); } zarray_r[j1][j2][j][ma][mb] += sumb1_r * cgarray[j1][j2][j][ma1][ma2]; zarray_i[j1][j2][j][ma][mb] += sumb1_i * cgarray[j1][j2][j][ma1][ma2]; } } } } /* ---------------------------------------------------------------------- compute Bi by summing conj(Ui)*Zi ------------------------------------------------------------------------- */ void SNA::compute_bi() { // for j1 = 0,...,twojmax // for j2 = 0,twojmax // for j = |j1-j2|,Min(twojmax,j1+j2),2 // b(j1,j2,j) = 0 // for mb = 0,...,jmid // for ma = 0,...,j // b(j1,j2,j) += // 2*Conj(u(j,ma,mb))*z(j1,j2,j,ma,mb) #ifdef TIMING_INFO clock_gettime(CLOCK_REALTIME, &starttime); #endif for(int j1 = 0; j1 <= twojmax; j1++) for(int j2 = 0; j2 <= j1; j2++) { for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { barray[j1][j2][j] = 0.0; for(int mb = 0; 2*mb < j; mb++) { for(int ma = 0; ma <= j; ma++) { barray[j1][j2][j] += uarraytot_r[j][ma][mb] * zarray_r[j1][j2][j][ma][mb] + uarraytot_i[j][ma][mb] * zarray_i[j1][j2][j][ma][mb]; } } // For j even, special treatment for middle column if (j%2 == 0) { int mb = j/2; for(int ma = 0; ma < mb; ma++) barray[j1][j2][j] += uarraytot_r[j][ma][mb] * zarray_r[j1][j2][j][ma][mb] + uarraytot_i[j][ma][mb] * zarray_i[j1][j2][j][ma][mb]; int ma = mb; barray[j1][j2][j] += (uarraytot_r[j][ma][mb] * zarray_r[j1][j2][j][ma][mb] + uarraytot_i[j][ma][mb] * zarray_i[j1][j2][j][ma][mb])*0.5; } barray[j1][j2][j] *= 2.0; } } #ifdef TIMING_INFO clock_gettime(CLOCK_REALTIME, &endtime); timers[2] += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); #endif } /* ---------------------------------------------------------------------- copy Bi array to a vector ------------------------------------------------------------------------- */ void SNA::copy_bi2bvec() { int ncount, j1, j2, j; ncount = 0; for(j1 = 0; j1 <= twojmax; j1++) if(diagonalstyle == 0) { for(j2 = 0; j2 <= j1; j2++) for(j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { bvec[ncount] = barray[j1][j2][j]; ncount++; } } else if(diagonalstyle == 1) { j2 = j1; for(j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { bvec[ncount] = barray[j1][j2][j]; ncount++; } } else if(diagonalstyle == 2) { j = j2 = j1; bvec[ncount] = barray[j1][j2][j]; ncount++; } else if(diagonalstyle == 3) { for(j2 = 0; j2 <= j1; j2++) for(j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) if (j >= j1) { bvec[ncount] = barray[j1][j2][j]; ncount++; } } } /* ---------------------------------------------------------------------- calculate derivative of Ui w.r.t. atom j ------------------------------------------------------------------------- */ void SNA::compute_duidrj(double* rij, double wj, double rcut) { double rsq, r, x, y, z, z0, theta0, cs, sn; double dz0dr; x = rij[0]; y = rij[1]; z = rij[2]; rsq = x * x + y * y + z * z; r = sqrt(rsq); double rscale0 = rfac0 * MY_PI / (rcut - rmin0); theta0 = (r - rmin0) * rscale0; cs = cos(theta0); sn = sin(theta0); z0 = r * cs / sn; dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq; #ifdef TIMING_INFO clock_gettime(CLOCK_REALTIME, &starttime); #endif compute_duarray(x, y, z, z0, r, dz0dr, wj, rcut); #ifdef TIMING_INFO clock_gettime(CLOCK_REALTIME, &endtime); timers[3] += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); #endif } /* ---------------------------------------------------------------------- calculate derivative of Bi w.r.t. atom j variant using indexlist for j1,j2,j variant not using symmetry relation ------------------------------------------------------------------------- */ void SNA::compute_dbidrj_nonsymm() { // for j1 = 0,...,twojmax // for j2 = 0,twojmax // for j = |j1-j2|,Min(twojmax,j1+j2),2 // dbdr(j1,j2,j) = 0 // for ma = 0,...,j // for mb = 0,...,j // dzdr = 0 // for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2) // sumb1 = 0 // ma2 = ma-ma1+(j1+j2-j)/2; // for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2) // mb2 = mb-mb1+(j1+j2-j)/2; // sumb1 += cg(j1,mb1,j2,mb2,j) * // (dudr(j1,ma1,mb1) * u(j2,ma2,mb2) + // u(j1,ma1,mb1) * dudr(j2,ma2,mb2)) // dzdr += sumb1*cg(j1,ma1,j2,ma2,j) // dbdr(j1,j2,j) += // Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb) + // Conj(u(j,ma,mb))*dzdr double* dbdr; double* dudr_r, *dudr_i; double sumb1_r[3], sumb1_i[3], dzdr_r[3], dzdr_i[3]; int ma2; #ifdef TIMING_INFO clock_gettime(CLOCK_REALTIME, &starttime); #endif for(int JJ = 0; JJ < idxj_max; JJ++) { const int j1 = idxj[JJ].j1; const int j2 = idxj[JJ].j2; const int j = idxj[JJ].j; dbdr = dbarray[j1][j2][j]; dbdr[0] = 0.0; dbdr[1] = 0.0; dbdr[2] = 0.0; double** *j1duarray_r = duarray_r[j1]; double** *j2duarray_r = duarray_r[j2]; double** *j1duarray_i = duarray_i[j1]; double** *j2duarray_i = duarray_i[j2]; double** j1uarraytot_r = uarraytot_r[j1]; double** j2uarraytot_r = uarraytot_r[j2]; double** j1uarraytot_i = uarraytot_i[j1]; double** j2uarraytot_i = uarraytot_i[j2]; double** j1j2jcgarray = cgarray[j1][j2][j]; for(int ma = 0; ma <= j; ma++) for(int mb = 0; mb <= j; mb++) { dzdr_r[0] = 0.0; dzdr_r[1] = 0.0; dzdr_r[2] = 0.0; dzdr_i[0] = 0.0; dzdr_i[1] = 0.0; dzdr_i[2] = 0.0; const int max_mb1 = MIN(j1, (2 * mb - j + j2 + j1) / 2) + 1; const int max_ma1 = MIN(j1, (2 * ma - j + j2 + j1) / 2) + 1; for(int ma1 = MAX(0, (2 * ma - j - j2 + j1) / 2); ma1 < max_ma1; ma1++) { ma2 = (2 * ma - j - (2 * ma1 - j1) + j2) / 2; sumb1_r[0] = 0.0; sumb1_r[1] = 0.0; sumb1_r[2] = 0.0; sumb1_i[0] = 0.0; sumb1_i[1] = 0.0; sumb1_i[2] = 0.0; //inside loop 54 operations (mul and add) for(int mb1 = MAX(0, (2 * mb - j - j2 + j1) / 2), mb2 = mb + (j1 + j2 - j) / 2 - mb1; mb1 < max_mb1; mb1++, mb2--) { double* dudr1_r, *dudr1_i, *dudr2_r, *dudr2_i; dudr1_r = j1duarray_r[ma1][mb1]; dudr2_r = j2duarray_r[ma2][mb2]; dudr1_i = j1duarray_i[ma1][mb1]; dudr2_i = j2duarray_i[ma2][mb2]; const double cga_mb1mb2 = j1j2jcgarray[mb1][mb2]; const double uat_r_ma2mb2 = cga_mb1mb2 * j2uarraytot_r[ma2][mb2]; const double uat_r_ma1mb1 = cga_mb1mb2 * j1uarraytot_r[ma1][mb1]; const double uat_i_ma2mb2 = cga_mb1mb2 * j2uarraytot_i[ma2][mb2]; const double uat_i_ma1mb1 = cga_mb1mb2 * j1uarraytot_i[ma1][mb1]; for(int k = 0; k < 3; k++) { sumb1_r[k] += dudr1_r[k] * uat_r_ma2mb2; sumb1_r[k] -= dudr1_i[k] * uat_i_ma2mb2; sumb1_i[k] += dudr1_r[k] * uat_i_ma2mb2; sumb1_i[k] += dudr1_i[k] * uat_r_ma2mb2; sumb1_r[k] += dudr2_r[k] * uat_r_ma1mb1; sumb1_r[k] -= dudr2_i[k] * uat_i_ma1mb1; sumb1_i[k] += dudr2_r[k] * uat_i_ma1mb1; sumb1_i[k] += dudr2_i[k] * uat_r_ma1mb1; } } // end loop over mb1,mb2 // dzdr += sumb1*cg(j1,ma1,j2,ma2,j) dzdr_r[0] += sumb1_r[0] * j1j2jcgarray[ma1][ma2]; dzdr_r[1] += sumb1_r[1] * j1j2jcgarray[ma1][ma2]; dzdr_r[2] += sumb1_r[2] * j1j2jcgarray[ma1][ma2]; dzdr_i[0] += sumb1_i[0] * j1j2jcgarray[ma1][ma2]; dzdr_i[1] += sumb1_i[1] * j1j2jcgarray[ma1][ma2]; dzdr_i[2] += sumb1_i[2] * j1j2jcgarray[ma1][ma2]; } // end loop over ma1,ma2 // dbdr(j1,j2,j) += // Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb) + // Conj(u(j,ma,mb))*dzdr dudr_r = duarray_r[j][ma][mb]; dudr_i = duarray_i[j][ma][mb]; for(int k = 0; k < 3; k++) dbdr[k] += (dudr_r[k] * zarray_r[j1][j2][j][ma][mb] + dudr_i[k] * zarray_i[j1][j2][j][ma][mb]) + (uarraytot_r[j][ma][mb] * dzdr_r[k] + uarraytot_i[j][ma][mb] * dzdr_i[k]); } //end loop over ma mb } //end loop over j1 j2 j #ifdef TIMING_INFO clock_gettime(CLOCK_REALTIME, &endtime); timers[4] += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); #endif } /* ---------------------------------------------------------------------- calculate derivative of Bi w.r.t. atom j variant using indexlist for j1,j2,j variant using symmetry relation ------------------------------------------------------------------------- */ void SNA::compute_dbidrj() { // for j1 = 0,...,twojmax // for j2 = 0,twojmax // for j = |j1-j2|,Min(twojmax,j1+j2),2 // zdb = 0 // for mb = 0,...,jmid // for ma = 0,...,j // zdb += // Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb) // dbdr(j1,j2,j) += 2*zdb // zdb = 0 // for mb1 = 0,...,j1mid // for ma1 = 0,...,j1 // zdb += // Conj(dudr(j1,ma1,mb1))*z(j,j2,j1,ma1,mb1) // dbdr(j1,j2,j) += 2*zdb*(j+1)/(j1+1) // zdb = 0 // for mb2 = 0,...,j2mid // for ma2 = 0,...,j2 // zdb += // Conj(dudr(j2,ma2,mb2))*z(j1,j,j2,ma2,mb2) // dbdr(j1,j2,j) += 2*zdb*(j+1)/(j2+1) double* dbdr; double* dudr_r, *dudr_i; double sumzdu_r[3]; double** jjjzarray_r; double** jjjzarray_i; double jjjmambzarray_r; double jjjmambzarray_i; #ifdef TIMING_INFO clock_gettime(CLOCK_REALTIME, &starttime); #endif for(int JJ = 0; JJ < idxj_max; JJ++) { const int j1 = idxj[JJ].j1; const int j2 = idxj[JJ].j2; const int j = idxj[JJ].j; dbdr = dbarray[j1][j2][j]; dbdr[0] = 0.0; dbdr[1] = 0.0; dbdr[2] = 0.0; // Sum terms Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb) for(int k = 0; k < 3; k++) sumzdu_r[k] = 0.0; // use zarray j1/j2 symmetry (optional) if (j1 >= j2) { jjjzarray_r = zarray_r[j1][j2][j]; jjjzarray_i = zarray_i[j1][j2][j]; } else { jjjzarray_r = zarray_r[j2][j1][j]; jjjzarray_i = zarray_i[j2][j1][j]; } for(int mb = 0; 2*mb < j; mb++) for(int ma = 0; ma <= j; ma++) { dudr_r = duarray_r[j][ma][mb]; dudr_i = duarray_i[j][ma][mb]; jjjmambzarray_r = jjjzarray_r[ma][mb]; jjjmambzarray_i = jjjzarray_i[ma][mb]; for(int k = 0; k < 3; k++) sumzdu_r[k] += dudr_r[k] * jjjmambzarray_r + dudr_i[k] * jjjmambzarray_i; } //end loop over ma mb // For j even, handle middle column if (j%2 == 0) { int mb = j/2; for(int ma = 0; ma < mb; ma++) { dudr_r = duarray_r[j][ma][mb]; dudr_i = duarray_i[j][ma][mb]; jjjmambzarray_r = jjjzarray_r[ma][mb]; jjjmambzarray_i = jjjzarray_i[ma][mb]; for(int k = 0; k < 3; k++) sumzdu_r[k] += dudr_r[k] * jjjmambzarray_r + dudr_i[k] * jjjmambzarray_i; } int ma = mb; dudr_r = duarray_r[j][ma][mb]; dudr_i = duarray_i[j][ma][mb]; jjjmambzarray_r = jjjzarray_r[ma][mb]; jjjmambzarray_i = jjjzarray_i[ma][mb]; for(int k = 0; k < 3; k++) sumzdu_r[k] += (dudr_r[k] * jjjmambzarray_r + dudr_i[k] * jjjmambzarray_i)*0.5; } // end if jeven for(int k = 0; k < 3; k++) dbdr[k] += 2.0*sumzdu_r[k]; // Sum over Conj(dudr(j1,ma1,mb1))*z(j,j2,j1,ma1,mb1) double j1fac = (j+1)/(j1+1.0); for(int k = 0; k < 3; k++) sumzdu_r[k] = 0.0; // use zarray j1/j2 symmetry (optional) if (j >= j2) { jjjzarray_r = zarray_r[j][j2][j1]; jjjzarray_i = zarray_i[j][j2][j1]; } else { jjjzarray_r = zarray_r[j2][j][j1]; jjjzarray_i = zarray_i[j2][j][j1]; } for(int mb1 = 0; 2*mb1 < j1; mb1++) for(int ma1 = 0; ma1 <= j1; ma1++) { dudr_r = duarray_r[j1][ma1][mb1]; dudr_i = duarray_i[j1][ma1][mb1]; jjjmambzarray_r = jjjzarray_r[ma1][mb1]; jjjmambzarray_i = jjjzarray_i[ma1][mb1]; for(int k = 0; k < 3; k++) sumzdu_r[k] += dudr_r[k] * jjjmambzarray_r + dudr_i[k] * jjjmambzarray_i; } //end loop over ma1 mb1 // For j1 even, handle middle column if (j1%2 == 0) { int mb1 = j1/2; for(int ma1 = 0; ma1 < mb1; ma1++) { dudr_r = duarray_r[j1][ma1][mb1]; dudr_i = duarray_i[j1][ma1][mb1]; jjjmambzarray_r = jjjzarray_r[ma1][mb1]; jjjmambzarray_i = jjjzarray_i[ma1][mb1]; for(int k = 0; k < 3; k++) sumzdu_r[k] += dudr_r[k] * jjjmambzarray_r + dudr_i[k] * jjjmambzarray_i; } int ma1 = mb1; dudr_r = duarray_r[j1][ma1][mb1]; dudr_i = duarray_i[j1][ma1][mb1]; jjjmambzarray_r = jjjzarray_r[ma1][mb1]; jjjmambzarray_i = jjjzarray_i[ma1][mb1]; for(int k = 0; k < 3; k++) sumzdu_r[k] += (dudr_r[k] * jjjmambzarray_r + dudr_i[k] * jjjmambzarray_i)*0.5; } // end if j1even for(int k = 0; k < 3; k++) dbdr[k] += 2.0*sumzdu_r[k]*j1fac; // Sum over Conj(dudr(j2,ma2,mb2))*z(j1,j,j2,ma2,mb2) double j2fac = (j+1)/(j2+1.0); for(int k = 0; k < 3; k++) sumzdu_r[k] = 0.0; // use zarray j1/j2 symmetry (optional) if (j1 >= j) { jjjzarray_r = zarray_r[j1][j][j2]; jjjzarray_i = zarray_i[j1][j][j2]; } else { jjjzarray_r = zarray_r[j][j1][j2]; jjjzarray_i = zarray_i[j][j1][j2]; } for(int mb2 = 0; 2*mb2 < j2; mb2++) for(int ma2 = 0; ma2 <= j2; ma2++) { dudr_r = duarray_r[j2][ma2][mb2]; dudr_i = duarray_i[j2][ma2][mb2]; jjjmambzarray_r = jjjzarray_r[ma2][mb2]; jjjmambzarray_i = jjjzarray_i[ma2][mb2]; for(int k = 0; k < 3; k++) sumzdu_r[k] += dudr_r[k] * jjjmambzarray_r + dudr_i[k] * jjjmambzarray_i; } //end loop over ma2 mb2 // For j2 even, handle middle column if (j2%2 == 0) { int mb2 = j2/2; for(int ma2 = 0; ma2 < mb2; ma2++) { dudr_r = duarray_r[j2][ma2][mb2]; dudr_i = duarray_i[j2][ma2][mb2]; jjjmambzarray_r = jjjzarray_r[ma2][mb2]; jjjmambzarray_i = jjjzarray_i[ma2][mb2]; for(int k = 0; k < 3; k++) sumzdu_r[k] += dudr_r[k] * jjjmambzarray_r + dudr_i[k] * jjjmambzarray_i; } int ma2 = mb2; dudr_r = duarray_r[j2][ma2][mb2]; dudr_i = duarray_i[j2][ma2][mb2]; jjjmambzarray_r = jjjzarray_r[ma2][mb2]; jjjmambzarray_i = jjjzarray_i[ma2][mb2]; for(int k = 0; k < 3; k++) sumzdu_r[k] += (dudr_r[k] * jjjmambzarray_r + dudr_i[k] * jjjmambzarray_i)*0.5; } // end if j2even for(int k = 0; k < 3; k++) dbdr[k] += 2.0*sumzdu_r[k]*j2fac; } //end loop over j1 j2 j #ifdef TIMING_INFO clock_gettime(CLOCK_REALTIME, &endtime); timers[4] += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); #endif } /* ---------------------------------------------------------------------- copy Bi derivatives into a vector ------------------------------------------------------------------------- */ void SNA::copy_dbi2dbvec() { int ncount, j1, j2, j; ncount = 0; for(j1 = 0; j1 <= twojmax; j1++) { if(diagonalstyle == 0) { for(j2 = 0; j2 <= j1; j2++) for(j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { dbvec[ncount][0] = dbarray[j1][j2][j][0]; dbvec[ncount][1] = dbarray[j1][j2][j][1]; dbvec[ncount][2] = dbarray[j1][j2][j][2]; ncount++; } } else if(diagonalstyle == 1) { j2 = j1; for(j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { dbvec[ncount][0] = dbarray[j1][j2][j][0]; dbvec[ncount][1] = dbarray[j1][j2][j][1]; dbvec[ncount][2] = dbarray[j1][j2][j][2]; ncount++; } } else if(diagonalstyle == 2) { j = j2 = j1; dbvec[ncount][0] = dbarray[j1][j2][j][0]; dbvec[ncount][1] = dbarray[j1][j2][j][1]; dbvec[ncount][2] = dbarray[j1][j2][j][2]; ncount++; } else if(diagonalstyle == 3) { for(j2 = 0; j2 <= j1; j2++) for(j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) if (j >= j1) { dbvec[ncount][0] = dbarray[j1][j2][j][0]; dbvec[ncount][1] = dbarray[j1][j2][j][1]; dbvec[ncount][2] = dbarray[j1][j2][j][2]; ncount++; } } } } /* ---------------------------------------------------------------------- */ void SNA::zero_uarraytot() { for (int j = 0; j <= twojmax; j++) for (int ma = 0; ma <= j; ma++) for (int mb = 0; mb <= j; mb++) { uarraytot_r[j][ma][mb] = 0.0; uarraytot_i[j][ma][mb] = 0.0; } } /* ---------------------------------------------------------------------- */ void SNA::addself_uarraytot(double wself_in) { for (int j = 0; j <= twojmax; j++) for (int ma = 0; ma <= j; ma++) { uarraytot_r[j][ma][ma] = wself_in; uarraytot_i[j][ma][ma] = 0.0; } } /* ---------------------------------------------------------------------- add Wigner U-functions for one neighbor to the total ------------------------------------------------------------------------- */ void SNA::add_uarraytot(double r, double wj, double rcut) { double sfac; sfac = compute_sfac(r, rcut); sfac *= wj; for (int j = 0; j <= twojmax; j++) for (int ma = 0; ma <= j; ma++) for (int mb = 0; mb <= j; mb++) { uarraytot_r[j][ma][mb] += sfac * uarray_r[j][ma][mb]; uarraytot_i[j][ma][mb] += sfac * uarray_i[j][ma][mb]; } } void SNA::add_uarraytot_omp(double r, double wj, double rcut) { double sfac; sfac = compute_sfac(r, rcut); sfac *= wj; #if defined(_OPENMP) #pragma omp for #endif for (int j = 0; j <= twojmax; j++) for (int ma = 0; ma <= j; ma++) for (int mb = 0; mb <= j; mb++) { uarraytot_r[j][ma][mb] += sfac * uarray_r[j][ma][mb]; uarraytot_i[j][ma][mb] += sfac * uarray_i[j][ma][mb]; } } /* ---------------------------------------------------------------------- compute Wigner U-functions for one neighbor ------------------------------------------------------------------------- */ void SNA::compute_uarray(double x, double y, double z, double z0, double r) { double r0inv; double a_r, b_r, a_i, b_i; double rootpq; // compute Cayley-Klein parameters for unit quaternion r0inv = 1.0 / sqrt(r * r + z0 * z0); a_r = r0inv * z0; a_i = -r0inv * z; b_r = r0inv * y; b_i = -r0inv * x; // VMK Section 4.8.2 uarray_r[0][0][0] = 1.0; uarray_i[0][0][0] = 0.0; for (int j = 1; j <= twojmax; j++) { // fill in left side of matrix layer from previous layer for (int mb = 0; 2*mb <= j; mb++) { uarray_r[j][0][mb] = 0.0; uarray_i[j][0][mb] = 0.0; for (int ma = 0; ma < j; ma++) { rootpq = rootpqarray[j - ma][j - mb]; uarray_r[j][ma][mb] += rootpq * (a_r * uarray_r[j - 1][ma][mb] + a_i * uarray_i[j - 1][ma][mb]); uarray_i[j][ma][mb] += rootpq * (a_r * uarray_i[j - 1][ma][mb] - a_i * uarray_r[j - 1][ma][mb]); rootpq = rootpqarray[ma + 1][j - mb]; uarray_r[j][ma + 1][mb] = -rootpq * (b_r * uarray_r[j - 1][ma][mb] + b_i * uarray_i[j - 1][ma][mb]); uarray_i[j][ma + 1][mb] = -rootpq * (b_r * uarray_i[j - 1][ma][mb] - b_i * uarray_r[j - 1][ma][mb]); } } // copy left side to right side with inversion symmetry VMK 4.4(2) // u[ma-j][mb-j] = (-1)^(ma-mb)*Conj([u[ma][mb]) int mbpar = -1; for (int mb = 0; 2*mb <= j; mb++) { mbpar = -mbpar; int mapar = -mbpar; for (int ma = 0; ma <= j; ma++) { mapar = -mapar; if (mapar == 1) { uarray_r[j][j-ma][j-mb] = uarray_r[j][ma][mb]; uarray_i[j][j-ma][j-mb] = -uarray_i[j][ma][mb]; } else { uarray_r[j][j-ma][j-mb] = -uarray_r[j][ma][mb]; uarray_i[j][j-ma][j-mb] = uarray_i[j][ma][mb]; } } } } } void SNA::compute_uarray_omp(double x, double y, double z, double z0, double r, int sub_threads) { double r0inv; double a_r, b_r, a_i, b_i; double rootpq; // compute Cayley-Klein parameters for unit quaternion r0inv = 1.0 / sqrt(r * r + z0 * z0); a_r = r0inv * z0; a_i = -r0inv * z; b_r = r0inv * y; b_i = -r0inv * x; // VMK Section 4.8.2 uarray_r[0][0][0] = 1.0; uarray_i[0][0][0] = 0.0; for (int j = 1; j <= twojmax; j++) { #if defined(_OPENMP) #pragma omp for #endif for (int mb = 0; mb < j; mb++) { uarray_r[j][0][mb] = 0.0; uarray_i[j][0][mb] = 0.0; for (int ma = 0; ma < j; ma++) { rootpq = rootpqarray[j - ma][j - mb]; uarray_r[j][ma][mb] += rootpq * (a_r * uarray_r[j - 1][ma][mb] + a_i * uarray_i[j - 1][ma][mb]); uarray_i[j][ma][mb] += rootpq * (a_r * uarray_i[j - 1][ma][mb] - a_i * uarray_r[j - 1][ma][mb]); rootpq = rootpqarray[ma + 1][j - mb]; uarray_r[j][ma + 1][mb] = -rootpq * (b_r * uarray_r[j - 1][ma][mb] + b_i * uarray_i[j - 1][ma][mb]); uarray_i[j][ma + 1][mb] = -rootpq * (b_r * uarray_i[j - 1][ma][mb] - b_i * uarray_r[j - 1][ma][mb]); } } int mb = j; uarray_r[j][0][mb] = 0.0; uarray_i[j][0][mb] = 0.0; #if defined(_OPENMP) #pragma omp for #endif for (int ma = 0; ma < j; ma++) { rootpq = rootpqarray[j - ma][mb]; uarray_r[j][ma][mb] += rootpq * (b_r * uarray_r[j - 1][ma][mb - 1] - b_i * uarray_i[j - 1][ma][mb - 1]); uarray_i[j][ma][mb] += rootpq * (b_r * uarray_i[j - 1][ma][mb - 1] + b_i * uarray_r[j - 1][ma][mb - 1]); rootpq = rootpqarray[ma + 1][mb]; uarray_r[j][ma + 1][mb] = rootpq * (a_r * uarray_r[j - 1][ma][mb - 1] - a_i * uarray_i[j - 1][ma][mb - 1]); uarray_i[j][ma + 1][mb] = rootpq * (a_r * uarray_i[j - 1][ma][mb - 1] + a_i * uarray_r[j - 1][ma][mb - 1]); } } } /* ---------------------------------------------------------------------- compute derivatives of Wigner U-functions for one neighbor see comments in compute_uarray() ------------------------------------------------------------------------- */ void SNA::compute_duarray(double x, double y, double z, double z0, double r, double dz0dr, double wj, double rcut) { double r0inv; double a_r, a_i, b_r, b_i; double da_r[3], da_i[3], db_r[3], db_i[3]; double dz0[3], dr0inv[3], dr0invdr; double rootpq; double rinv = 1.0 / r; double ux = x * rinv; double uy = y * rinv; double uz = z * rinv; r0inv = 1.0 / sqrt(r * r + z0 * z0); a_r = z0 * r0inv; a_i = -z * r0inv; b_r = y * r0inv; b_i = -x * r0inv; dr0invdr = -pow(r0inv, 3.0) * (r + z0 * dz0dr); dr0inv[0] = dr0invdr * ux; dr0inv[1] = dr0invdr * uy; dr0inv[2] = dr0invdr * uz; dz0[0] = dz0dr * ux; dz0[1] = dz0dr * uy; dz0[2] = dz0dr * uz; for (int k = 0; k < 3; k++) { da_r[k] = dz0[k] * r0inv + z0 * dr0inv[k]; da_i[k] = -z * dr0inv[k]; } da_i[2] += -r0inv; for (int k = 0; k < 3; k++) { db_r[k] = y * dr0inv[k]; db_i[k] = -x * dr0inv[k]; } db_i[0] += -r0inv; db_r[1] += r0inv; uarray_r[0][0][0] = 1.0; duarray_r[0][0][0][0] = 0.0; duarray_r[0][0][0][1] = 0.0; duarray_r[0][0][0][2] = 0.0; uarray_i[0][0][0] = 0.0; duarray_i[0][0][0][0] = 0.0; duarray_i[0][0][0][1] = 0.0; duarray_i[0][0][0][2] = 0.0; for (int j = 1; j <= twojmax; j++) { for (int mb = 0; 2*mb <= j; mb++) { uarray_r[j][0][mb] = 0.0; duarray_r[j][0][mb][0] = 0.0; duarray_r[j][0][mb][1] = 0.0; duarray_r[j][0][mb][2] = 0.0; uarray_i[j][0][mb] = 0.0; duarray_i[j][0][mb][0] = 0.0; duarray_i[j][0][mb][1] = 0.0; duarray_i[j][0][mb][2] = 0.0; for (int ma = 0; ma < j; ma++) { rootpq = rootpqarray[j - ma][j - mb]; uarray_r[j][ma][mb] += rootpq * (a_r * uarray_r[j - 1][ma][mb] + a_i * uarray_i[j - 1][ma][mb]); uarray_i[j][ma][mb] += rootpq * (a_r * uarray_i[j - 1][ma][mb] - a_i * uarray_r[j - 1][ma][mb]); for (int k = 0; k < 3; k++) { duarray_r[j][ma][mb][k] += rootpq * (da_r[k] * uarray_r[j - 1][ma][mb] + da_i[k] * uarray_i[j - 1][ma][mb] + a_r * duarray_r[j - 1][ma][mb][k] + a_i * duarray_i[j - 1][ma][mb][k]); duarray_i[j][ma][mb][k] += rootpq * (da_r[k] * uarray_i[j - 1][ma][mb] - da_i[k] * uarray_r[j - 1][ma][mb] + a_r * duarray_i[j - 1][ma][mb][k] - a_i * duarray_r[j - 1][ma][mb][k]); } rootpq = rootpqarray[ma + 1][j - mb]; uarray_r[j][ma + 1][mb] = -rootpq * (b_r * uarray_r[j - 1][ma][mb] + b_i * uarray_i[j - 1][ma][mb]); uarray_i[j][ma + 1][mb] = -rootpq * (b_r * uarray_i[j - 1][ma][mb] - b_i * uarray_r[j - 1][ma][mb]); for (int k = 0; k < 3; k++) { duarray_r[j][ma + 1][mb][k] = -rootpq * (db_r[k] * uarray_r[j - 1][ma][mb] + db_i[k] * uarray_i[j - 1][ma][mb] + b_r * duarray_r[j - 1][ma][mb][k] + b_i * duarray_i[j - 1][ma][mb][k]); duarray_i[j][ma + 1][mb][k] = -rootpq * (db_r[k] * uarray_i[j - 1][ma][mb] - db_i[k] * uarray_r[j - 1][ma][mb] + b_r * duarray_i[j - 1][ma][mb][k] - b_i * duarray_r[j - 1][ma][mb][k]); } } } int mbpar = -1; for (int mb = 0; 2*mb <= j; mb++) { mbpar = -mbpar; int mapar = -mbpar; for (int ma = 0; ma <= j; ma++) { mapar = -mapar; if (mapar == 1) { uarray_r[j][j-ma][j-mb] = uarray_r[j][ma][mb]; uarray_i[j][j-ma][j-mb] = -uarray_i[j][ma][mb]; for (int k = 0; k < 3; k++) { duarray_r[j][j-ma][j-mb][k] = duarray_r[j][ma][mb][k]; duarray_i[j][j-ma][j-mb][k] = -duarray_i[j][ma][mb][k]; } } else { uarray_r[j][j-ma][j-mb] = -uarray_r[j][ma][mb]; uarray_i[j][j-ma][j-mb] = uarray_i[j][ma][mb]; for (int k = 0; k < 3; k++) { duarray_r[j][j-ma][j-mb][k] = -duarray_r[j][ma][mb][k]; duarray_i[j][j-ma][j-mb][k] = duarray_i[j][ma][mb][k]; } } } } } double sfac = compute_sfac(r, rcut); double dsfac = compute_dsfac(r, rcut); sfac *= wj; dsfac *= wj; for (int j = 0; j <= twojmax; j++) for (int ma = 0; ma <= j; ma++) for (int mb = 0; mb <= j; mb++) { duarray_r[j][ma][mb][0] = dsfac * uarray_r[j][ma][mb] * ux + sfac * duarray_r[j][ma][mb][0]; duarray_i[j][ma][mb][0] = dsfac * uarray_i[j][ma][mb] * ux + sfac * duarray_i[j][ma][mb][0]; duarray_r[j][ma][mb][1] = dsfac * uarray_r[j][ma][mb] * uy + sfac * duarray_r[j][ma][mb][1]; duarray_i[j][ma][mb][1] = dsfac * uarray_i[j][ma][mb] * uy + sfac * duarray_i[j][ma][mb][1]; duarray_r[j][ma][mb][2] = dsfac * uarray_r[j][ma][mb] * uz + sfac * duarray_r[j][ma][mb][2]; duarray_i[j][ma][mb][2] = dsfac * uarray_i[j][ma][mb] * uz + sfac * duarray_i[j][ma][mb][2]; } } /* ---------------------------------------------------------------------- memory usage of arrays ------------------------------------------------------------------------- */ double SNA::memory_usage() { int jdim = twojmax + 1; double bytes; bytes = jdim * jdim * jdim * jdim * jdim * sizeof(double); bytes += 2 * jdim * jdim * jdim * sizeof(complex); bytes += 2 * jdim * jdim * jdim * sizeof(double); bytes += jdim * jdim * jdim * 3 * sizeof(complex); bytes += jdim * jdim * jdim * 3 * sizeof(double); bytes += ncoeff * sizeof(double); bytes += jdim * jdim * jdim * jdim * jdim * sizeof(complex); return bytes; } /* ---------------------------------------------------------------------- */ void SNA::create_twojmax_arrays() { int jdim = twojmax + 1; memory->create(cgarray, jdim, jdim, jdim, jdim, jdim, "sna:cgarray"); memory->create(rootpqarray, jdim+1, jdim+1, "sna:rootpqarray"); memory->create(barray, jdim, jdim, jdim, "sna:barray"); memory->create(dbarray, jdim, jdim, jdim, 3, "sna:dbarray"); memory->create(duarray_r, jdim, jdim, jdim, 3, "sna:duarray"); memory->create(duarray_i, jdim, jdim, jdim, 3, "sna:duarray"); memory->create(uarray_r, jdim, jdim, jdim, "sna:uarray"); memory->create(uarray_i, jdim, jdim, jdim, "sna:uarray"); if(!use_shared_arrays) { memory->create(uarraytot_r, jdim, jdim, jdim, "sna:uarraytot"); memory->create(zarray_r, jdim, jdim, jdim, jdim, jdim, "sna:zarray"); memory->create(uarraytot_i, jdim, jdim, jdim, "sna:uarraytot"); memory->create(zarray_i, jdim, jdim, jdim, jdim, jdim, "sna:zarray"); } } /* ---------------------------------------------------------------------- */ void SNA::destroy_twojmax_arrays() { memory->destroy(cgarray); memory->destroy(rootpqarray); memory->destroy(barray); memory->destroy(dbarray); memory->destroy(duarray_r); memory->destroy(duarray_i); memory->destroy(uarray_r); memory->destroy(uarray_i); if(!use_shared_arrays) { memory->destroy(uarraytot_r); memory->destroy(zarray_r); memory->destroy(uarraytot_i); memory->destroy(zarray_i); } } /* ---------------------------------------------------------------------- factorial n, wrapper for precomputed table ------------------------------------------------------------------------- */ double SNA::factorial(int n) { if (n < 0 || n > nmaxfactorial) { char str[128]; sprintf(str, "Invalid argument to factorial %d", n); error->all(FLERR, str); } return nfac_table[n]; } /* ---------------------------------------------------------------------- factorial n table, size SNA::nmaxfactorial+1 ------------------------------------------------------------------------- */ const double SNA::nfac_table[] = { 1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800, 39916800, 479001600, 6227020800, 87178291200, 1307674368000, 20922789888000, 355687428096000, 6.402373705728e+15, 1.21645100408832e+17, 2.43290200817664e+18, 5.10909421717094e+19, 1.12400072777761e+21, 2.5852016738885e+22, 6.20448401733239e+23, 1.5511210043331e+25, 4.03291461126606e+26, 1.08888694504184e+28, 3.04888344611714e+29, 8.8417619937397e+30, 2.65252859812191e+32, 8.22283865417792e+33, 2.63130836933694e+35, 8.68331761881189e+36, 2.95232799039604e+38, 1.03331479663861e+40, 3.71993326789901e+41, 1.37637530912263e+43, 5.23022617466601e+44, 2.03978820811974e+46, 8.15915283247898e+47, 3.34525266131638e+49, 1.40500611775288e+51, 6.04152630633738e+52, 2.65827157478845e+54, 1.1962222086548e+56, 5.50262215981209e+57, 2.58623241511168e+59, 1.24139155925361e+61, 6.08281864034268e+62, 3.04140932017134e+64, 1.55111875328738e+66, 8.06581751709439e+67, 4.27488328406003e+69, 2.30843697339241e+71, 1.26964033536583e+73, 7.10998587804863e+74, 4.05269195048772e+76, 2.35056133128288e+78, 1.3868311854569e+80, 8.32098711274139e+81, 5.07580213877225e+83, 3.14699732603879e+85, 1.98260831540444e+87, 1.26886932185884e+89, 8.24765059208247e+90, 5.44344939077443e+92, 3.64711109181887e+94, 2.48003554243683e+96, 1.71122452428141e+98, 1.19785716699699e+100, 8.50478588567862e+101, 6.12344583768861e+103, 4.47011546151268e+105, 3.30788544151939e+107, 2.48091408113954e+109, 1.88549470166605e+111, 1.45183092028286e+113, 1.13242811782063e+115, 8.94618213078297e+116, 7.15694570462638e+118, 5.79712602074737e+120, 4.75364333701284e+122, 3.94552396972066e+124, 3.31424013456535e+126, 2.81710411438055e+128, 2.42270953836727e+130, 2.10775729837953e+132, 1.85482642257398e+134, 1.65079551609085e+136, 1.48571596448176e+138, 1.3520015276784e+140, 1.24384140546413e+142, 1.15677250708164e+144, 1.08736615665674e+146, 1.03299784882391e+148, 9.91677934870949e+149, 9.61927596824821e+151, 9.42689044888324e+153, 9.33262154439441e+155, 9.33262154439441e+157, 9.42594775983835e+159, 9.61446671503512e+161, 9.90290071648618e+163, 1.02990167451456e+166, 1.08139675824029e+168, 1.14628056373471e+170, 1.22652020319614e+172, 1.32464181945183e+174, 1.44385958320249e+176, 1.58824554152274e+178, 1.76295255109024e+180, 1.97450685722107e+182, 2.23119274865981e+184, 2.54355973347219e+186, 2.92509369349301e+188, 3.3931086844519e+190, 3.96993716080872e+192, 4.68452584975429e+194, 5.5745857612076e+196, 6.68950291344912e+198, 8.09429852527344e+200, 9.8750442008336e+202, 1.21463043670253e+205, 1.50614174151114e+207, 1.88267717688893e+209, 2.37217324288005e+211, 3.01266001845766e+213, 3.8562048236258e+215, 4.97450422247729e+217, 6.46685548922047e+219, 8.47158069087882e+221, 1.118248651196e+224, 1.48727070609069e+226, 1.99294274616152e+228, 2.69047270731805e+230, 3.65904288195255e+232, 5.01288874827499e+234, 6.91778647261949e+236, 9.61572319694109e+238, 1.34620124757175e+241, 1.89814375907617e+243, 2.69536413788816e+245, 3.85437071718007e+247, 5.5502938327393e+249, 8.04792605747199e+251, 1.17499720439091e+254, 1.72724589045464e+256, 2.55632391787286e+258, 3.80892263763057e+260, 5.71338395644585e+262, 8.62720977423323e+264, 1.31133588568345e+267, 2.00634390509568e+269, 3.08976961384735e+271, 4.78914290146339e+273, 7.47106292628289e+275, 1.17295687942641e+278, 1.85327186949373e+280, 2.94670227249504e+282, 4.71472363599206e+284, 7.59070505394721e+286, 1.22969421873945e+289, 2.0044015765453e+291, 3.28721858553429e+293, 5.42391066613159e+295, 9.00369170577843e+297, 1.503616514865e+300, // nmaxfactorial = 167 }; /* ---------------------------------------------------------------------- the function delta given by VMK Eq. 8.2(1) ------------------------------------------------------------------------- */ double SNA::deltacg(int j1, int j2, int j) { double sfaccg = factorial((j1 + j2 + j) / 2 + 1); return sqrt(factorial((j1 + j2 - j) / 2) * factorial((j1 - j2 + j) / 2) * factorial((-j1 + j2 + j) / 2) / sfaccg); } /* ---------------------------------------------------------------------- assign Clebsch-Gordan coefficients using the quasi-binomial formula VMK 8.2.1(3) ------------------------------------------------------------------------- */ void SNA::init_clebsch_gordan() { double sum,dcg,sfaccg; int m, aa2, bb2, cc2; int ifac; for (int j1 = 0; j1 <= twojmax; j1++) for (int j2 = 0; j2 <= twojmax; j2++) for (int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) for (int m1 = 0; m1 <= j1; m1 += 1) { aa2 = 2 * m1 - j1; for (int m2 = 0; m2 <= j2; m2 += 1) { // -c <= cc <= c bb2 = 2 * m2 - j2; m = (aa2 + bb2 + j) / 2; if(m < 0 || m > j) continue; sum = 0.0; for (int z = MAX(0, MAX(-(j - j2 + aa2) / 2, -(j - j1 - bb2) / 2)); z <= MIN((j1 + j2 - j) / 2, MIN((j1 - aa2) / 2, (j2 + bb2) / 2)); z++) { ifac = z % 2 ? -1 : 1; sum += ifac / (factorial(z) * factorial((j1 + j2 - j) / 2 - z) * factorial((j1 - aa2) / 2 - z) * factorial((j2 + bb2) / 2 - z) * factorial((j - j2 + aa2) / 2 + z) * factorial((j - j1 - bb2) / 2 + z)); } cc2 = 2 * m - j; dcg = deltacg(j1, j2, j); sfaccg = sqrt(factorial((j1 + aa2) / 2) * factorial((j1 - aa2) / 2) * factorial((j2 + bb2) / 2) * factorial((j2 - bb2) / 2) * factorial((j + cc2) / 2) * factorial((j - cc2) / 2) * (j + 1)); cgarray[j1][j2][j][m1][m2] = sum * dcg * sfaccg; } } } /* ---------------------------------------------------------------------- pre-compute table of sqrt[p/m2], p, q = 1,twojmax the p = 0, q = 0 entries are allocated and skipped for convenience. ------------------------------------------------------------------------- */ void SNA::init_rootpqarray() { for (int p = 1; p <= twojmax; p++) for (int q = 1; q <= twojmax; q++) rootpqarray[p][q] = sqrt(static_cast(p)/q); } /* ---------------------------------------------------------------------- a = j/2 ------------------------------------------------------------------------- */ void SNA::jtostr(char* str, int j) { if(j % 2 == 0) sprintf(str, "%d", j / 2); else sprintf(str, "%d/2", j); } /* ---------------------------------------------------------------------- aa = m - j/2 ------------------------------------------------------------------------- */ void SNA::mtostr(char* str, int j, int m) { if(j % 2 == 0) sprintf(str, "%d", m - j / 2); else sprintf(str, "%d/2", 2 * m - j); } /* ---------------------------------------------------------------------- list values of Clebsch-Gordan coefficients using notation of VMK Table 8.11 ------------------------------------------------------------------------- */ void SNA::print_clebsch_gordan(FILE* file) { char stra[20], strb[20], strc[20], straa[20], strbb[20], strcc[20]; int m, aa2, bb2; fprintf(file, "a, aa, b, bb, c, cc, c(a,aa,b,bb,c,cc) \n"); for (int j1 = 0; j1 <= twojmax; j1++) { jtostr(stra, j1); for (int j2 = 0; j2 <= twojmax; j2++) { jtostr(strb, j2); for (int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { jtostr(strc, j); for (int m1 = 0; m1 <= j1; m1 += 1) { mtostr(straa, j1, m1); aa2 = 2 * m1 - j1; for (int m2 = 0; m2 <= j2; m2 += 1) { bb2 = 2 * m2 - j2; m = (aa2 + bb2 + j) / 2; if(m < 0 || m > j) continue; mtostr(strbb, j2, m2); mtostr(strcc, j, m); fprintf(file, "%s\t%s\t%s\t%s\t%s\t%s\t%g\n", stra, straa, strb, strbb, strc, strcc, cgarray[j1][j2][j][m1][m2]); } } } } } } /* ---------------------------------------------------------------------- */ int SNA::compute_ncoeff() { int ncount; ncount = 0; for (int j1 = 0; j1 <= twojmax; j1++) if(diagonalstyle == 0) { for (int j2 = 0; j2 <= j1; j2++) for (int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) ncount++; } else if(diagonalstyle == 1) { int j2 = j1; for (int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) ncount++; } else if(diagonalstyle == 2) { ncount++; } else if(diagonalstyle == 3) { for (int j2 = 0; j2 <= j1; j2++) for (int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) if (j >= j1) ncount++; } return ncount; } /* ---------------------------------------------------------------------- */ double SNA::compute_sfac(double r, double rcut) { if (switch_flag == 0) return 1.0; if (switch_flag == 1) { if(r <= rmin0) return 1.0; else if(r > rcut) return 0.0; else { double rcutfac = MY_PI / (rcut - rmin0); return 0.5 * (cos((r - rmin0) * rcutfac) + 1.0); } } return 0.0; } /* ---------------------------------------------------------------------- */ double SNA::compute_dsfac(double r, double rcut) { if (switch_flag == 0) return 0.0; if (switch_flag == 1) { if(r <= rmin0) return 0.0; else if(r > rcut) return 0.0; else { double rcutfac = MY_PI / (rcut - rmin0); return -0.5 * sin((r - rmin0) * rcutfac) * rcutfac; } } return 0.0; }