diff --git a/doc/src/compute_sna_atom.txt b/doc/src/compute_sna_atom.txt index efbf2e9ea3..518d28aec9 100644 --- a/doc/src/compute_sna_atom.txt +++ b/doc/src/compute_sna_atom.txt @@ -24,12 +24,7 @@ twojmax = band limit for bispectrum components (non-negative integer) :l R_1, R_2,... = list of cutoff radii, one for each type (distance units) :l w_1, w_2,... = list of neighbor weights, one for each type :l zero or more keyword/value pairs may be appended :l -keyword = {diagonal} or {rmin0} or {switchflag} or {bzeroflag} or {quadraticflag} :l - {diagonal} value = {0} or {1} or {2} or {3} - {0} = all j1, j2, j <= twojmax, j2 <= j1 - {1} = subset satisfying j1 == j2 - {2} = subset satisfying j1 == j2 == j3 - {3} = subset satisfying j2 <= j1 <= j +keyword = {rmin0} or {switchflag} or {bzeroflag} or {quadraticflag} :l {rmin0} value = parameter in distance to angle conversion (distance units) {switchflag} value = {0} or {1} {0} = do not use switching function @@ -44,7 +39,7 @@ keyword = {diagonal} or {rmin0} or {switchflag} or {bzeroflag} or {quadraticflag [Examples:] -compute b all sna/atom 1.4 0.99363 6 2.0 2.4 0.75 1.0 diagonal 3 rmin0 0.0 +compute b all sna/atom 1.4 0.99363 6 2.0 2.4 0.75 1.0 rmin0 0.0 compute db all sna/atom 1.4 0.95 6 2.0 1.0 compute vb all sna/atom 1.4 0.95 6 2.0 1.0 :pre @@ -151,7 +146,7 @@ The argument {rfac0} and the optional keyword {rmin0} define the linear mapping from radial distance to polar angle {theta0} on the 3-sphere. -The argument {twojmax} and the keyword {diagonal} define which +The argument {twojmax} defines which bispectrum components are generated. See section below on output for a detailed explanation of the number of bispectrum components and the ordered in which they are listed. @@ -192,23 +187,18 @@ command that includes all pairs in the neighbor list. Compute {sna/atom} calculates a per-atom array, each column corresponding to a particular bispectrum component. The total number of columns and the identity of the bispectrum component contained in -each column depend on the values of {twojmax} and {diagonal}, as +each column depend of the value of {twojmax}, as described by the following piece of python code: for j1 in range(0,twojmax+1): - if(diagonal==2): - print j1/2.,j1/2.,j1/2. - elif(diagonal==1): - for j in range(0,min(twojmax,2*j1)+1,2): - print j1/2.,j1/2.,j/2. - elif(diagonal==0): - for j2 in range(0,j1+1): - for j in range(j1-j2,min(twojmax,j1+j2)+1,2): - print j1/2.,j2/2.,j/2. - elif(diagonal==3): - for j2 in range(0,j1+1): - for j in range(j1-j2,min(twojmax,j1+j2)+1,2): - if (j>=j1): print j1/2.,j2/2.,j/2. :pre + for j2 in range(0,j1+1): + for j in range(j1-j2,min(twojmax,j1+j2)+1,2): + if (j>=j1): print j1/2.,j2/2.,j/2. :pre + +NOTE: the {diagonal} keyword allowing other possible choices +for the number of bispectrum components was removed in 2019, +since all potentials use the value of 3, corresponding to the +above set of bispectrum components. Compute {snad/atom} evaluates a per-atom array. The columns are arranged into {ntypes} blocks, listed in order of atom type {I}. Each @@ -259,7 +249,7 @@ package"_Build_package.html doc page for more info. [Default:] -The optional keyword defaults are {diagonal} = 0, {rmin0} = 0, +The optional keyword defaults are {rmin0} = 0, {switchflag} = 1, {bzeroflag} = 1, {quadraticflag} = 0, :line diff --git a/doc/src/pair_snap.txt b/doc/src/pair_snap.txt index a796cfdeba..1fba74a188 100644 --- a/doc/src/pair_snap.txt +++ b/doc/src/pair_snap.txt @@ -38,7 +38,7 @@ where {B_k^i} is the {k}-th bispectrum component of atom {i}, and {beta_k^alpha_i} is the corresponding linear coefficient that depends on {alpha_i}, the SNAP element of atom {i}. The number of bispectrum components used and their definitions -depend on the values of {twojmax} and {diagonalstyle} +depend on the value of {twojmax} defined in the SNAP parameter file described below. The bispectrum calculation is described in more detail in "compute sna/atom"_compute_sna_atom.html. @@ -125,14 +125,13 @@ This line is followed by {ncoeff} coefficients, one per line. The SNAP parameter file can contain blank and comment lines (start with #) anywhere. Each non-blank non-comment line must contain one keyword/value pair. The required keywords are {rcutfac} and -{twojmax}. Optional keywords are {rfac0}, {rmin0}, {diagonalstyle}, +{twojmax}. Optional keywords are {rfac0}, {rmin0}, {switchflag}, and {bzeroflag}. The default values for these keywords are {rfac0} = 0.99363 {rmin0} = 0.0 -{diagonalstyle} = 3 {switchflag} = 0 {bzeroflag} = 1 {quadraticflag} = 1 :ul @@ -144,6 +143,9 @@ If {quadraticflag} is set to 1, then the SNAP energy expression includes the qua The SNAP element file should contain {K}({K}+1)/2 additional coefficients for each element, the upper-triangular elements of alpha. +NOTE: The previously used {diagonalstyle} keyword was removed in 2019, +since all known SNAP potentials use the default value of 3. + :line [Mixing, shift, table, tail correction, restart, rRESPA info]: diff --git a/potentials/Ta06A.snapparam b/potentials/Ta06A.snapparam index 283629d658..629d96d708 100644 --- a/potentials/Ta06A.snapparam +++ b/potentials/Ta06A.snapparam @@ -10,6 +10,5 @@ twojmax 6 rfac0 0.99363 rmin0 0 -diagonalstyle 3 bzeroflag 0 quadraticflag 0 diff --git a/potentials/W_2940_2017_2.snapparam b/potentials/W_2940_2017_2.snapparam index 27ab61a266..49f3094d08 100644 --- a/potentials/W_2940_2017_2.snapparam +++ b/potentials/W_2940_2017_2.snapparam @@ -8,6 +8,5 @@ twojmax 8 rfac0 0.99363 rmin0 0 -diagonalstyle 3 bzeroflag 0 quadraticflag 0 diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index bb2a5e9171..0ec4ed0995 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -85,9 +85,6 @@ void PairSNAPKokkos::init_style() if (force->newton_pair == 0) error->all(FLERR,"Pair style SNAP requires newton pair on"); - if (diagonalstyle != 3) - error->all(FLERR,"Must use diagonal style = 3 with pair snap/kk"); - // irequest = neigh request made by parent class neighflag = lmp->kokkos->neighflag; @@ -343,23 +340,12 @@ void PairSNAPKokkos::coeff(int narg, char **arg) Kokkos::deep_copy(d_coeffelem,h_coeffelem); Kokkos::deep_copy(d_map,h_map); - // deallocate non-kokkos sna - - if (sna) { - for (int tid = 0; tid(rfac0,twojmax, - diagonalstyle,use_shared_arrays, rmin0,switchflag,bzeroflag); - //if (!use_shared_arrays) - snaKK.grow_rij(nmax); + snaKK.grow_rij(0); snaKK.init(); } @@ -667,8 +653,6 @@ double PairSNAPKokkos::memory_usage() int n = atom->ntypes+1; bytes += n*n*sizeof(int); bytes += n*n*sizeof(double); - bytes += 3*nmax*sizeof(double); - bytes += nmax*sizeof(int); bytes += (2*ncoeffall)*sizeof(double); bytes += (ncoeff*3)*sizeof(double); bytes += snaKK.memory_usage(); diff --git a/src/KOKKOS/sna_kokkos.h b/src/KOKKOS/sna_kokkos.h index 7a80b262b7..40e5fe0ad4 100644 --- a/src/KOKKOS/sna_kokkos.h +++ b/src/KOKKOS/sna_kokkos.h @@ -48,7 +48,7 @@ inline SNAKokkos(const SNAKokkos& sna, const typename Kokkos::TeamPolicy::member_type& team); inline - SNAKokkos(double, int, int, int, double, int, int); + SNAKokkos(double, int, double, int, int); KOKKOS_INLINE_FUNCTION ~SNAKokkos(); @@ -178,12 +178,6 @@ inline double, double, double, // compute_duidrj double, double, double, double, double); - // if number of atoms are small use per atom arrays - // for twojmax arrays, rij, inside, bvec - // this will increase the memory footprint considerably, - // but allows parallel filling and reuse of these arrays - int use_shared_arrays; - // Sets the style for the switching function // 0 = none // 1 = cosine diff --git a/src/KOKKOS/sna_kokkos_impl.h b/src/KOKKOS/sna_kokkos_impl.h index 0f2a450a3d..c43003af97 100644 --- a/src/KOKKOS/sna_kokkos_impl.h +++ b/src/KOKKOS/sna_kokkos_impl.h @@ -27,19 +27,17 @@ static const double MY_PI = 3.14159265358979323846; // pi template inline SNAKokkos::SNAKokkos(double rfac0_in, - int twojmax_in, int diagonalstyle_in, int use_shared_arrays_in, + int twojmax_in, double rmin0_in, int switch_flag_in, int bzero_flag_in) { wself = 1.0; - use_shared_arrays = use_shared_arrays_in; rfac0 = rfac0_in; rmin0 = rmin0_in; switch_flag = switch_flag_in; bzero_flag = bzero_flag_in; twojmax = twojmax_in; - diagonalstyle = diagonalstyle_in; ncoeff = compute_ncoeff(); @@ -70,14 +68,12 @@ KOKKOS_INLINE_FUNCTION SNAKokkos::SNAKokkos(const SNAKokkos& sna, const typename Kokkos::TeamPolicy::member_type& team) { wself = sna.wself; - use_shared_arrays = sna.use_shared_arrays; rfac0 = sna.rfac0; rmin0 = sna.rmin0; switch_flag = sna.switch_flag; bzero_flag = sna.bzero_flag; twojmax = sna.twojmax; - diagonalstyle = sna.diagonalstyle; ncoeff = sna.ncoeff; nmax = sna.nmax; @@ -104,48 +100,45 @@ template inline void SNAKokkos::build_indexlist() { - if(diagonalstyle == 3) { - int idxj_count = 0; - int idxj_full_count = 0; + int idxj_count = 0; + int idxj_full_count = 0; - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { - if (j >= j1) idxj_count++; - idxj_full_count++; - } + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { + if (j >= j1) idxj_count++; + idxj_full_count++; + } - // indexList can be changed here + // indexList can be changed here - idxj = Kokkos::View("SNAKokkos::idxj",idxj_count); - idxj_full = Kokkos::View("SNAKokkos::idxj_full",idxj_full_count); - auto h_idxj = Kokkos::create_mirror_view(idxj); - auto h_idxj_full = Kokkos::create_mirror_view(idxj_full); + idxj = Kokkos::View("SNAKokkos::idxj",idxj_count); + idxj_full = Kokkos::View("SNAKokkos::idxj_full",idxj_full_count); + auto h_idxj = Kokkos::create_mirror_view(idxj); + auto h_idxj_full = Kokkos::create_mirror_view(idxj_full); - idxj_max = idxj_count; - idxj_full_max = idxj_full_count; + idxj_max = idxj_count; + idxj_full_max = idxj_full_count; - idxj_count = 0; - idxj_full_count = 0; + idxj_count = 0; + idxj_full_count = 0; - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { - if (j >= j1) { - h_idxj[idxj_count].j1 = j1; - h_idxj[idxj_count].j2 = j2; - h_idxj[idxj_count].j = j; - idxj_count++; - } - h_idxj_full[idxj_full_count].j1 = j1; - h_idxj_full[idxj_full_count].j2 = j2; - h_idxj_full[idxj_full_count].j = j; - idxj_full_count++; - } - Kokkos::deep_copy(idxj,h_idxj); - Kokkos::deep_copy(idxj_full,h_idxj_full); - - } + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { + if (j >= j1) { + h_idxj[idxj_count].j1 = j1; + h_idxj[idxj_count].j2 = j2; + h_idxj[idxj_count].j = j; + idxj_count++; + } + h_idxj_full[idxj_full_count].j1 = j1; + h_idxj_full[idxj_full_count].j2 = j2; + h_idxj_full[idxj_full_count].j = j; + idxj_full_count++; + } + Kokkos::deep_copy(idxj,h_idxj); + Kokkos::deep_copy(idxj_full,h_idxj_full); } /* ---------------------------------------------------------------------- */ @@ -1223,26 +1216,10 @@ int SNAKokkos::compute_ncoeff() ncount = 0; for (int j1 = 0; j1 <= twojmax; j1++) - if(diagonalstyle == 0) { - for (int j2 = 0; j2 <= j1; j2++) - for (int j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) - ncount++; - } else if(diagonalstyle == 1) { - int j2 = j1; - + for (int j2 = 0; j2 <= j1; j2++) for (int j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) - ncount++; - } else if(diagonalstyle == 2) { - ncount++; - } else if(diagonalstyle == 3) { - for (int j2 = 0; j2 <= j1; j2++) - for (int j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) - if (j >= j1) ncount++; - } - + j <= MIN(twojmax, j1 + j2); j += 2) + if (j >= j1) ncount++; return ncount; } diff --git a/src/SNAP/compute_sna_atom.cpp b/src/SNAP/compute_sna_atom.cpp index 5ca63a7e85..cc7a84281e 100644 --- a/src/SNAP/compute_sna_atom.cpp +++ b/src/SNAP/compute_sna_atom.cpp @@ -25,7 +25,6 @@ #include "comm.h" #include "memory.h" #include "error.h" -#include "openmp_snap.h" using namespace LAMMPS_NS; @@ -45,7 +44,6 @@ ComputeSNAAtom::ComputeSNAAtom(LAMMPS *lmp, int narg, char **arg) : // default values - diagonalstyle = 0; rmin0 = 0.0; switchflag = 1; bzeroflag = 1; @@ -85,14 +83,7 @@ ComputeSNAAtom::ComputeSNAAtom(LAMMPS *lmp, int narg, char **arg) : int iarg = nargmin; while (iarg < narg) { - if (strcmp(arg[iarg],"diagonal") == 0) { - if (iarg+2 > narg) - error->all(FLERR,"Illegal compute sna/atom command"); - diagonalstyle = atoi(arg[iarg+1]); - if (diagonalstyle < 0 || diagonalstyle > 3) - error->all(FLERR,"Illegal compute sna/atom command"); - iarg += 2; - } else if (strcmp(arg[iarg],"rmin0") == 0) { + if (strcmp(arg[iarg],"rmin0") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal compute sna/atom command"); rmin0 = atof(arg[iarg+1]); @@ -115,28 +106,16 @@ ComputeSNAAtom::ComputeSNAAtom(LAMMPS *lmp, int narg, char **arg) : } else error->all(FLERR,"Illegal compute sna/atom command"); } - nthreads = comm->nthreads; - snaptr = new SNA*[nthreads]; -#if defined(_OPENMP) -#pragma omp parallel default(none) shared(lmp,rfac0,twojmax,rmin0,switchflag,bzeroflag) -#endif - { - int tid = omp_get_thread_num(); + snaptr = new SNA(lmp,rfac0,twojmax, + rmin0,switchflag,bzeroflag); - // always unset use_shared_arrays since it does not work with computes - snaptr[tid] = new SNA(lmp,rfac0,twojmax,diagonalstyle, - 0 /*use_shared_arrays*/, rmin0,switchflag,bzeroflag); - } - - ncoeff = snaptr[0]->ncoeff; + ncoeff = snaptr->ncoeff; size_peratom_cols = ncoeff; if (quadraticflag) size_peratom_cols += (ncoeff*(ncoeff+1))/2; peratom_flag = 1; nmax = 0; - njmax = 0; sna = NULL; - } /* ---------------------------------------------------------------------- */ @@ -147,9 +126,7 @@ ComputeSNAAtom::~ComputeSNAAtom() memory->destroy(radelem); memory->destroy(wjelem); memory->destroy(cutsq); - for (int tid = 0; tidcompute[i]->style,"sna/atom") == 0) count++; if (count > 1 && comm->me == 0) error->warning(FLERR,"More than one compute sna/atom"); -#if defined(_OPENMP) -#pragma omp parallel default(none) -#endif - { - int tid = omp_get_thread_num(); - snaptr[tid]->init(); - } + snaptr->init(); } /* ---------------------------------------------------------------------- */ @@ -223,11 +194,7 @@ void ComputeSNAAtom::compute_peratom() double** const x = atom->x; const int* const mask = atom->mask; -#if defined(_OPENMP) -#pragma omp parallel for default(none) -#endif for (int ii = 0; ii < inum; ii++) { - const int tid = omp_get_thread_num(); const int i = ilist[ii]; if (mask[i] & groupbit) { @@ -241,7 +208,7 @@ void ComputeSNAAtom::compute_peratom() // insure rij, inside, and typej are of size jnum - snaptr[tid]->grow_rij(jnum); + snaptr->grow_rij(jnum); // rij[][3] = displacements between atom I and those neighbors // inside = indices of neighbors of I within cutoff @@ -258,26 +225,25 @@ void ComputeSNAAtom::compute_peratom() const double rsq = delx*delx + dely*dely + delz*delz; int jtype = type[j]; if (rsq < cutsq[itype][jtype] && rsq>1e-20) { - snaptr[tid]->rij[ninside][0] = delx; - snaptr[tid]->rij[ninside][1] = dely; - snaptr[tid]->rij[ninside][2] = delz; - snaptr[tid]->inside[ninside] = j; - snaptr[tid]->wj[ninside] = wjelem[jtype]; - snaptr[tid]->rcutij[ninside] = (radi+radelem[jtype])*rcutfac; + snaptr->rij[ninside][0] = delx; + snaptr->rij[ninside][1] = dely; + snaptr->rij[ninside][2] = delz; + snaptr->inside[ninside] = j; + snaptr->wj[ninside] = wjelem[jtype]; + snaptr->rcutij[ninside] = (radi+radelem[jtype])*rcutfac; ninside++; } } - snaptr[tid]->compute_ui(ninside); - snaptr[tid]->compute_zi(); - snaptr[tid]->compute_bi(); - snaptr[tid]->copy_bi2bvec(); + snaptr->compute_ui(ninside); + snaptr->compute_zi(); + snaptr->compute_bi(); for (int icoeff = 0; icoeff < ncoeff; icoeff++) - sna[i][icoeff] = snaptr[tid]->bvec[icoeff]; + sna[i][icoeff] = snaptr->blist[icoeff]; if (quadraticflag) { int ncount = ncoeff; for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bi = snaptr[tid]->bvec[icoeff]; + double bi = snaptr->blist[icoeff]; // diagonal element of quadratic matrix @@ -286,7 +252,7 @@ void ComputeSNAAtom::compute_peratom() // upper-triangular elements of quadratic matrix for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) - sna[i][ncount++] = bi*snaptr[tid]->bvec[jcoeff]; + sna[i][ncount++] = bi*snaptr->blist[jcoeff]; } } } else { @@ -302,10 +268,9 @@ void ComputeSNAAtom::compute_peratom() double ComputeSNAAtom::memory_usage() { - double bytes = nmax*size_peratom_cols * sizeof(double); - bytes += 3*njmax*sizeof(double); - bytes += njmax*sizeof(int); - bytes += snaptr[0]->memory_usage()*comm->nthreads; + double bytes = nmax*size_peratom_cols * sizeof(double); // sna + bytes += snaptr->memory_usage(); // SNA object + return bytes; } diff --git a/src/SNAP/compute_sna_atom.h b/src/SNAP/compute_sna_atom.h index 2f6fb18996..105a62a37a 100644 --- a/src/SNAP/compute_sna_atom.h +++ b/src/SNAP/compute_sna_atom.h @@ -34,7 +34,7 @@ class ComputeSNAAtom : public Compute { double memory_usage(); private: - int nmax, njmax, diagonalstyle; + int nmax; int ncoeff; double **cutsq; class NeighList *list; @@ -42,10 +42,9 @@ class ComputeSNAAtom : public Compute { double rcutfac; double *radelem; double *wjelem; - class SNA** snaptr; + class SNA* snaptr; double cutmax; int quadraticflag; - int nthreads; }; } diff --git a/src/SNAP/compute_snad_atom.cpp b/src/SNAP/compute_snad_atom.cpp index b0395d5317..37587a0aae 100644 --- a/src/SNAP/compute_snad_atom.cpp +++ b/src/SNAP/compute_snad_atom.cpp @@ -25,7 +25,6 @@ #include "comm.h" #include "memory.h" #include "error.h" -#include "openmp_snap.h" using namespace LAMMPS_NS; @@ -45,7 +44,6 @@ ComputeSNADAtom::ComputeSNADAtom(LAMMPS *lmp, int narg, char **arg) : // default values - diagonalstyle = 0; rmin0 = 0.0; switchflag = 1; bzeroflag = 1; @@ -83,14 +81,7 @@ ComputeSNADAtom::ComputeSNADAtom(LAMMPS *lmp, int narg, char **arg) : int iarg = nargmin; while (iarg < narg) { - if (strcmp(arg[iarg],"diagonal") == 0) { - if (iarg+2 > narg) - error->all(FLERR,"Illegal compute snad/atom command"); - diagonalstyle = atof(arg[iarg+1]); - if (diagonalstyle < 0 || diagonalstyle > 3) - error->all(FLERR,"Illegal compute snad/atom command"); - iarg += 2; - } else if (strcmp(arg[iarg],"rmin0") == 0) { + if (strcmp(arg[iarg],"rmin0") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal compute snad/atom command"); rmin0 = atof(arg[iarg+1]); @@ -113,20 +104,10 @@ ComputeSNADAtom::ComputeSNADAtom(LAMMPS *lmp, int narg, char **arg) : } else error->all(FLERR,"Illegal compute snad/atom command"); } - nthreads = comm->nthreads; - snaptr = new SNA*[nthreads]; -#if defined(_OPENMP) -#pragma omp parallel default(none) shared(lmp,rfac0,twojmax,rmin0,switchflag,bzeroflag) -#endif - { - int tid = omp_get_thread_num(); + snaptr = new SNA(lmp,rfac0,twojmax, + rmin0,switchflag,bzeroflag); - // always unset use_shared_arrays since it does not work with computes - snaptr[tid] = new SNA(lmp,rfac0,twojmax,diagonalstyle, - 0 /*use_shared_arrays*/, rmin0,switchflag,bzeroflag); - } - - ncoeff = snaptr[0]->ncoeff; + ncoeff = snaptr->ncoeff; nperdim = ncoeff; if (quadraticflag) nperdim += (ncoeff*(ncoeff+1))/2; yoffset = nperdim; @@ -136,9 +117,7 @@ ComputeSNADAtom::ComputeSNADAtom(LAMMPS *lmp, int narg, char **arg) : peratom_flag = 1; nmax = 0; - njmax = 0; snad = NULL; - } /* ---------------------------------------------------------------------- */ @@ -149,9 +128,7 @@ ComputeSNADAtom::~ComputeSNADAtom() memory->destroy(radelem); memory->destroy(wjelem); memory->destroy(cutsq); - for (int tid = 0; tidcompute[i]->style,"snad/atom") == 0) count++; if (count > 1 && comm->me == 0) error->warning(FLERR,"More than one compute snad/atom"); -#if defined(_OPENMP) -#pragma omp parallel default(none) -#endif - { - int tid = omp_get_thread_num(); - snaptr[tid]->init(); - } + snaptr->init(); } /* ---------------------------------------------------------------------- */ @@ -235,11 +206,7 @@ void ComputeSNADAtom::compute_peratom() double** const x = atom->x; const int* const mask = atom->mask; -#if defined(_OPENMP) -#pragma omp parallel for default(none) -#endif for (int ii = 0; ii < inum; ii++) { - const int tid = omp_get_thread_num(); const int i = ilist[ii]; if (mask[i] & groupbit) { @@ -258,7 +225,7 @@ void ComputeSNADAtom::compute_peratom() // insure rij, inside, and typej are of size jnum - snaptr[tid]->grow_rij(jnum); + snaptr->grow_rij(jnum); // rij[][3] = displacements between atom I and those neighbors // inside = indices of neighbors of I within cutoff @@ -276,30 +243,28 @@ void ComputeSNADAtom::compute_peratom() const double rsq = delx*delx + dely*dely + delz*delz; int jtype = type[j]; if (rsq < cutsq[itype][jtype]&&rsq>1e-20) { - snaptr[tid]->rij[ninside][0] = delx; - snaptr[tid]->rij[ninside][1] = dely; - snaptr[tid]->rij[ninside][2] = delz; - snaptr[tid]->inside[ninside] = j; - snaptr[tid]->wj[ninside] = wjelem[jtype]; - snaptr[tid]->rcutij[ninside] = (radi+radelem[jtype])*rcutfac; + snaptr->rij[ninside][0] = delx; + snaptr->rij[ninside][1] = dely; + snaptr->rij[ninside][2] = delz; + snaptr->inside[ninside] = j; + snaptr->wj[ninside] = wjelem[jtype]; + snaptr->rcutij[ninside] = (radi+radelem[jtype])*rcutfac; ninside++; } } - snaptr[tid]->compute_ui(ninside); - snaptr[tid]->compute_zi(); + snaptr->compute_ui(ninside); + snaptr->compute_zi(); if (quadraticflag) { - snaptr[tid]->compute_bi(); - snaptr[tid]->copy_bi2bvec(); + snaptr->compute_bi(); } for (int jj = 0; jj < ninside; jj++) { - const int j = snaptr[tid]->inside[jj]; - snaptr[tid]->compute_duidrj(snaptr[tid]->rij[jj], - snaptr[tid]->wj[jj], - snaptr[tid]->rcutij[jj]); - snaptr[tid]->compute_dbidrj(); - snaptr[tid]->copy_dbi2dbvec(); + const int j = snaptr->inside[jj]; + snaptr->compute_duidrj(snaptr->rij[jj], + snaptr->wj[jj], + snaptr->rcutij[jj]); + snaptr->compute_dbidrj(); // Accumulate -dBi/dRi, -dBi/dRj @@ -307,12 +272,12 @@ void ComputeSNADAtom::compute_peratom() double *snadj = snad[j]+typeoffset; for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - snadi[icoeff] += snaptr[tid]->dbvec[icoeff][0]; - snadi[icoeff+yoffset] += snaptr[tid]->dbvec[icoeff][1]; - snadi[icoeff+zoffset] += snaptr[tid]->dbvec[icoeff][2]; - snadj[icoeff] -= snaptr[tid]->dbvec[icoeff][0]; - snadj[icoeff+yoffset] -= snaptr[tid]->dbvec[icoeff][1]; - snadj[icoeff+zoffset] -= snaptr[tid]->dbvec[icoeff][2]; + snadi[icoeff] += snaptr->dblist[icoeff][0]; + snadi[icoeff+yoffset] += snaptr->dblist[icoeff][1]; + snadi[icoeff+zoffset] += snaptr->dblist[icoeff][2]; + snadj[icoeff] -= snaptr->dblist[icoeff][0]; + snadj[icoeff+yoffset] -= snaptr->dblist[icoeff][1]; + snadj[icoeff+zoffset] -= snaptr->dblist[icoeff][2]; } if (quadraticflag) { @@ -321,10 +286,10 @@ void ComputeSNADAtom::compute_peratom() snadj += quadraticoffset; int ncount = 0; for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bi = snaptr[tid]->bvec[icoeff]; - double bix = snaptr[tid]->dbvec[icoeff][0]; - double biy = snaptr[tid]->dbvec[icoeff][1]; - double biz = snaptr[tid]->dbvec[icoeff][2]; + double bi = snaptr->blist[icoeff]; + double bix = snaptr->dblist[icoeff][0]; + double biy = snaptr->dblist[icoeff][1]; + double biz = snaptr->dblist[icoeff][2]; // diagonal elements of quadratic matrix @@ -343,12 +308,12 @@ void ComputeSNADAtom::compute_peratom() // upper-triangular elements of quadratic matrix for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { - double dbxtmp = bi*snaptr[tid]->dbvec[jcoeff][0] - + bix*snaptr[tid]->bvec[jcoeff]; - double dbytmp = bi*snaptr[tid]->dbvec[jcoeff][1] - + biy*snaptr[tid]->bvec[jcoeff]; - double dbztmp = bi*snaptr[tid]->dbvec[jcoeff][2] - + biz*snaptr[tid]->bvec[jcoeff]; + double dbxtmp = bi*snaptr->dblist[jcoeff][0] + + bix*snaptr->blist[jcoeff]; + double dbytmp = bi*snaptr->dblist[jcoeff][1] + + biy*snaptr->blist[jcoeff]; + double dbztmp = bi*snaptr->dblist[jcoeff][2] + + biz*snaptr->blist[jcoeff]; snadi[ncount] += dbxtmp; snadi[ncount+yoffset] += dbytmp; @@ -404,10 +369,9 @@ void ComputeSNADAtom::unpack_reverse_comm(int n, int *list, double *buf) double ComputeSNADAtom::memory_usage() { - double bytes = nmax*size_peratom_cols * sizeof(double); - bytes += 3*njmax*sizeof(double); - bytes += njmax*sizeof(int); - bytes += 3*nperdim*atom->ntypes; - bytes += snaptr[0]->memory_usage()*comm->nthreads; + + double bytes = nmax*size_peratom_cols * sizeof(double); // snad + bytes += snaptr->memory_usage(); // SNA object + return bytes; } diff --git a/src/SNAP/compute_snad_atom.h b/src/SNAP/compute_snad_atom.h index 92003a9bc5..ac353d8553 100644 --- a/src/SNAP/compute_snad_atom.h +++ b/src/SNAP/compute_snad_atom.h @@ -36,7 +36,7 @@ class ComputeSNADAtom : public Compute { double memory_usage(); private: - int nmax, njmax, diagonalstyle; + int nmax; int ncoeff, nperdim, yoffset, zoffset; double **cutsq; class NeighList *list; @@ -44,10 +44,9 @@ class ComputeSNADAtom : public Compute { double rcutfac; double *radelem; double *wjelem; - class SNA** snaptr; + class SNA* snaptr; double cutmax; int quadraticflag; - int nthreads; }; } diff --git a/src/SNAP/compute_snav_atom.cpp b/src/SNAP/compute_snav_atom.cpp index b2d555f713..1f702496ed 100644 --- a/src/SNAP/compute_snav_atom.cpp +++ b/src/SNAP/compute_snav_atom.cpp @@ -25,7 +25,6 @@ #include "comm.h" #include "memory.h" #include "error.h" -#include "openmp_snap.h" using namespace LAMMPS_NS; @@ -45,7 +44,6 @@ ComputeSNAVAtom::ComputeSNAVAtom(LAMMPS *lmp, int narg, char **arg) : // default values - diagonalstyle = 0; rmin0 = 0.0; switchflag = 1; bzeroflag = 1; @@ -79,14 +77,7 @@ ComputeSNAVAtom::ComputeSNAVAtom(LAMMPS *lmp, int narg, char **arg) : int iarg = nargmin; while (iarg < narg) { - if (strcmp(arg[iarg],"diagonal") == 0) { - if (iarg+2 > narg) - error->all(FLERR,"Illegal compute snav/atom command"); - diagonalstyle = atof(arg[iarg+1]); - if (diagonalstyle < 0 || diagonalstyle > 3) - error->all(FLERR,"Illegal compute snav/atom command"); - iarg += 2; - } else if (strcmp(arg[iarg],"rmin0") == 0) { + if (strcmp(arg[iarg],"rmin0") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal compute snav/atom command"); rmin0 = atof(arg[iarg+1]); @@ -109,20 +100,10 @@ ComputeSNAVAtom::ComputeSNAVAtom(LAMMPS *lmp, int narg, char **arg) : } else error->all(FLERR,"Illegal compute snav/atom command"); } - nthreads = comm->nthreads; - snaptr = new SNA*[nthreads]; -#if defined(_OPENMP) -#pragma omp parallel default(none) shared(lmp,rfac0,twojmax,rmin0,switchflag,bzeroflag) -#endif - { - int tid = omp_get_thread_num(); + snaptr = new SNA(lmp,rfac0,twojmax, + rmin0,switchflag,bzeroflag); - // always unset use_shared_arrays since it does not work with computes - snaptr[tid] = new SNA(lmp,rfac0,twojmax,diagonalstyle, - 0 /*use_shared_arrays*/, rmin0,switchflag,bzeroflag); - } - - ncoeff = snaptr[0]->ncoeff; + ncoeff = snaptr->ncoeff; nperdim = ncoeff; if (quadraticflag) nperdim += (ncoeff*(ncoeff+1))/2; size_peratom_cols = 6*nperdim*atom->ntypes; @@ -130,7 +111,6 @@ ComputeSNAVAtom::ComputeSNAVAtom(LAMMPS *lmp, int narg, char **arg) : peratom_flag = 1; nmax = 0; - njmax = 0; snav = NULL; } @@ -144,9 +124,7 @@ ComputeSNAVAtom::~ComputeSNAVAtom() memory->destroy(wjelem); memory->destroy(cutsq); - for (int tid = 0; tidcompute[i]->style,"snav/atom") == 0) count++; if (count > 1 && comm->me == 0) error->warning(FLERR,"More than one compute snav/atom"); -#if defined(_OPENMP) -#pragma omp parallel default(none) -#endif - { - int tid = omp_get_thread_num(); - snaptr[tid]->init(); - } + snaptr->init(); } /* ---------------------------------------------------------------------- */ @@ -230,11 +202,7 @@ void ComputeSNAVAtom::compute_peratom() double** const x = atom->x; const int* const mask = atom->mask; -#if defined(_OPENMP) -#pragma omp parallel for default(none) -#endif for (int ii = 0; ii < inum; ii++) { - const int tid = omp_get_thread_num(); const int i = ilist[ii]; if (mask[i] & groupbit) { @@ -251,7 +219,7 @@ void ComputeSNAVAtom::compute_peratom() // insure rij, inside, and typej are of size jnum - snaptr[tid]->grow_rij(jnum); + snaptr->grow_rij(jnum); // rij[][3] = displacements between atom I and those neighbors // inside = indices of neighbors of I within cutoff @@ -269,31 +237,29 @@ void ComputeSNAVAtom::compute_peratom() const double rsq = delx*delx + dely*dely + delz*delz; int jtype = type[j]; if (rsq < cutsq[itype][jtype]&&rsq>1e-20) { - snaptr[tid]->rij[ninside][0] = delx; - snaptr[tid]->rij[ninside][1] = dely; - snaptr[tid]->rij[ninside][2] = delz; - snaptr[tid]->inside[ninside] = j; - snaptr[tid]->wj[ninside] = wjelem[jtype]; - snaptr[tid]->rcutij[ninside] = (radi+radelem[jtype])*rcutfac; + snaptr->rij[ninside][0] = delx; + snaptr->rij[ninside][1] = dely; + snaptr->rij[ninside][2] = delz; + snaptr->inside[ninside] = j; + snaptr->wj[ninside] = wjelem[jtype]; + snaptr->rcutij[ninside] = (radi+radelem[jtype])*rcutfac; ninside++; } } - snaptr[tid]->compute_ui(ninside); - snaptr[tid]->compute_zi(); + snaptr->compute_ui(ninside); + snaptr->compute_zi(); if (quadraticflag) { - snaptr[tid]->compute_bi(); - snaptr[tid]->copy_bi2bvec(); + snaptr->compute_bi(); } for (int jj = 0; jj < ninside; jj++) { - const int j = snaptr[tid]->inside[jj]; + const int j = snaptr->inside[jj]; - snaptr[tid]->compute_duidrj(snaptr[tid]->rij[jj], - snaptr[tid]->wj[jj], - snaptr[tid]->rcutij[jj]); - snaptr[tid]->compute_dbidrj(); - snaptr[tid]->copy_dbi2dbvec(); + snaptr->compute_duidrj(snaptr->rij[jj], + snaptr->wj[jj], + snaptr->rcutij[jj]); + snaptr->compute_dbidrj(); // Accumulate -dBi/dRi*Ri, -dBi/dRj*Rj @@ -301,18 +267,18 @@ void ComputeSNAVAtom::compute_peratom() double *snavj = snav[j]+typeoffset; for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - snavi[icoeff] += snaptr[tid]->dbvec[icoeff][0]*xtmp; - snavi[icoeff+nperdim] += snaptr[tid]->dbvec[icoeff][1]*ytmp; - snavi[icoeff+2*nperdim] += snaptr[tid]->dbvec[icoeff][2]*ztmp; - snavi[icoeff+3*nperdim] += snaptr[tid]->dbvec[icoeff][1]*ztmp; - snavi[icoeff+4*nperdim] += snaptr[tid]->dbvec[icoeff][0]*ztmp; - snavi[icoeff+5*nperdim] += snaptr[tid]->dbvec[icoeff][0]*ytmp; - snavj[icoeff] -= snaptr[tid]->dbvec[icoeff][0]*x[j][0]; - snavj[icoeff+nperdim] -= snaptr[tid]->dbvec[icoeff][1]*x[j][1]; - snavj[icoeff+2*nperdim] -= snaptr[tid]->dbvec[icoeff][2]*x[j][2]; - snavj[icoeff+3*nperdim] -= snaptr[tid]->dbvec[icoeff][1]*x[j][2]; - snavj[icoeff+4*nperdim] -= snaptr[tid]->dbvec[icoeff][0]*x[j][2]; - snavj[icoeff+5*nperdim] -= snaptr[tid]->dbvec[icoeff][0]*x[j][1]; + snavi[icoeff] += snaptr->dblist[icoeff][0]*xtmp; + snavi[icoeff+nperdim] += snaptr->dblist[icoeff][1]*ytmp; + snavi[icoeff+2*nperdim] += snaptr->dblist[icoeff][2]*ztmp; + snavi[icoeff+3*nperdim] += snaptr->dblist[icoeff][1]*ztmp; + snavi[icoeff+4*nperdim] += snaptr->dblist[icoeff][0]*ztmp; + snavi[icoeff+5*nperdim] += snaptr->dblist[icoeff][0]*ytmp; + snavj[icoeff] -= snaptr->dblist[icoeff][0]*x[j][0]; + snavj[icoeff+nperdim] -= snaptr->dblist[icoeff][1]*x[j][1]; + snavj[icoeff+2*nperdim] -= snaptr->dblist[icoeff][2]*x[j][2]; + snavj[icoeff+3*nperdim] -= snaptr->dblist[icoeff][1]*x[j][2]; + snavj[icoeff+4*nperdim] -= snaptr->dblist[icoeff][0]*x[j][2]; + snavj[icoeff+5*nperdim] -= snaptr->dblist[icoeff][0]*x[j][1]; } if (quadraticflag) { @@ -321,10 +287,10 @@ void ComputeSNAVAtom::compute_peratom() snavj += quadraticoffset; int ncount = 0; for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bi = snaptr[tid]->bvec[icoeff]; - double bix = snaptr[tid]->dbvec[icoeff][0]; - double biy = snaptr[tid]->dbvec[icoeff][1]; - double biz = snaptr[tid]->dbvec[icoeff][2]; + double bi = snaptr->blist[icoeff]; + double bix = snaptr->dblist[icoeff][0]; + double biy = snaptr->dblist[icoeff][1]; + double biz = snaptr->dblist[icoeff][2]; // diagonal element of quadratic matrix @@ -348,12 +314,12 @@ void ComputeSNAVAtom::compute_peratom() // upper-triangular elements of quadratic matrix for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { - double dbxtmp = bi*snaptr[tid]->dbvec[jcoeff][0] - + bix*snaptr[tid]->bvec[jcoeff]; - double dbytmp = bi*snaptr[tid]->dbvec[jcoeff][1] - + biy*snaptr[tid]->bvec[jcoeff]; - double dbztmp = bi*snaptr[tid]->dbvec[jcoeff][2] - + biz*snaptr[tid]->bvec[jcoeff]; + double dbxtmp = bi*snaptr->dblist[jcoeff][0] + + bix*snaptr->blist[jcoeff]; + double dbytmp = bi*snaptr->dblist[jcoeff][1] + + biy*snaptr->blist[jcoeff]; + double dbztmp = bi*snaptr->dblist[jcoeff][2] + + biz*snaptr->blist[jcoeff]; snavi[ncount] += dbxtmp*xtmp; snavi[ncount+nperdim] += dbytmp*ytmp; snavi[ncount+2*nperdim] += dbztmp*ztmp; @@ -414,11 +380,8 @@ void ComputeSNAVAtom::unpack_reverse_comm(int n, int *list, double *buf) double ComputeSNAVAtom::memory_usage() { - double bytes = nmax*size_peratom_cols * sizeof(double); - bytes += 3*njmax*sizeof(double); - bytes += njmax*sizeof(int); - bytes += 6*nperdim*atom->ntypes; - if (quadraticflag) bytes += 6*nperdim*atom->ntypes; - bytes += snaptr[0]->memory_usage()*comm->nthreads; + double bytes = nmax*size_peratom_cols * sizeof(double); // snav + bytes += snaptr->memory_usage(); // SNA object + return bytes; } diff --git a/src/SNAP/compute_snav_atom.h b/src/SNAP/compute_snav_atom.h index 9be5e1d389..9df17cc667 100644 --- a/src/SNAP/compute_snav_atom.h +++ b/src/SNAP/compute_snav_atom.h @@ -36,7 +36,7 @@ class ComputeSNAVAtom : public Compute { double memory_usage(); private: - int nmax, njmax, diagonalstyle; + int nmax; int ncoeff, nperdim; double **cutsq; class NeighList *list; @@ -44,9 +44,8 @@ class ComputeSNAVAtom : public Compute { double rcutfac; double *radelem; double *wjelem; - class SNA** snaptr; + class SNA* snaptr; int quadraticflag; - int nthreads; }; } diff --git a/src/SNAP/openmp_snap.h b/src/SNAP/openmp_snap.h deleted file mode 100644 index 60a3138c9c..0000000000 --- a/src/SNAP/openmp_snap.h +++ /dev/null @@ -1,16 +0,0 @@ - -#ifndef LMP_OPENMP_SNAP_H -#define LMP_OPENMP_SNAP_H - -#if defined(_OPENMP) -#include -#else -enum omp_sched_t {omp_sched_static, omp_sched_dynamic, omp_sched_guided, omp_sched_auto}; -inline int omp_get_thread_num() { return 0;} -inline int omp_set_num_threads(int num_threads) {return 1;} -/* inline int __sync_fetch_and_add(int* ptr, int value) {int tmp = *ptr; ptr[0]+=value; return tmp;} */ -inline void omp_set_schedule(omp_sched_t schedule,int modifier=1) {} -inline int omp_in_parallel() {return 0;} -#endif - -#endif diff --git a/src/SNAP/pair_snap.cpp b/src/SNAP/pair_snap.cpp index 8b547e6e73..6eb05f85a4 100644 --- a/src/SNAP/pair_snap.cpp +++ b/src/SNAP/pair_snap.cpp @@ -23,7 +23,6 @@ #include "neigh_list.h" #include "neigh_request.h" #include "sna.h" -#include "openmp_snap.h" #include "domain.h" #include "memory.h" #include "error.h" @@ -35,10 +34,6 @@ using namespace LAMMPS_NS; #define MAXLINE 1024 #define MAXWORD 3 -// Outstanding issues with quadratic term -// 1. there seems to a problem with compute_optimized energy calc -// it does not match compute_regular, even when quadratic coeffs = 0 - /* ---------------------------------------------------------------------- */ PairSNAP::PairSNAP(LAMMPS *lmp) : Pair(lmp) @@ -54,53 +49,9 @@ PairSNAP::PairSNAP(LAMMPS *lmp) : Pair(lmp) wjelem = NULL; coeffelem = NULL; - nmax = 0; - nthreads = 1; - - schedule_user = 0; - schedule_time_guided = -1; - schedule_time_dynamic = -1; - ncalls_neigh =-1; - - ilistmask_max = 0; - ilistmask = NULL; - ghostinum = 0; - ghostilist_max = 0; - ghostilist = NULL; - ghostnumneigh_max = 0; - ghostnumneigh = NULL; - ghostneighs = NULL; - ghostfirstneigh = NULL; - ghostneighs_total = 0; - ghostneighs_max = 0; - - i_max = 0; - i_neighmax = 0; - i_numpairs = 0; - i_rij = NULL; - i_inside = NULL; - i_wj = NULL; - i_rcutij = NULL; - i_ninside = NULL; - i_pairs = NULL; - i_uarraytot_r = NULL; - i_uarraytot_i = NULL; - i_zarray_r = NULL; - i_zarray_i =NULL; - - use_shared_arrays = 0; - -#ifdef TIMING_INFO - timers[0] = 0; - timers[1] = 0; - timers[2] = 0; - timers[3] = 0; -#endif - - // Need to set this because restart not handled by PairHybrid - - sna = NULL; - + beta_max = 0; + beta = NULL; + bispectrum = NULL; } /* ---------------------------------------------------------------------- */ @@ -118,35 +69,10 @@ PairSNAP::~PairSNAP() memory->destroy(coeffelem); } - // Need to set this because restart not handled by PairHybrid + memory->destroy(beta); + memory->destroy(bispectrum); - if (sna) { - -#ifdef TIMING_INFO - double time[5]; - double timeave[5]; - double timeave_mpi[5]; - double timemax_mpi[5]; - - for (int i = 0; i < 5; i++) { - time[i] = 0; - timeave[i] = 0; - for (int tid = 0; tidtimers[i]>time[i]) - time[i] = sna[tid]->timers[i]; - timeave[i] += sna[tid]->timers[i]; - } - timeave[i] /= nthreads; - } - MPI_Reduce(timeave, timeave_mpi, 5, MPI_DOUBLE, MPI_SUM, 0, world); - MPI_Reduce(time, timemax_mpi, 5, MPI_DOUBLE, MPI_MAX, 0, world); -#endif - - for (int tid = 0; tiddestroy(setflag); @@ -156,19 +82,11 @@ PairSNAP::~PairSNAP() } -void PairSNAP::compute(int eflag, int vflag) -{ - if (use_optimized) - compute_optimized(eflag, vflag); - else - compute_regular(eflag, vflag); -} - /* ---------------------------------------------------------------------- This version is a straightforward implementation ---------------------------------------------------------------------- */ -void PairSNAP::compute_regular(int eflag, int vflag) +void PairSNAP::compute(int eflag, int vflag) { int i,j,jnum,ninside; double delx,dely,delz,evdwl,rsq; @@ -183,7 +101,18 @@ void PairSNAP::compute_regular(int eflag, int vflag) int *type = atom->type; int nlocal = atom->nlocal; int newton_pair = force->newton_pair; - class SNA* snaptr = sna[0]; + + if (beta_max < list->inum) { + memory->grow(beta,list->inum,ncoeff,"PairSNAP:beta"); + memory->grow(bispectrum,list->inum,ncoeff,"PairSNAP:bispectrum"); + beta_max = list->inum; + } + + // compute dE_i/dB_i = beta_i for all i in list + + if (quadraticflag || eflag) + compute_bispectrum(); + compute_beta(); numneigh = list->numneigh; firstneigh = list->firstneigh; @@ -233,66 +162,22 @@ void PairSNAP::compute_regular(int eflag, int vflag) } } - // compute Ui, Zi, and Bi for atom I + // compute Ui, Yi for atom I snaptr->compute_ui(ninside); - snaptr->compute_zi(); - if (quadraticflag) { - snaptr->compute_bi(); - snaptr->copy_bi2bvec(); - } // for neighbors of I within cutoff: - // compute dUi/drj and dBi/drj - // Fij = dEi/dRj = -dEi/dRi => add to Fi, subtract from Fj + // compute Fij = dEi/dRj = -dEi/dRi + // add to Fi, subtract from Fj - double* coeffi = coeffelem[ielem]; + snaptr->compute_yi(beta[ii]); for (int jj = 0; jj < ninside; jj++) { int j = snaptr->inside[jj]; snaptr->compute_duidrj(snaptr->rij[jj], snaptr->wj[jj],snaptr->rcutij[jj]); - snaptr->compute_dbidrj(); - snaptr->copy_dbi2dbvec(); - - fij[0] = 0.0; - fij[1] = 0.0; - fij[2] = 0.0; - - // linear contributions - - for (int k = 1; k <= ncoeff; k++) { - double bgb = coeffi[k]; - fij[0] += bgb*snaptr->dbvec[k-1][0]; - fij[1] += bgb*snaptr->dbvec[k-1][1]; - fij[2] += bgb*snaptr->dbvec[k-1][2]; - } - - // quadratic contributions - - if (quadraticflag) { - int k = ncoeff+1; - for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bveci = snaptr->bvec[icoeff]; - double fack = coeffi[k]*bveci; - double* dbveci = snaptr->dbvec[icoeff]; - fij[0] += fack*dbveci[0]; - fij[1] += fack*dbveci[1]; - fij[2] += fack*dbveci[2]; - k++; - for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { - double facki = coeffi[k]*bveci; - double fackj = coeffi[k]*snaptr->bvec[jcoeff]; - double* dbvecj = snaptr->dbvec[jcoeff]; - - fij[0] += facki*dbvecj[0]+fackj*dbveci[0]; - fij[1] += facki*dbvecj[1]+fackj*dbveci[1]; - fij[2] += facki*dbvecj[2]+fackj*dbveci[2]; - k++; - } - } - } + snaptr->compute_deidrj(fij); f[i][0] += fij[0]; f[i][1] += fij[1]; @@ -316,31 +201,26 @@ void PairSNAP::compute_regular(int eflag, int vflag) // evdwl = energy of atom I, sum over coeffs_k * Bi_k + double* coeffi = coeffelem[ielem]; evdwl = coeffi[0]; - if (!quadraticflag) { - snaptr->compute_bi(); - snaptr->copy_bi2bvec(); - } // E = beta.B + 0.5*B^t.alpha.B - // coeff[k] = beta[k-1] or - // coeff[k] = alpha_ii or - // coeff[k] = alpha_ij = alpha_ji, j != i // linear contributions - for (int k = 1; k <= ncoeff; k++) - evdwl += coeffi[k]*snaptr->bvec[k-1]; + for (int icoeff = 0; icoeff < ncoeff; icoeff++) + evdwl += coeffi[icoeff+1]*bispectrum[ii][icoeff]; // quadratic contributions if (quadraticflag) { int k = ncoeff+1; for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bveci = snaptr->bvec[icoeff]; + double bveci = bispectrum[ii][icoeff]; evdwl += 0.5*coeffi[k++]*bveci*bveci; for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { - evdwl += coeffi[k++]*bveci*snaptr->bvec[jcoeff]; + double bvecj = bispectrum[ii][jcoeff]; + evdwl += coeffi[k++]*bveci*bvecj; } } } @@ -352,948 +232,106 @@ void PairSNAP::compute_regular(int eflag, int vflag) if (vflag_fdotr) virial_fdotr_compute(); } +/* ---------------------------------------------------------------------- + compute beta +------------------------------------------------------------------------- */ + +void PairSNAP::compute_beta() +{ + int i; + int *type = atom->type; + + for (int ii = 0; ii < list->inum; ii++) { + i = list->ilist[ii]; + const int itype = type[i]; + const int ielem = map[itype]; + double* coeffi = coeffelem[ielem]; + + for (int icoeff = 0; icoeff < ncoeff; icoeff++) + beta[ii][icoeff] = coeffi[icoeff+1]; + + if (quadraticflag) { + int k = ncoeff+1; + for (int icoeff = 0; icoeff < ncoeff; icoeff++) { + double bveci = bispectrum[ii][icoeff]; + beta[ii][icoeff] += coeffi[k]*bveci; + k++; + for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { + double bvecj = bispectrum[ii][jcoeff]; + beta[ii][icoeff] += coeffi[k]*bvecj; + beta[ii][jcoeff] += coeffi[k]*bveci; + k++; + } + } + } + } +} /* ---------------------------------------------------------------------- - This version is optimized for threading, micro-load balancing - ---------------------------------------------------------------------- */ + compute bispectrum +------------------------------------------------------------------------- */ -void PairSNAP::compute_optimized(int eflag, int vflag) +void PairSNAP::compute_bispectrum() { - // if reneighboring took place do load_balance if requested - if (do_load_balance > 0 && - (neighbor->ncalls != ncalls_neigh)) { - ghostinum = 0; - // reset local ghost neighbor lists - ncalls_neigh = neighbor->ncalls; - if (ilistmask_max < list->inum) { - memory->grow(ilistmask,list->inum,"PairSnap::ilistmask"); - ilistmask_max = list->inum; - } - for (int i = 0; i < list->inum; i++) - ilistmask[i] = 1; + int i,j,jnum,ninside; + double delx,dely,delz,rsq; + int *jlist,*numneigh,**firstneigh; - //multiple passes for loadbalancing - for (int i = 0; i < do_load_balance; i++) - load_balance(); - } + double **x = atom->x; + int *type = atom->type; - int numpairs = 0; for (int ii = 0; ii < list->inum; ii++) { - if ((do_load_balance <= 0) || ilistmask[ii]) { - int i = list->ilist[ii]; - int jnum = list->numneigh[i]; - numpairs += jnum; - } - } + i = list->ilist[ii]; - if (do_load_balance) - for (int ii = 0; ii < ghostinum; ii++) { - int i = ghostilist[ii]; - int jnum = ghostnumneigh[i]; - numpairs += jnum; - } - - // optimized schedule setting - - int time_dynamic = 0; - int time_guided = 0; - - if (schedule_user == 0) schedule_user = 4; - - switch (schedule_user) { - case 1: - omp_set_schedule(omp_sched_static,1); - break; - case 2: - omp_set_schedule(omp_sched_dynamic,1); - break; - case 3: - omp_set_schedule(omp_sched_guided,2); - break; - case 4: - omp_set_schedule(omp_sched_auto,0); - break; - case 5: - if (numpairs < 8*nthreads) omp_set_schedule(omp_sched_dynamic,1); - else if (schedule_time_guided < 0.0) { - omp_set_schedule(omp_sched_guided,2); - if (!eflag && !vflag) time_guided = 1; - } else if (schedule_time_dynamic<0.0) { - omp_set_schedule(omp_sched_dynamic,1); - if (!eflag && !vflag) time_dynamic = 1; - } else if (schedule_time_guidedcreate(pairs_tid_unique,numpairs,4,"numpairs"); - pairs = pairs_tid_unique; - } - - if (!use_shared_arrays) { - numpairs = 0; - for (int ii = 0; ii < list->inum; ii++) { - if ((do_load_balance <= 0) || ilistmask[ii]) { - int i = list->ilist[ii]; - int jnum = list->numneigh[i]; - for (int jj = 0; jjx; - double **f = atom->f; - int *type = atom->type; - int nlocal = atom->nlocal; - int newton_pair = force->newton_pair; - - numneigh = list->numneigh; - firstneigh = list->firstneigh; - -#ifdef TIMING_INFO - // only update micro timers after setup - static int count=0; - if (count<2) { - sna[tid]->timers[0] = 0; - sna[tid]->timers[1] = 0; - sna[tid]->timers[2] = 0; - sna[tid]->timers[3] = 0; - sna[tid]->timers[4] = 0; - } - count++; -#endif - - // did thread start working on interactions of new atom - int iold = -1; - - double starttime, endtime; - if (time_dynamic || time_guided) - starttime = MPI_Wtime(); - -#if defined(_OPENMP) -#pragma omp for schedule(runtime) -#endif - for (int iijj = 0; iijj < numpairs; iijj++) { - int i = 0; - if (use_shared_arrays) { - i = i_pairs[iijj][0]; - if (iold != i) { - set_sna_to_shared(tid,i_pairs[iijj][3]); - ielem = map[type[i]]; - } - iold = i; - } else { - i = pairs[iijj][0]; - if (iold != i) { - iold = i; - const double xtmp = x[i][0]; - const double ytmp = x[i][1]; - const double ztmp = x[i][2]; - const int itype = type[i]; - ielem = map[itype]; - const double radi = radelem[ielem]; - - if (i < nlocal) { - jlist = firstneigh[i]; - jnum = numneigh[i]; - } else { - jlist = ghostneighs+ghostfirstneigh[i]; - jnum = ghostnumneigh[i]; - } - - // insure rij, inside, wj, and rcutij are of size jnum - - sna[tid]->grow_rij(jnum); - - // rij[][3] = displacements between atom I and those neighbors - // inside = indices of neighbors of I within cutoff - // wj = weights of neighbors of I within cutoff - // rcutij = cutoffs of neighbors of I within cutoff - // note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi - - ninside = 0; - for (jj = 0; jj < jnum; jj++) { - int j = jlist[jj]; - j &= NEIGHMASK; - delx = x[j][0] - xtmp; //unitialised - dely = x[j][1] - ytmp; - delz = x[j][2] - ztmp; - rsq = delx*delx + dely*dely + delz*delz; - jtype = type[j]; - int jelem = map[jtype]; - - if (rsq < cutsq[itype][jtype]&&rsq>1e-20) { //unitialised - sna[tid]->rij[ninside][0] = delx; - sna[tid]->rij[ninside][1] = dely; - sna[tid]->rij[ninside][2] = delz; - sna[tid]->inside[ninside] = j; - sna[tid]->wj[ninside] = wjelem[jelem]; - sna[tid]->rcutij[ninside] = (radi + radelem[jelem])*rcutfac; - ninside++; - - // update index list with inside index - pairs[iijj + (jj - pairs[iijj][1])][2] = - ninside-1; //unitialised - } - } - - // compute Ui and Zi for atom I - - sna[tid]->compute_ui(ninside); //unitialised - sna[tid]->compute_zi(); - } - } - if (quadraticflag) { - sna[tid]->compute_bi(); - sna[tid]->copy_bi2bvec(); - } - - // for neighbors of I within cutoff: - // compute dUi/drj and dBi/drj - // Fij = dEi/dRj = -dEi/dRi => add to Fi, subtract from Fj - - // entry into loop if inside index is set - - double* coeffi = coeffelem[ielem]; - - if (pairs[iijj][2] >= 0) { - jj = pairs[iijj][2]; - int j = sna[tid]->inside[jj]; - sna[tid]->compute_duidrj(sna[tid]->rij[jj], - sna[tid]->wj[jj],sna[tid]->rcutij[jj]); - - sna[tid]->compute_dbidrj(); - sna[tid]->copy_dbi2dbvec(); - - fij[0] = 0.0; - fij[1] = 0.0; - fij[2] = 0.0; - - // linear contributions - - for (k = 1; k <= ncoeff; k++) { - double bgb = coeffi[k]; - fij[0] += bgb*sna[tid]->dbvec[k-1][0]; - fij[1] += bgb*sna[tid]->dbvec[k-1][1]; - fij[2] += bgb*sna[tid]->dbvec[k-1][2]; - } - - // quadratic contributions - - if (quadraticflag) { - int k = ncoeff+1; - for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bveci = sna[tid]->bvec[icoeff]; - double fack = coeffi[k]*bveci; - double* dbveci = sna[tid]->dbvec[icoeff]; - fij[0] += fack*sna[tid]->dbvec[icoeff][0]; - fij[1] += fack*sna[tid]->dbvec[icoeff][1]; - fij[2] += fack*sna[tid]->dbvec[icoeff][2]; - k++; - for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { - double facki = coeffi[k]*bveci; - double fackj = coeffi[k]*sna[tid]->bvec[jcoeff]; - double* dbvecj = sna[tid]->dbvec[jcoeff]; - fij[0] += facki*dbvecj[0]+fackj*dbveci[0]; - fij[1] += facki*dbvecj[1]+fackj*dbveci[1]; - fij[2] += facki*dbvecj[2]+fackj*dbveci[2]; - k++; - } - } - } - -#if defined(_OPENMP) -#pragma omp critical -#endif - { - f[i][0] += fij[0]; - f[i][1] += fij[1]; - f[i][2] += fij[2]; - f[j][0] -= fij[0]; - f[j][1] -= fij[1]; - f[j][2] -= fij[2]; - - // tally per-atom virial contribution - - if (vflag) - ev_tally_xyz(i,j,nlocal,newton_pair,0.0,0.0, - fij[0],fij[1],fij[2], - -sna[tid]->rij[jj][0],-sna[tid]->rij[jj][1], - -sna[tid]->rij[jj][2]); - } - } - - // evdwl = energy of atom I, sum over coeffs_k * Bi_k - // only call this for first pair of each atom i - // if atom has no pairs, eatom=0, which is wrong - - if (eflag&&pairs[iijj][1] == 0) { - evdwl = coeffi[0]; - - if (!quadraticflag) { - sna[tid]->compute_bi(); - sna[tid]->copy_bi2bvec(); - } - - // E = beta.B + 0.5*B^t.alpha.B - // coeff[k] = beta[k-1] or - // coeff[k] = alpha_ii or - // coeff[k] = alpha_ij = alpha_ji, j != i - - // linear contributions - - for (int k = 1; k <= ncoeff; k++) - evdwl += coeffi[k]*sna[tid]->bvec[k-1]; - - // quadratic contributions - - if (quadraticflag) { - int k = ncoeff+1; - for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bveci = sna[tid]->bvec[icoeff]; - evdwl += 0.5*coeffi[k++]*bveci*bveci; - for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { - evdwl += coeffi[k++]*bveci*sna[tid]->bvec[jcoeff]; - } - } - } - -#if defined(_OPENMP) -#pragma omp critical -#endif - ev_tally_full(i,2.0*evdwl,0.0,0.0,0.0,0.0,0.0); - } - - } - if (time_dynamic || time_guided) - endtime = MPI_Wtime(); - if (time_dynamic) schedule_time_dynamic = endtime - starttime; - if (time_guided) schedule_time_guided = endtime - starttime; - if (!use_shared_arrays) memory->destroy(pairs); - - }// end of pragma omp parallel - - if (vflag_fdotr) virial_fdotr_compute(); - -} - -inline int PairSNAP::equal(double* x,double* y) -{ - double dist2 = - (x[0]-y[0])*(x[0]-y[0]) + - (x[1]-y[1])*(x[1]-y[1]) + - (x[2]-y[2])*(x[2]-y[2]); - if (dist2 < 1e-20) return 1; - return 0; -} - -inline double PairSNAP::dist2(double* x,double* y) -{ - return - (x[0]-y[0])*(x[0]-y[0]) + - (x[1]-y[1])*(x[1]-y[1]) + - (x[2]-y[2])*(x[2]-y[2]); -} - -// return extra communication cutoff -// extra_cutoff = max(subdomain_length) - -double PairSNAP::extra_cutoff() -{ - double sublo[3],subhi[3]; - - if (domain->triclinic == 0) { - for (int dim = 0 ; dim < 3 ; dim++) { - sublo[dim] = domain->sublo[dim]; - subhi[dim] = domain->subhi[dim]; - } - } else { - domain->lamda2x(domain->sublo_lamda,sublo); - domain->lamda2x(domain->subhi_lamda,subhi); - } - - double sub_size[3]; - for (int dim = 0; dim < 3; dim++) - sub_size[dim] = subhi[dim] - sublo[dim]; - - double max_sub_size = 0; - for (int dim = 0; dim < 3; dim++) - max_sub_size = MAX(max_sub_size,sub_size[dim]); - - // note: for triclinic, probably need something different - // see Comm::setup() - - return max_sub_size; -} - -// micro load_balancer: each MPI process will -// check with each of its 26 neighbors, -// whether an imbalance exists in the number -// of atoms to calculate forces for. -// If it does it will set ilistmask of one of -// its local atoms to zero, and send its Tag -// to the neighbor process. The neighboring process -// will check its ghost list for the -// ghost atom with the same Tag which is closest -// to its domain center, and build a -// neighborlist for this ghost atom. For this to work, -// the communication cutoff has to be -// as large as the neighbor cutoff + -// maximum subdomain length. - -// Note that at most one atom is exchanged per processor pair. - -// Also note that the local atom assignment -// doesn't change. This load balancer will cause -// some ghost atoms to have full neighborlists -// which are unique to PairSNAP. -// They are not part of the generally accessible neighborlist. -// At the same time corresponding local atoms on -// other MPI processes will not be -// included in the force computation since -// their ilistmask is 0. This does not effect -// any other classes which might -// access the same general neighborlist. -// Reverse communication (newton on) of forces is required. - -// Currently the load balancer does two passes, -// since its exchanging atoms upstream and downstream. - -void PairSNAP::load_balance() -{ - double sublo[3],subhi[3]; - if (domain->triclinic == 0) { - double* sublotmp = domain->sublo; - double* subhitmp = domain->subhi; - for (int dim = 0 ; dim<3 ; dim++) { - sublo[dim]=sublotmp[dim]; - subhi[dim]=subhitmp[dim]; - } - } else { - double* sublotmp = domain->sublo_lamda; - double* subhitmp = domain->subhi_lamda; - domain->lamda2x(sublotmp,sublo); - domain->lamda2x(subhitmp,subhi); - } - - //if (list->inum==0) list->grow(atom->nmax); - - int nlocal = ghostinum; - for (int i=0; i < list->inum; i++) - if (ilistmask[i]) nlocal++; - int ***grid2proc = comm->grid2proc; - int* procgrid = comm->procgrid; - - int nlocal_up,nlocal_down; - MPI_Request request; - - double sub_mid[3]; - for (int dim=0; dim<3; dim++) - sub_mid[dim] = (subhi[dim] + sublo[dim])/2; - - if (comm->cutghostuser < - neighbor->cutneighmax+extra_cutoff()) - error->all(FLERR,"Communication cutoff too small for SNAP micro load balancing"); - - int nrecv = ghostinum; - int totalsend = 0; - int nsend = 0; - int depth = 1; - - for (int dx = -depth; dx < depth+1; dx++) - for (int dy = -depth; dy < depth+1; dy++) - for (int dz = -depth; dz < depth+1; dz++) { - - if (dx == dy && dy == dz && dz == 0) continue; - - int sendloc[3] = {comm->myloc[0], - comm->myloc[1], comm->myloc[2] - }; - sendloc[0] += dx; - sendloc[1] += dy; - sendloc[2] += dz; - for (int dim = 0; dim < 3; dim++) - if (sendloc[dim] >= procgrid[dim]) - sendloc[dim] = sendloc[dim] - procgrid[dim]; - for (int dim = 0; dim < 3; dim++) - if (sendloc[dim] < 0) - sendloc[dim] = procgrid[dim] + sendloc[dim]; - int recvloc[3] = {comm->myloc[0], - comm->myloc[1], comm->myloc[2] - }; - recvloc[0] -= dx; - recvloc[1] -= dy; - recvloc[2] -= dz; - for (int dim = 0; dim < 3; dim++) - if (recvloc[dim] < 0) - recvloc[dim] = procgrid[dim] + recvloc[dim]; - for (int dim = 0; dim < 3; dim++) - if (recvloc[dim] >= procgrid[dim]) - recvloc[dim] = recvloc[dim] - procgrid[dim]; - - int sendproc = grid2proc[sendloc[0]][sendloc[1]][sendloc[2]]; - int recvproc = grid2proc[recvloc[0]][recvloc[1]][recvloc[2]]; - - // two stage process, first upstream movement, then downstream - - MPI_Sendrecv(&nlocal,1,MPI_INT,sendproc,0, - &nlocal_up,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE); - MPI_Sendrecv(&nlocal,1,MPI_INT,recvproc,0, - &nlocal_down,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE); - nsend = 0; - - // send upstream - - if (nlocal > nlocal_up+1) { - - int i = totalsend++; - while(i < list->inum && ilistmask[i] == 0) - i = totalsend++; - - if (i < list->inum) - MPI_Isend(&atom->tag[i],1,MPI_INT,recvproc,0,world,&request); - else { - int j = -1; - MPI_Isend(&j,1,MPI_INT,recvproc,0,world,&request); - } - - if (i < list->inum) { - for (int j = 0; j < list->inum; j++) - if (list->ilist[j] == i) - ilistmask[j] = 0; - nsend = 1; - } - } - - // recv downstream - - if (nlocal < nlocal_down-1) { - nlocal++; - int get_tag = -1; - MPI_Recv(&get_tag,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE); - - // if get_tag -1 the other process didnt have local atoms to send - - if (get_tag >= 0) { - if (ghostinum >= ghostilist_max) { - memory->grow(ghostilist,ghostinum+10, - "PairSnap::ghostilist"); - ghostilist_max = ghostinum+10; - } - if (atom->nlocal + atom->nghost >= ghostnumneigh_max) { - ghostnumneigh_max = atom->nlocal+atom->nghost+100; - memory->grow(ghostnumneigh,ghostnumneigh_max, - "PairSnap::ghostnumneigh"); - memory->grow(ghostfirstneigh,ghostnumneigh_max, - "PairSnap::ghostfirstneigh"); - } - - // find closest ghost image of the transfered particle - - double mindist = 1e200; - int closestghost = -1; - for (int j = 0; j < atom->nlocal + atom->nghost; j++) - if (atom->tag[j] == get_tag) - if (dist2(sub_mid, atom->x[j]) < mindist) { - closestghost = j; - mindist = dist2(sub_mid, atom->x[j]); - } - - // build neighborlist for this particular - // ghost atom, and add it to list->ilist - - if (ghostneighs_max - ghostneighs_total < - neighbor->oneatom) { - memory->grow(ghostneighs, - ghostneighs_total + neighbor->oneatom, - "PairSnap::ghostneighs"); - ghostneighs_max = ghostneighs_total + neighbor->oneatom; - } - - int j = closestghost; - - ghostilist[ghostinum] = j; - ghostnumneigh[j] = 0; - ghostfirstneigh[j] = ghostneighs_total; - - ghostinum++; - int* jlist = ghostneighs + ghostfirstneigh[j]; - - // find all neighbors by looping - // over all local and ghost atoms - - for (int k = 0; k < atom->nlocal + atom->nghost; k++) - if (dist2(atom->x[j],atom->x[k]) < - neighbor->cutneighmax*neighbor->cutneighmax) { - jlist[ghostnumneigh[j]] = k; - ghostnumneigh[j]++; - ghostneighs_total++; - } - } - - if (get_tag >= 0) nrecv++; - } - - // decrease nlocal later, so that it is the - // initial number both for receiving and sending - - if (nsend) nlocal--; - - // second pass through the grid - - MPI_Sendrecv(&nlocal,1,MPI_INT,sendproc,0, - &nlocal_up,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE); - MPI_Sendrecv(&nlocal,1,MPI_INT,recvproc,0, - &nlocal_down,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE); - - // send downstream - - nsend=0; - if (nlocal > nlocal_down+1) { - int i = totalsend++; - while(i < list->inum && ilistmask[i]==0) i = totalsend++; - - if (i < list->inum) - MPI_Isend(&atom->tag[i],1,MPI_INT,sendproc,0,world,&request); - else { - int j =- 1; - MPI_Isend(&j,1,MPI_INT,sendproc,0,world,&request); - } - - if (i < list->inum) { - for (int j=0; jinum; j++) - if (list->ilist[j] == i) ilistmask[j] = 0; - nsend = 1; - } - } - - // receive upstream - - if (nlocal < nlocal_up-1) { - nlocal++; - int get_tag = -1; - - MPI_Recv(&get_tag,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE); - - if (get_tag >= 0) { - if (ghostinum >= ghostilist_max) { - memory->grow(ghostilist,ghostinum+10, - "PairSnap::ghostilist"); - ghostilist_max = ghostinum+10; - } - if (atom->nlocal + atom->nghost >= ghostnumneigh_max) { - ghostnumneigh_max = atom->nlocal + atom->nghost + 100; - memory->grow(ghostnumneigh,ghostnumneigh_max, - "PairSnap::ghostnumneigh"); - memory->grow(ghostfirstneigh,ghostnumneigh_max, - "PairSnap::ghostfirstneigh"); - } - - // find closest ghost image of the transfered particle - - double mindist = 1e200; - int closestghost = -1; - for (int j = 0; j < atom->nlocal + atom->nghost; j++) - if (atom->tag[j] == get_tag) - if (dist2(sub_mid,atom->x[j])x[j]); - } - - // build neighborlist for this particular ghost atom - - if (ghostneighs_max-ghostneighs_total < neighbor->oneatom) { - memory->grow(ghostneighs,ghostneighs_total + neighbor->oneatom, - "PairSnap::ghostneighs"); - ghostneighs_max = ghostneighs_total + neighbor->oneatom; - } - - int j = closestghost; - - ghostilist[ghostinum] = j; - ghostnumneigh[j] = 0; - ghostfirstneigh[j] = ghostneighs_total; - - ghostinum++; - int* jlist = ghostneighs + ghostfirstneigh[j]; - - for (int k = 0; k < atom->nlocal + atom->nghost; k++) - if (dist2(atom->x[j],atom->x[k]) < - neighbor->cutneighmax*neighbor->cutneighmax) { - jlist[ghostnumneigh[j]] = k; - ghostnumneigh[j]++; - ghostneighs_total++; - } - } - - if (get_tag >= 0) nrecv++; - } - if (nsend) nlocal--; - } -} - -void PairSNAP::set_sna_to_shared(int snaid,int i) -{ - sna[snaid]->rij = i_rij[i]; - sna[snaid]->inside = i_inside[i]; - sna[snaid]->wj = i_wj[i]; - sna[snaid]->rcutij = i_rcutij[i]; - sna[snaid]->zarray_r = i_zarray_r[i]; - sna[snaid]->zarray_i = i_zarray_i[i]; - sna[snaid]->uarraytot_r = i_uarraytot_r[i]; - sna[snaid]->uarraytot_i = i_uarraytot_i[i]; -} - -void PairSNAP::build_per_atom_arrays() -{ - -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&starttime); -#endif - - int count = 0; - int neighmax = 0; - for (int ii = 0; ii < list->inum; ii++) - if ((do_load_balance <= 0) || ilistmask[ii]) { - neighmax=MAX(neighmax,list->numneigh[list->ilist[ii]]); - ++count; - } - for (int ii = 0; ii < ghostinum; ii++) { - neighmax=MAX(neighmax,ghostnumneigh[ghostilist[ii]]); - ++count; - } - - if (i_max < count || i_neighmax < neighmax) { - int i_maxt = MAX(count,i_max); - i_neighmax = MAX(neighmax,i_neighmax); - memory->destroy(i_rij); - memory->destroy(i_inside); - memory->destroy(i_wj); - memory->destroy(i_rcutij); - memory->destroy(i_ninside); - memory->destroy(i_pairs); - memory->create(i_rij,i_maxt,i_neighmax,3,"PairSNAP::i_rij"); - memory->create(i_inside,i_maxt,i_neighmax,"PairSNAP::i_inside"); - memory->create(i_wj,i_maxt,i_neighmax,"PairSNAP::i_wj"); - memory->create(i_rcutij,i_maxt,i_neighmax,"PairSNAP::i_rcutij"); - memory->create(i_ninside,i_maxt,"PairSNAP::i_ninside"); - memory->create(i_pairs,i_maxt*i_neighmax,4,"PairSNAP::i_pairs"); - } - - if (i_max < count) { - int jdim = sna[0]->twojmax+1; - memory->destroy(i_uarraytot_r); - memory->destroy(i_uarraytot_i); - memory->create(i_uarraytot_r,count,jdim,jdim,jdim, - "PairSNAP::i_uarraytot_r"); - memory->create(i_uarraytot_i,count,jdim,jdim,jdim, - "PairSNAP::i_uarraytot_i"); - if (i_zarray_r != NULL) - for (int i = 0; i < i_max; i++) { - memory->destroy(i_zarray_r[i]); - memory->destroy(i_zarray_i[i]); - } - - delete [] i_zarray_r; - delete [] i_zarray_i; - i_zarray_r = new double*****[count]; - i_zarray_i = new double*****[count]; - for (int i = 0; i < count; i++) { - memory->create(i_zarray_r[i],jdim,jdim,jdim,jdim,jdim, - "PairSNAP::i_zarray_r"); - memory->create(i_zarray_i[i],jdim,jdim,jdim,jdim,jdim, - "PairSNAP::i_zarray_i"); - } - } - - if (i_max < count) - i_max = count; - - count = 0; - i_numpairs = 0; - for (int ii = 0; ii < list->inum; ii++) { - if ((do_load_balance <= 0) || ilistmask[ii]) { - int i = list->ilist[ii]; - int jnum = list->numneigh[i]; - int* jlist = list->firstneigh[i]; - const double xtmp = atom->x[i][0]; - const double ytmp = atom->x[i][1]; - const double ztmp = atom->x[i][2]; - const int itype = atom->type[i]; - const int ielem = map[itype]; - const double radi = radelem[ielem]; - int ninside = 0; - for (int jj = 0; jj < jnum; jj++) { - int j = jlist[jj]; - j &= NEIGHMASK; - const double delx = atom->x[j][0] - xtmp; - const double dely = atom->x[j][1] - ytmp; - const double delz = atom->x[j][2] - ztmp; - const double rsq = delx*delx + dely*dely + delz*delz; - int jtype = atom->type[j]; - int jelem = map[jtype]; - - i_pairs[i_numpairs][0] = i; - i_pairs[i_numpairs][1] = jj; - i_pairs[i_numpairs][2] = -1; - i_pairs[i_numpairs][3] = count; - if (rsq < cutsq[itype][jtype]&&rsq>1e-20) { - i_rij[count][ninside][0] = delx; - i_rij[count][ninside][1] = dely; - i_rij[count][ninside][2] = delz; - i_inside[count][ninside] = j; - i_wj[count][ninside] = wjelem[jelem]; - i_rcutij[count][ninside] = (radi + radelem[jelem])*rcutfac; - - // update index list with inside index - i_pairs[i_numpairs][2] = ninside++; - } - i_numpairs++; - } - i_ninside[count] = ninside; - count++; - } - } - - for (int ii = 0; ii < ghostinum; ii++) { - int i = ghostilist[ii]; - int jnum = ghostnumneigh[i]; - int* jlist = ghostneighs+ghostfirstneigh[i]; - const double xtmp = atom->x[i][0]; - const double ytmp = atom->x[i][1]; - const double ztmp = atom->x[i][2]; - const int itype = atom->type[i]; + const double xtmp = x[i][0]; + const double ytmp = x[i][1]; + const double ztmp = x[i][2]; + const int itype = type[i]; const int ielem = map[itype]; const double radi = radelem[ielem]; - int ninside = 0; + jlist = list->firstneigh[i]; + jnum = list->numneigh[i]; + + // insure rij, inside, wj, and rcutij are of size jnum + + snaptr->grow_rij(jnum); + + // rij[][3] = displacements between atom I and those neighbors + // inside = indices of neighbors of I within cutoff + // wj = weights for neighbors of I within cutoff + // rcutij = cutoffs for neighbors of I within cutoff + // note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi + + ninside = 0; for (int jj = 0; jj < jnum; jj++) { - int j = jlist[jj]; + j = jlist[jj]; j &= NEIGHMASK; - const double delx = atom->x[j][0] - xtmp; - const double dely = atom->x[j][1] - ytmp; - const double delz = atom->x[j][2] - ztmp; - const double rsq = delx*delx + dely*dely + delz*delz; - int jtype = atom->type[j]; + delx = x[j][0] - xtmp; + dely = x[j][1] - ytmp; + delz = x[j][2] - ztmp; + rsq = delx*delx + dely*dely + delz*delz; + int jtype = type[j]; int jelem = map[jtype]; - i_pairs[i_numpairs][0] = i; - i_pairs[i_numpairs][1] = jj; - i_pairs[i_numpairs][2] = -1; - i_pairs[i_numpairs][3] = count; if (rsq < cutsq[itype][jtype]&&rsq>1e-20) { - i_rij[count][ninside][0] = delx; - i_rij[count][ninside][1] = dely; - i_rij[count][ninside][2] = delz; - i_inside[count][ninside] = j; - i_wj[count][ninside] = wjelem[jelem]; - i_rcutij[count][ninside] = (radi + radelem[jelem])*rcutfac; - // update index list with inside index - i_pairs[i_numpairs][2] = ninside++; + snaptr->rij[ninside][0] = delx; + snaptr->rij[ninside][1] = dely; + snaptr->rij[ninside][2] = delz; + snaptr->inside[ninside] = j; + snaptr->wj[ninside] = wjelem[jelem]; + snaptr->rcutij[ninside] = (radi + radelem[jelem])*rcutfac; + ninside++; } - i_numpairs++; } - i_ninside[count] = ninside; - count++; - } -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&endtime); - timers[0]+=(endtime.tv_sec-starttime.tv_sec+1.0* - (endtime.tv_nsec-starttime.tv_nsec)/1000000000); -#endif -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&starttime); -#endif -#if defined(_OPENMP) -#pragma omp parallel for shared(count) default(none) -#endif - for (int ii=0; ii < count; ii++) { - int tid = omp_get_thread_num(); - set_sna_to_shared(tid,ii); - //sna[tid]->compute_ui(i_ninside[ii]); -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&starttime); -#endif - sna[tid]->compute_ui_omp(i_ninside[ii],MAX(int(nthreads/count),1)); -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&endtime); - sna[tid]->timers[0]+=(endtime.tv_sec-starttime.tv_sec+1.0* - (endtime.tv_nsec-starttime.tv_nsec)/1000000000); -#endif - } + snaptr->compute_ui(ninside); + snaptr->compute_zi(); + snaptr->compute_bi(); -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&starttime); -#endif - for (int ii=0; ii < count; ii++) { - int tid = 0;//omp_get_thread_num(); - set_sna_to_shared(tid,ii); - sna[tid]->compute_zi_omp(MAX(int(nthreads/count),1)); + for (int icoeff = 0; icoeff < ncoeff; icoeff++) + bispectrum[ii][icoeff] = snaptr->blist[icoeff]; } -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&endtime); - sna[0]->timers[1]+=(endtime.tv_sec-starttime.tv_sec+1.0* - (endtime.tv_nsec-starttime.tv_nsec)/1000000000); -#endif - -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&endtime); - timers[1]+=(endtime.tv_sec-starttime.tv_sec+1.0* - (endtime.tv_nsec-starttime.tv_nsec)/1000000000); -#endif } /* ---------------------------------------------------------------------- @@ -1316,96 +354,8 @@ void PairSNAP::allocate() void PairSNAP::settings(int narg, char **arg) { - - // set default values for optional arguments - - nthreads = -1; - use_shared_arrays=-1; - do_load_balance = 0; - use_optimized = 1; - - // optional arguments - - for (int i=0; i < narg; i++) { - if (i+2>narg) error->all(FLERR,"Illegal pair_style command"); - if (strcmp(arg[i],"nthreads")==0) { - nthreads=force->inumeric(FLERR,arg[++i]); -#if defined(LMP_USER_OMP) - error->all(FLERR,"Must set number of threads via package omp command"); -#else - omp_set_num_threads(nthreads); - comm->nthreads=nthreads; -#endif - continue; - } - if (strcmp(arg[i],"optimized")==0) { - use_optimized=force->inumeric(FLERR,arg[++i]); - continue; - } - if (strcmp(arg[i],"shared")==0) { - use_shared_arrays=force->inumeric(FLERR,arg[++i]); - continue; - } - if (strcmp(arg[i],"loadbalance")==0) { - do_load_balance = force->inumeric(FLERR,arg[++i]); - if (do_load_balance) { - double mincutoff = extra_cutoff() + - rcutmax + neighbor->skin; - if (comm->cutghostuser < mincutoff) { - char buffer[255]; - - //apparently mincutoff is 0 after sprintf command ????? - - double tmp = mincutoff + 0.1; - sprintf(buffer, "Communication cutoff is too small " - "for SNAP micro load balancing, increased to %lf", - mincutoff+0.1); - if (comm->me==0) - error->warning(FLERR,buffer); - - comm->cutghostuser = tmp; - - } - } - continue; - } - if (strcmp(arg[i],"schedule")==0) { - i++; - if (strcmp(arg[i],"static")==0) - schedule_user = 1; - if (strcmp(arg[i],"dynamic")==0) - schedule_user = 2; - if (strcmp(arg[i],"guided")==0) - schedule_user = 3; - if (strcmp(arg[i],"auto")==0) - schedule_user = 4; - if (strcmp(arg[i],"determine")==0) - schedule_user = 5; - if (schedule_user == 0) - error->all(FLERR,"Illegal pair_style command"); - continue; - } + for (int i=0; i < narg; i++) error->all(FLERR,"Illegal pair_style command"); - } - - if (nthreads < 0) - nthreads = comm->nthreads; - - if (use_shared_arrays < 0) { - if (nthreads > 1 && atom->nlocal <= 2*nthreads) - use_shared_arrays = 1; - else use_shared_arrays = 0; - } - - // check if running non-optimized code with - // optimization flags set - - if (!use_optimized) - if (nthreads > 1 || - use_shared_arrays || - do_load_balance || - schedule_user) - error->all(FLERR,"Illegal pair_style command"); } /* ---------------------------------------------------------------------- @@ -1493,26 +443,12 @@ void PairSNAP::coeff(int narg, char **arg) if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); - sna = new SNA*[nthreads]; + snaptr = new SNA(lmp,rfac0,twojmax, + rmin0,switchflag,bzeroflag); - // allocate memory for per OpenMP thread data which - // is wrapped into the sna class - -#if defined(_OPENMP) -#pragma omp parallel default(none) -#endif - { - int tid = omp_get_thread_num(); - sna[tid] = new SNA(lmp,rfac0,twojmax, - diagonalstyle,use_shared_arrays, - rmin0,switchflag,bzeroflag); - if (!use_shared_arrays) - sna[tid]->grow_rij(nmax); - } - - if (ncoeff != sna[0]->ncoeff) { + if (ncoeff != snaptr->ncoeff) { if (comm->me == 0) - printf("ncoeff = %d snancoeff = %d \n",ncoeff,sna[0]->ncoeff); + printf("ncoeff = %d snancoeff = %d \n",ncoeff,snaptr->ncoeff); error->all(FLERR,"Incorrect SNAP parameter file"); } @@ -1539,13 +475,7 @@ void PairSNAP::init_style() neighbor->requests[irequest]->half = 0; neighbor->requests[irequest]->full = 1; -#if defined(_OPENMP) -#pragma omp parallel default(none) -#endif - { - int tid = omp_get_thread_num(); - sna[tid]->init(); - } + snaptr->init(); } @@ -1693,6 +623,8 @@ void PairSNAP::read_files(char *coefffilename, char *paramfilename) } } + if (comm->me == 0) fclose(fpcoeff); + // set flags for required keywords rcutfacflag = 0; @@ -1702,7 +634,6 @@ void PairSNAP::read_files(char *coefffilename, char *paramfilename) rfac0 = 0.99363; rmin0 = 0.0; - diagonalstyle = 3; switchflag = 1; bzeroflag = 1; quadraticflag = 0; @@ -1763,8 +694,6 @@ void PairSNAP::read_files(char *coefffilename, char *paramfilename) rfac0 = atof(keyval); else if (strcmp(keywd,"rmin0") == 0) rmin0 = atof(keyval); - else if (strcmp(keywd,"diagonalstyle") == 0) - diagonalstyle = atoi(keyval); else if (strcmp(keywd,"switchflag") == 0) switchflag = atoi(keyval); else if (strcmp(keywd,"bzeroflag") == 0) @@ -1787,14 +716,16 @@ void PairSNAP::read_files(char *coefffilename, char *paramfilename) double PairSNAP::memory_usage() { double bytes = Pair::memory_usage(); + int n = atom->ntypes+1; - bytes += n*n*sizeof(int); - bytes += n*n*sizeof(double); - bytes += 3*nmax*sizeof(double); - bytes += nmax*sizeof(int); - bytes += (2*ncoeffall)*sizeof(double); - bytes += (ncoeff*3)*sizeof(double); - bytes += sna[0]->memory_usage()*nthreads; + bytes += n*n*sizeof(int); // setflag + bytes += n*n*sizeof(double); // cutsq + bytes += n*sizeof(int); // map + bytes += beta_max*ncoeff*sizeof(double); // bispectrum + bytes += beta_max*ncoeff*sizeof(double); // beta + + bytes += snaptr->memory_usage(); // SNA object + return bytes; } diff --git a/src/SNAP/pair_snap.h b/src/SNAP/pair_snap.h index 1fa065755c..c64eaa5d4e 100644 --- a/src/SNAP/pair_snap.h +++ b/src/SNAP/pair_snap.h @@ -29,8 +29,6 @@ public: PairSNAP(class LAMMPS *); ~PairSNAP(); virtual void compute(int, int); - void compute_regular(int, int); - void compute_optimized(int, int); void settings(int, char **); virtual void coeff(int, char **); virtual void init_style(); @@ -42,56 +40,14 @@ public: protected: int ncoeffq, ncoeffall; - double **bvec, ***dbvec; - class SNA** sna; - int nmax; - int nthreads; + class SNA* snaptr; virtual void allocate(); void read_files(char *, char *); inline int equal(double* x,double* y); inline double dist2(double* x,double* y); - double extra_cutoff(); - void load_balance(); - void set_sna_to_shared(int snaid,int i); - void build_per_atom_arrays(); - int schedule_user; - double schedule_time_guided; - double schedule_time_dynamic; - - int ncalls_neigh; - int do_load_balance; - int ilistmask_max; - int* ilistmask; - int ghostinum; - int ghostilist_max; - int* ghostilist; - int ghostnumneigh_max; - int* ghostnumneigh; - int* ghostneighs; - int* ghostfirstneigh; - int ghostneighs_total; - int ghostneighs_max; - - int use_optimized; - int use_shared_arrays; - - int i_max; - int i_neighmax; - int i_numpairs; - int **i_pairs; - double ***i_rij; - int **i_inside; - double **i_wj; - double **i_rcutij; - int *i_ninside; - double ****i_uarraytot_r, ****i_uarraytot_i; - double ******i_zarray_r, ******i_zarray_i; - -#ifdef TIMING_INFO - // timespec starttime, endtime; - double timers[4]; -#endif + void compute_beta(); + void compute_bispectrum(); double rcutmax; // max cutoff for all elements int nelements; // # of unique elements @@ -99,10 +55,13 @@ protected: double *radelem; // element radii double *wjelem; // elements weights double **coeffelem; // element bispectrum coefficients + double** beta; // betas for all atoms in list + double** bispectrum; // bispectrum components for all atoms in list int *map; // mapping from atom types to elements - int twojmax, diagonalstyle, switchflag, bzeroflag; + int twojmax, switchflag, bzeroflag; double rfac0, rmin0, wj1, wj2; int rcutfacflag, twojmaxflag; // flags for required parameters + int beta_max; // length of beta }; } @@ -124,15 +83,6 @@ Self-explanatory. Check the input script syntax and compare to the documentation for the command. You can use -echo screen as a command-line option when running LAMMPS to see the offending line. -E: Must set number of threads via package omp command - -Because you are using the USER-OMP package, set the number of threads -via its settings, not by the pair_style snap nthreads setting. - -W: Communication cutoff is too small for SNAP micro load balancing, increased to %lf - -Self-explanatory. - E: Incorrect args for pair coefficients Self-explanatory. Check the input script or data file. diff --git a/src/SNAP/sna.cpp b/src/SNAP/sna.cpp index 7ed1bc1e23..ec545c51b2 100644 --- a/src/SNAP/sna.cpp +++ b/src/SNAP/sna.cpp @@ -21,7 +21,6 @@ #include "math_extra.h" #include #include -#include "openmp_snap.h" #include "memory.h" #include "error.h" @@ -114,34 +113,30 @@ using namespace MathConst; ------------------------------------------------------------------------- */ SNA::SNA(LAMMPS* lmp, double rfac0_in, - int twojmax_in, int diagonalstyle_in, int use_shared_arrays_in, + int twojmax_in, double rmin0_in, int switch_flag_in, int bzero_flag_in) : Pointers(lmp) { wself = 1.0; - use_shared_arrays = use_shared_arrays_in; rfac0 = rfac0_in; rmin0 = rmin0_in; switch_flag = switch_flag_in; bzero_flag = bzero_flag_in; twojmax = twojmax_in; - diagonalstyle = diagonalstyle_in; ncoeff = compute_ncoeff(); - create_twojmax_arrays(); - - bvec = NULL; - dbvec = NULL; - memory->create(bvec, ncoeff, "pair:bvec"); - memory->create(dbvec, ncoeff, 3, "pair:dbvec"); rij = NULL; inside = NULL; wj = NULL; rcutij = NULL; nmax = 0; - idxj = NULL; + idxz = NULL; + idxb = NULL; + + build_indexlist(); + create_twojmax_arrays(); if (bzero_flag) { double www = wself*wself*wself; @@ -149,134 +144,142 @@ SNA::SNA(LAMMPS* lmp, double rfac0_in, bzero[j] = www*(j+1); } -#ifdef TIMING_INFO - timers = new double[20]; - for(int i = 0; i < 20; i++) timers[i] = 0; - print = 0; - counter = 0; -#endif - - build_indexlist(); - - } /* ---------------------------------------------------------------------- */ SNA::~SNA() { - if(!use_shared_arrays) { - destroy_twojmax_arrays(); - memory->destroy(rij); - memory->destroy(inside); - memory->destroy(wj); - memory->destroy(rcutij); - memory->destroy(bvec); - memory->destroy(dbvec); - } - delete[] idxj; + memory->destroy(rij); + memory->destroy(inside); + memory->destroy(wj); + memory->destroy(rcutij); + delete[] idxz; + delete[] idxb; + destroy_twojmax_arrays(); } void SNA::build_indexlist() { - if(diagonalstyle == 0) { - int idxj_count = 0; - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) - idxj_count++; + // index list for cglist - // indexList can be changed here + int jdim = twojmax + 1; + memory->create(idxcg_block, jdim, jdim, jdim, + "sna:idxcg_block"); - idxj = new SNA_LOOPINDICES[idxj_count]; - idxj_max = idxj_count; + int idxcg_count = 0; + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { + idxcg_block[j1][j2][j] = idxcg_count; + for (int m1 = 0; m1 <= j1; m1++) + for (int m2 = 0; m2 <= j2; m2++) + idxcg_count++; + } + idxcg_max = idxcg_count; - idxj_count = 0; + // index list for uarray + // need to include both halves - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { - idxj[idxj_count].j1 = j1; - idxj[idxj_count].j2 = j2; - idxj[idxj_count].j = j; - idxj_count++; + memory->create(idxu_block, jdim, + "sna:idxu_block"); + + int idxu_count = 0; + + for(int j = 0; j <= twojmax; j++) { + idxu_block[j] = idxu_count; + for(int mb = 0; mb <= j; mb++) + for(int ma = 0; ma <= j; ma++) + idxu_count++; + } + idxu_max = idxu_count; + + // index list for beta and B + + int idxb_count = 0; + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) + if (j >= j1) idxb_count++; + + idxb_max = idxb_count; + idxb = new SNA_BINDICES[idxb_max]; + + idxb_count = 0; + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) + if (j >= j1) { + idxb[idxb_count].j1 = j1; + idxb[idxb_count].j2 = j2; + idxb[idxb_count].j = j; + idxb_count++; } - } - if(diagonalstyle == 1) { - int idxj_count = 0; + // reverse index list for beta and b - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j = 0; j <= MIN(twojmax, 2 * j1); j += 2) { - idxj_count++; + memory->create(idxb_block, jdim, jdim, jdim, + "sna:idxb_block"); + idxb_count = 0; + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { + if (j >= j1) { + idxb_block[j1][j2][j] = idxb_count; + idxb_count++; + } } - // indexList can be changed here + // index list for zlist - idxj = new SNA_LOOPINDICES[idxj_count]; - idxj_max = idxj_count; + int idxz_count = 0; - idxj_count = 0; + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) + for (int mb = 0; 2*mb <= j; mb++) + for (int ma = 0; ma <= j; ma++) + idxz_count++; + + idxz_max = idxz_count; + idxz = new SNA_ZINDICES[idxz_max]; + + memory->create(idxz_block, jdim, jdim, jdim, + "sna:idxz_block"); + + idxz_count = 0; + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { + idxz_block[j1][j2][j] = idxz_count; - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j = 0; j <= MIN(twojmax, 2 * j1); j += 2) { - idxj[idxj_count].j1 = j1; - idxj[idxj_count].j2 = j1; - idxj[idxj_count].j = j; - idxj_count++; - } - } + // find right beta[jjb] entry + // multiply and divide by j+1 factors + // account for multiplicity of 1, 2, or 3 - if(diagonalstyle == 2) { - int idxj_count = 0; + for (int mb = 0; 2*mb <= j; mb++) + for (int ma = 0; ma <= j; ma++) { + idxz[idxz_count].j1 = j1; + idxz[idxz_count].j2 = j2; + idxz[idxz_count].j = j; + idxz[idxz_count].ma1min = MAX(0, (2 * ma - j - j2 + j1) / 2); + idxz[idxz_count].ma2max = (2 * ma - j - (2 * idxz[idxz_count].ma1min - j1) + j2) / 2; + idxz[idxz_count].na = MIN(j1, (2 * ma - j + j2 + j1) / 2) - idxz[idxz_count].ma1min + 1; + idxz[idxz_count].mb1min = MAX(0, (2 * mb - j - j2 + j1) / 2); + idxz[idxz_count].mb2max = (2 * mb - j - (2 * idxz[idxz_count].mb1min - j1) + j2) / 2; + idxz[idxz_count].nb = MIN(j1, (2 * mb - j + j2 + j1) / 2) - idxz[idxz_count].mb1min + 1; - for(int j1 = 0; j1 <= twojmax; j1++) { - idxj_count++; - } + // apply to z(j1,j2,j,ma,mb) to unique element of y(j) - // indexList can be changed here + const int jju = idxu_block[j] + (j+1)*mb + ma; + idxz[idxz_count].jju = jju; - idxj = new SNA_LOOPINDICES[idxj_count]; - idxj_max = idxj_count; - - idxj_count = 0; - - for(int j1 = 0; j1 <= twojmax; j1++) { - idxj[idxj_count].j1 = j1; - idxj[idxj_count].j2 = j1; - idxj[idxj_count].j = j1; - idxj_count++; - } - } - - if(diagonalstyle == 3) { - int idxj_count = 0; - - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) - if (j >= j1) idxj_count++; - - // indexList can be changed here - - idxj = new SNA_LOOPINDICES[idxj_count]; - idxj_max = idxj_count; - - idxj_count = 0; - - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) - if (j >= j1) { - idxj[idxj_count].j1 = j1; - idxj[idxj_count].j2 = j2; - idxj[idxj_count].j = j; - idxj_count++; + idxz_count++; } - } - + } } + /* ---------------------------------------------------------------------- */ void SNA::init() @@ -292,17 +295,16 @@ void SNA::grow_rij(int newnmax) nmax = newnmax; - if(!use_shared_arrays) { - memory->destroy(rij); - memory->destroy(inside); - memory->destroy(wj); - memory->destroy(rcutij); - memory->create(rij, nmax, 3, "pair:rij"); - memory->create(inside, nmax, "pair:inside"); - memory->create(wj, nmax, "pair:wj"); - memory->create(rcutij, nmax, "pair:rcutij"); - } + memory->destroy(rij); + memory->destroy(inside); + memory->destroy(wj); + memory->destroy(rcutij); + memory->create(rij, nmax, 3, "pair:rij"); + memory->create(inside, nmax, "pair:inside"); + memory->create(wj, nmax, "pair:wj"); + memory->create(rcutij, nmax, "pair:rcutij"); } + /* ---------------------------------------------------------------------- compute Ui by summing over neighbors j ------------------------------------------------------------------------- */ @@ -320,10 +322,6 @@ void SNA::compute_ui(int jnum) zero_uarraytot(); addself_uarraytot(wself); -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &starttime); -#endif - for(int j = 0; j < jnum; j++) { x = rij[j][0]; y = rij[j][1]; @@ -339,48 +337,6 @@ void SNA::compute_ui(int jnum) add_uarraytot(r, wj[j], rcutij[j]); } -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &endtime); - timers[0] += (endtime.tv_sec - starttime.tv_sec + 1.0 * - (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); -#endif - -} - -void SNA::compute_ui_omp(int jnum, int sub_threads) -{ - double rsq, r, x, y, z, z0, theta0; - - // utot(j,ma,mb) = 0 for all j,ma,ma - // utot(j,ma,ma) = 1 for all j,ma - // for j in neighbors of i: - // compute r0 = (x,y,z,z0) - // utot(j,ma,mb) += u(r0;j,ma,mb) for all j,ma,mb - - zero_uarraytot(); - addself_uarraytot(wself); - - for(int j = 0; j < jnum; j++) { - x = rij[j][0]; - y = rij[j][1]; - z = rij[j][2]; - rsq = x * x + y * y + z * z; - r = sqrt(rsq); - theta0 = (r - rmin0) * rfac0 * MY_PI / (rcutij[j] - rmin0); - // theta0 = (r - rmin0) * rscale0; - z0 = r / tan(theta0); - omp_set_num_threads(sub_threads); - -#if defined(_OPENMP) -#pragma omp parallel shared(x,y,z,z0,r,sub_threads) default(none) -#endif - { - compute_uarray_omp(x, y, z, z0, r, sub_threads); - } - add_uarraytot(r, wj[j], rcutij[j]); - } - - } /* ---------------------------------------------------------------------- @@ -389,137 +345,225 @@ void SNA::compute_ui_omp(int jnum, int sub_threads) void SNA::compute_zi() { - // for j1 = 0,...,twojmax - // for j2 = 0,twojmax - // for j = |j1-j2|,Min(twojmax,j1+j2),2 - // for ma = 0,...,j - // for mb = 0,...,jmid - // z(j1,j2,j,ma,mb) = 0 - // for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2) - // sumb1 = 0 - // ma2 = ma-ma1+(j1+j2-j)/2; - // for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2) - // mb2 = mb-mb1+(j1+j2-j)/2; - // sumb1 += cg(j1,mb1,j2,mb2,j) * - // u(j1,ma1,mb1) * u(j2,ma2,mb2) - // z(j1,j2,j,ma,mb) += sumb1*cg(j1,ma1,j2,ma2,j) -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &starttime); -#endif + int ma2, mb2; + for(int jjz = 0; jjz < idxz_max; jjz++) { + const int j1 = idxz[jjz].j1; + const int j2 = idxz[jjz].j2; + const int j = idxz[jjz].j; + const int ma1min = idxz[jjz].ma1min; + const int ma2max = idxz[jjz].ma2max; + const int na = idxz[jjz].na; + const int mb1min = idxz[jjz].mb1min; + const int mb2max = idxz[jjz].mb2max; + const int nb = idxz[jjz].nb; - // compute_dbidrj() requires full j1/j2/j chunk of z elements - // use zarray j1/j2 symmetry + const double* cgblock = cglist + idxcg_block[j1][j2][j]; - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) { - for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { - double sumb1_r, sumb1_i; - int ma2, mb2; - for(int mb = 0; 2*mb <= j; mb++) - for(int ma = 0; ma <= j; ma++) { - zarray_r[j1][j2][j][ma][mb] = 0.0; - zarray_i[j1][j2][j][ma][mb] = 0.0; + zlist_r[jjz] = 0.0; + zlist_i[jjz] = 0.0; - for(int ma1 = MAX(0, (2 * ma - j - j2 + j1) / 2); - ma1 <= MIN(j1, (2 * ma - j + j2 + j1) / 2); ma1++) { - sumb1_r = 0.0; - sumb1_i = 0.0; + int jju1 = idxu_block[j1] + (j1+1)*mb1min; + int jju2 = idxu_block[j2] + (j2+1)*mb2max; + int icgb = mb1min*(j2+1) + mb2max; + for(int ib = 0; ib < nb; ib++) { - ma2 = (2 * ma - j - (2 * ma1 - j1) + j2) / 2; + double suma1_r = 0.0; + double suma1_i = 0.0; - for(int mb1 = MAX(0, (2 * mb - j - j2 + j1) / 2); - mb1 <= MIN(j1, (2 * mb - j + j2 + j1) / 2); mb1++) { + const double* u1_r = &ulisttot_r[jju1]; + const double* u1_i = &ulisttot_i[jju1]; + const double* u2_r = &ulisttot_r[jju2]; + const double* u2_i = &ulisttot_i[jju2]; - mb2 = (2 * mb - j - (2 * mb1 - j1) + j2) / 2; - sumb1_r += cgarray[j1][j2][j][mb1][mb2] * - (uarraytot_r[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2] - - uarraytot_i[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2]); - sumb1_i += cgarray[j1][j2][j][mb1][mb2] * - (uarraytot_r[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2] + - uarraytot_i[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2]); - } // end loop over mb1 + int ma1 = ma1min; + int ma2 = ma2max; + int icga = ma1min*(j2+1) + ma2max; + for(int ia = 0; ia < na; ia++) { + suma1_r += cgblock[icga] * (u1_r[ma1] * u2_r[ma2] - u1_i[ma1] * u2_i[ma2]); + suma1_i += cgblock[icga] * (u1_r[ma1] * u2_i[ma2] + u1_i[ma1] * u2_r[ma2]); + ma1++; + ma2--; + icga += j2; + } // end loop over ia - zarray_r[j1][j2][j][ma][mb] += - sumb1_r * cgarray[j1][j2][j][ma1][ma2]; - zarray_i[j1][j2][j][ma][mb] += - sumb1_i * cgarray[j1][j2][j][ma1][ma2]; - } // end loop over ma1 - } // end loop over ma, mb - } // end loop over j - } // end loop over j1, j2 + zlist_r[jjz] += cgblock[icgb] * suma1_r; + zlist_i[jjz] += cgblock[icgb] * suma1_i; + jju1 += j1+1; + jju2 -= j2+1; + icgb += j2; + } // end loop over ib -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &endtime); - timers[1] += (endtime.tv_sec - starttime.tv_sec + 1.0 * - (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); -#endif + } // end loop over jjz } -void SNA::compute_zi_omp(int sub_threads) +/* ---------------------------------------------------------------------- + compute Yi from Ui without storing Zi, looping over zlist indices +------------------------------------------------------------------------- */ + +void SNA::compute_yi(const double* beta) { - // for j1 = 0,...,twojmax - // for j2 = 0,twojmax - // for j = |j1-j2|,Min(twojmax,j1+j2),2 - // for ma = 0,...,j - // for mb = 0,...,j - // z(j1,j2,j,ma,mb) = 0 - // for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2) - // sumb1 = 0 - // ma2 = ma-ma1+(j1+j2-j)/2; - // for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2) - // mb2 = mb-mb1+(j1+j2-j)/2; - // sumb1 += cg(j1,mb1,j2,mb2,j) * - // u(j1,ma1,mb1) * u(j2,ma2,mb2) - // z(j1,j2,j,ma,mb) += sumb1*cg(j1,ma1,j2,ma2,j) + int j; + int jjz; + int jju; + double betaj; - if(omp_in_parallel()) - omp_set_num_threads(sub_threads); + for(int j = 0; j <= twojmax; j++) { + jju = idxu_block[j]; + for(int mb = 0; 2*mb <= j; mb++) + for(int ma = 0; ma <= j; ma++) { + ylist_r[jju] = 0.0; + ylist_i[jju] = 0.0; + jju++; + } // end loop over ma, mb + } // end loop over j - // compute_dbidrj() requires full j1/j2/j chunk of z elements - // use zarray j1/j2 symmetry + int ma2, mb2; + for(int jjz = 0; jjz < idxz_max; jjz++) { + const int j1 = idxz[jjz].j1; + const int j2 = idxz[jjz].j2; + const int j = idxz[jjz].j; + const int ma1min = idxz[jjz].ma1min; + const int ma2max = idxz[jjz].ma2max; + const int na = idxz[jjz].na; + const int mb1min = idxz[jjz].mb1min; + const int mb2max = idxz[jjz].mb2max; + const int nb = idxz[jjz].nb; -#if defined(_OPENMP) -#pragma omp parallel for schedule(auto) default(none) -#endif - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { + const double* cgblock = cglist + idxcg_block[j1][j2][j]; + int mb = (2 * (mb1min+mb2max) - j1 - j2 + j) / 2; + int ma = (2 * (ma1min+ma2max) - j1 - j2 + j) / 2; - double sumb1_r, sumb1_i; - int ma2, mb2; + double ztmp_r = 0.0; + double ztmp_i = 0.0; - for(int ma = 0; ma <= j; ma++) - for(int mb = 0; mb <= j; mb++) { - zarray_r[j1][j2][j][ma][mb] = 0.0; - zarray_i[j1][j2][j][ma][mb] = 0.0; + int jju1 = idxu_block[j1] + (j1+1)*mb1min; + int jju2 = idxu_block[j2] + (j2+1)*mb2max; + int icgb = mb1min*(j2+1) + mb2max; + for(int ib = 0; ib < nb; ib++) { - for(int ma1 = MAX(0, (2 * ma - j - j2 + j1) / 2); - ma1 <= MIN(j1, (2 * ma - j + j2 + j1) / 2); ma1++) { - sumb1_r = 0.0; - sumb1_i = 0.0; + double suma1_r = 0.0; + double suma1_i = 0.0; - ma2 = (2 * ma - j - (2 * ma1 - j1) + j2) / 2; + const double* u1_r = &ulisttot_r[jju1]; + const double* u1_i = &ulisttot_i[jju1]; + const double* u2_r = &ulisttot_r[jju2]; + const double* u2_i = &ulisttot_i[jju2]; - for(int mb1 = MAX(0, (2 * mb - j - j2 + j1) / 2); - mb1 <= MIN(j1, (2 * mb - j + j2 + j1) / 2); mb1++) { + int ma1 = ma1min; + int ma2 = ma2max; + int icga = ma1min*(j2+1) + ma2max; - mb2 = (2 * mb - j - (2 * mb1 - j1) + j2) / 2; - sumb1_r += cgarray[j1][j2][j][mb1][mb2] * - (uarraytot_r[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2] - - uarraytot_i[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2]); - sumb1_i += cgarray[j1][j2][j][mb1][mb2] * - (uarraytot_r[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2] + - uarraytot_i[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2]); - } + for(int ia = 0; ia < na; ia++) { + suma1_r += cgblock[icga] * (u1_r[ma1] * u2_r[ma2] - u1_i[ma1] * u2_i[ma2]); + suma1_i += cgblock[icga] * (u1_r[ma1] * u2_i[ma2] + u1_i[ma1] * u2_r[ma2]); + ma1++; + ma2--; + icga += j2; + } // end loop over ia - zarray_r[j1][j2][j][ma][mb] += - sumb1_r * cgarray[j1][j2][j][ma1][ma2]; - zarray_i[j1][j2][j][ma][mb] += - sumb1_i * cgarray[j1][j2][j][ma1][ma2]; - } + ztmp_r += cgblock[icgb] * suma1_r; + ztmp_i += cgblock[icgb] * suma1_i; + jju1 += j1+1; + jju2 -= j2+1; + icgb += j2; + } // end loop over ib + + // apply to z(j1,j2,j,ma,mb) to unique element of y(j) + // find right y_list[jju] and beta[jjb] entries + // multiply and divide by j+1 factors + // account for multiplicity of 1, 2, or 3 + + const int jju = idxz[jjz].jju; + + // pick out right beta value + + if (j >= j1) { + const int jjb = idxb_block[j1][j2][j]; + if (j1 == j) { + if (j2 == j) betaj = 3*beta[jjb]; + else betaj = 2*beta[jjb]; + } else betaj = beta[jjb]; + } else if (j >= j2) { + const int jjb = idxb_block[j][j2][j1]; + if (j2 == j) betaj = 2*beta[jjb]*(j1+1)/(j+1.0); + else betaj = beta[jjb]*(j1+1)/(j+1.0); + } else { + const int jjb = idxb_block[j2][j][j1]; + betaj = beta[jjb]*(j1+1)/(j+1.0); + } + + ylist_r[jju] += betaj*ztmp_r; + ylist_i[jju] += betaj*ztmp_i; + + } // end loop over jjz +} + +/* ---------------------------------------------------------------------- + compute dEidRj +------------------------------------------------------------------------- */ + +void SNA::compute_deidrj(double* dedr) +{ + + for(int k = 0; k < 3; k++) + dedr[k] = 0.0; + + for(int j = 0; j <= twojmax; j++) { + int jju = idxu_block[j]; + + for(int mb = 0; 2*mb < j; mb++) + for(int ma = 0; ma <= j; ma++) { + + double* dudr_r = dulist_r[jju]; + double* dudr_i = dulist_i[jju]; + double jjjmambyarray_r = ylist_r[jju]; + double jjjmambyarray_i = ylist_i[jju]; + + for(int k = 0; k < 3; k++) + dedr[k] += + dudr_r[k] * jjjmambyarray_r + + dudr_i[k] * jjjmambyarray_i; + jju++; + } //end loop over ma mb + + // For j even, handle middle column + + if (j%2 == 0) { + + int mb = j/2; + for(int ma = 0; ma < mb; ma++) { + double* dudr_r = dulist_r[jju]; + double* dudr_i = dulist_i[jju]; + double jjjmambyarray_r = ylist_r[jju]; + double jjjmambyarray_i = ylist_i[jju]; + + for(int k = 0; k < 3; k++) + dedr[k] += + dudr_r[k] * jjjmambyarray_r + + dudr_i[k] * jjjmambyarray_i; + jju++; } - } + + int ma = mb; + double* dudr_r = dulist_r[jju]; + double* dudr_i = dulist_i[jju]; + double jjjmambyarray_r = ylist_r[jju]; + double jjjmambyarray_i = ylist_i[jju]; + + for(int k = 0; k < 3; k++) + dedr[k] += + (dudr_r[k] * jjjmambyarray_r + + dudr_i[k] * jjjmambyarray_i)*0.5; + jju++; + + } // end if jeven + + } // end loop over j + + for(int k = 0; k < 3; k++) + dedr[k] *= 2.0; + } /* ---------------------------------------------------------------------- @@ -537,269 +581,46 @@ void SNA::compute_bi() // b(j1,j2,j) += // 2*Conj(u(j,ma,mb))*z(j1,j2,j,ma,mb) -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &starttime); -#endif + for(int jjb = 0; jjb < idxb_max; jjb++) { + const int j1 = idxb[jjb].j1; + const int j2 = idxb[jjb].j2; + const int j = idxb[jjb].j; - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) { - for(int j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) { - barray[j1][j2][j] = 0.0; + int jjz = idxz_block[j1][j2][j]; + int jju = idxu_block[j]; + double sumzu = 0.0; + for (int mb = 0; 2*mb < j; mb++) + for (int ma = 0; ma <= j; ma++) { + sumzu += ulisttot_r[jju]*zlist_r[jjz] + + ulisttot_i[jju]*zlist_i[jjz]; + jjz++; + jju++; + } // end loop over ma, mb - for(int mb = 0; 2*mb < j; mb++) - for(int ma = 0; ma <= j; ma++) - barray[j1][j2][j] += - uarraytot_r[j][ma][mb] * zarray_r[j1][j2][j][ma][mb] + - uarraytot_i[j][ma][mb] * zarray_i[j1][j2][j][ma][mb]; + // For j even, handle middle column - // For j even, special treatment for middle column - - if (j%2 == 0) { - int mb = j/2; - for(int ma = 0; ma < mb; ma++) - barray[j1][j2][j] += - uarraytot_r[j][ma][mb] * zarray_r[j1][j2][j][ma][mb] + - uarraytot_i[j][ma][mb] * zarray_i[j1][j2][j][ma][mb]; - int ma = mb; - barray[j1][j2][j] += - (uarraytot_r[j][ma][mb] * zarray_r[j1][j2][j][ma][mb] + - uarraytot_i[j][ma][mb] * zarray_i[j1][j2][j][ma][mb])*0.5; - } - - barray[j1][j2][j] *= 2.0; - if (bzero_flag) - barray[j1][j2][j] -= bzero[j]; + if (j%2 == 0) { + int mb = j/2; + for(int ma = 0; ma < mb; ma++) { + sumzu += ulisttot_r[jju]*zlist_r[jjz] + + ulisttot_i[jju]*zlist_i[jjz]; + jjz++; + jju++; } - } -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &endtime); - timers[2] += (endtime.tv_sec - starttime.tv_sec + 1.0 * - (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); -#endif + sumzu += 0.5*(ulisttot_r[jju]*zlist_r[jjz] + + ulisttot_i[jju]*zlist_i[jjz]); + jjz++; + jju++; + } // end if jeven -} + blist[jjb] = 2.0*sumzu; -/* ---------------------------------------------------------------------- - copy Bi array to a vector -------------------------------------------------------------------------- */ - -void SNA::copy_bi2bvec() -{ - int ncount, j1, j2, j; - - ncount = 0; - - for(j1 = 0; j1 <= twojmax; j1++) - if(diagonalstyle == 0) { - for(j2 = 0; j2 <= j1; j2++) - for(j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) { - bvec[ncount] = barray[j1][j2][j]; - ncount++; - } - } else if(diagonalstyle == 1) { - j2 = j1; - for(j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) { - bvec[ncount] = barray[j1][j2][j]; - ncount++; - } - } else if(diagonalstyle == 2) { - j = j2 = j1; - bvec[ncount] = barray[j1][j2][j]; - ncount++; - } else if(diagonalstyle == 3) { - for(j2 = 0; j2 <= j1; j2++) - for(j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) - if (j >= j1) { - bvec[ncount] = barray[j1][j2][j]; - ncount++; - } - } -} - -/* ---------------------------------------------------------------------- - calculate derivative of Ui w.r.t. atom j -------------------------------------------------------------------------- */ - -void SNA::compute_duidrj(double* rij, double wj, double rcut) -{ - double rsq, r, x, y, z, z0, theta0, cs, sn; - double dz0dr; - - x = rij[0]; - y = rij[1]; - z = rij[2]; - rsq = x * x + y * y + z * z; - r = sqrt(rsq); - double rscale0 = rfac0 * MY_PI / (rcut - rmin0); - theta0 = (r - rmin0) * rscale0; - cs = cos(theta0); - sn = sin(theta0); - z0 = r * cs / sn; - dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq; - -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &starttime); -#endif - - compute_duarray(x, y, z, z0, r, dz0dr, wj, rcut); - -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &endtime); - timers[3] += (endtime.tv_sec - starttime.tv_sec + 1.0 * - (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); -#endif - -} - -/* ---------------------------------------------------------------------- - calculate derivative of Bi w.r.t. atom j - variant using indexlist for j1,j2,j - variant not using symmetry relation -------------------------------------------------------------------------- */ - -void SNA::compute_dbidrj_nonsymm() -{ - // for j1 = 0,...,twojmax - // for j2 = 0,twojmax - // for j = |j1-j2|,Min(twojmax,j1+j2),2 - // dbdr(j1,j2,j) = 0 - // for ma = 0,...,j - // for mb = 0,...,j - // dzdr = 0 - // for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2) - // sumb1 = 0 - // ma2 = ma-ma1+(j1+j2-j)/2; - // for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2) - // mb2 = mb-mb1+(j1+j2-j)/2; - // sumb1 += cg(j1,mb1,j2,mb2,j) * - // (dudr(j1,ma1,mb1) * u(j2,ma2,mb2) + - // u(j1,ma1,mb1) * dudr(j2,ma2,mb2)) - // dzdr += sumb1*cg(j1,ma1,j2,ma2,j) - // dbdr(j1,j2,j) += - // Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb) + - // Conj(u(j,ma,mb))*dzdr - - double* dbdr; - double* dudr_r, *dudr_i; - double sumb1_r[3], sumb1_i[3], dzdr_r[3], dzdr_i[3]; - int ma2; - -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &starttime); -#endif - - for(int JJ = 0; JJ < idxj_max; JJ++) { - const int j1 = idxj[JJ].j1; - const int j2 = idxj[JJ].j2; - const int j = idxj[JJ].j; - - dbdr = dbarray[j1][j2][j]; - dbdr[0] = 0.0; - dbdr[1] = 0.0; - dbdr[2] = 0.0; - - double** *j1duarray_r = duarray_r[j1]; - double** *j2duarray_r = duarray_r[j2]; - double** *j1duarray_i = duarray_i[j1]; - double** *j2duarray_i = duarray_i[j2]; - double** j1uarraytot_r = uarraytot_r[j1]; - double** j2uarraytot_r = uarraytot_r[j2]; - double** j1uarraytot_i = uarraytot_i[j1]; - double** j2uarraytot_i = uarraytot_i[j2]; - double** j1j2jcgarray = cgarray[j1][j2][j]; - - for(int ma = 0; ma <= j; ma++) - for(int mb = 0; mb <= j; mb++) { - dzdr_r[0] = 0.0; - dzdr_r[1] = 0.0; - dzdr_r[2] = 0.0; - dzdr_i[0] = 0.0; - dzdr_i[1] = 0.0; - dzdr_i[2] = 0.0; - - const int max_mb1 = MIN(j1, (2 * mb - j + j2 + j1) / 2) + 1; - const int max_ma1 = MIN(j1, (2 * ma - j + j2 + j1) / 2) + 1; - - for(int ma1 = MAX(0, (2 * ma - j - j2 + j1) / 2); - ma1 < max_ma1; ma1++) { - - ma2 = (2 * ma - j - (2 * ma1 - j1) + j2) / 2; - sumb1_r[0] = 0.0; - sumb1_r[1] = 0.0; - sumb1_r[2] = 0.0; - sumb1_i[0] = 0.0; - sumb1_i[1] = 0.0; - sumb1_i[2] = 0.0; - - //inside loop 54 operations (mul and add) - for(int mb1 = MAX(0, (2 * mb - j - j2 + j1) / 2), - mb2 = mb + (j1 + j2 - j) / 2 - mb1; - mb1 < max_mb1; mb1++, mb2--) { - - double* dudr1_r, *dudr1_i, *dudr2_r, *dudr2_i; - - dudr1_r = j1duarray_r[ma1][mb1]; - dudr2_r = j2duarray_r[ma2][mb2]; - dudr1_i = j1duarray_i[ma1][mb1]; - dudr2_i = j2duarray_i[ma2][mb2]; - - const double cga_mb1mb2 = j1j2jcgarray[mb1][mb2]; - const double uat_r_ma2mb2 = cga_mb1mb2 * j2uarraytot_r[ma2][mb2]; - const double uat_r_ma1mb1 = cga_mb1mb2 * j1uarraytot_r[ma1][mb1]; - const double uat_i_ma2mb2 = cga_mb1mb2 * j2uarraytot_i[ma2][mb2]; - const double uat_i_ma1mb1 = cga_mb1mb2 * j1uarraytot_i[ma1][mb1]; - - for(int k = 0; k < 3; k++) { - sumb1_r[k] += dudr1_r[k] * uat_r_ma2mb2; - sumb1_r[k] -= dudr1_i[k] * uat_i_ma2mb2; - sumb1_i[k] += dudr1_r[k] * uat_i_ma2mb2; - sumb1_i[k] += dudr1_i[k] * uat_r_ma2mb2; - - sumb1_r[k] += dudr2_r[k] * uat_r_ma1mb1; - sumb1_r[k] -= dudr2_i[k] * uat_i_ma1mb1; - sumb1_i[k] += dudr2_r[k] * uat_i_ma1mb1; - sumb1_i[k] += dudr2_i[k] * uat_r_ma1mb1; - } - } // end loop over mb1,mb2 - - // dzdr += sumb1*cg(j1,ma1,j2,ma2,j) - - dzdr_r[0] += sumb1_r[0] * j1j2jcgarray[ma1][ma2]; - dzdr_r[1] += sumb1_r[1] * j1j2jcgarray[ma1][ma2]; - dzdr_r[2] += sumb1_r[2] * j1j2jcgarray[ma1][ma2]; - dzdr_i[0] += sumb1_i[0] * j1j2jcgarray[ma1][ma2]; - dzdr_i[1] += sumb1_i[1] * j1j2jcgarray[ma1][ma2]; - dzdr_i[2] += sumb1_i[2] * j1j2jcgarray[ma1][ma2]; - } // end loop over ma1,ma2 - - // dbdr(j1,j2,j) += - // Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb) + - // Conj(u(j,ma,mb))*dzdr - - dudr_r = duarray_r[j][ma][mb]; - dudr_i = duarray_i[j][ma][mb]; - - for(int k = 0; k < 3; k++) - dbdr[k] += - (dudr_r[k] * zarray_r[j1][j2][j][ma][mb] + - dudr_i[k] * zarray_i[j1][j2][j][ma][mb]) + - (uarraytot_r[j][ma][mb] * dzdr_r[k] + - uarraytot_i[j][ma][mb] * dzdr_i[k]); - } //end loop over ma mb - - } //end loop over j1 j2 j - -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &endtime); - timers[4] += (endtime.tv_sec - starttime.tv_sec + 1.0 * - (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); -#endif + // apply bzero shift + if (bzero_flag) + blist[jjb] -= bzero[j]; + } } /* ---------------------------------------------------------------------- @@ -839,48 +660,36 @@ void SNA::compute_dbidrj() double** jjjzarray_i; double jjjmambzarray_r; double jjjmambzarray_i; + int jjz, jju; -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &starttime); -#endif + for(int jjb = 0; jjb < idxb_max; jjb++) { + const int j1 = idxb[jjb].j1; + const int j2 = idxb[jjb].j2; + const int j = idxb[jjb].j; - for(int JJ = 0; JJ < idxj_max; JJ++) { - const int j1 = idxj[JJ].j1; - const int j2 = idxj[JJ].j2; - const int j = idxj[JJ].j; - - dbdr = dbarray[j1][j2][j]; + dbdr = dblist[jjb]; dbdr[0] = 0.0; dbdr[1] = 0.0; dbdr[2] = 0.0; // Sum terms Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb) + jjz = idxz_block[j1][j2][j]; + jju = idxu_block[j]; + for(int k = 0; k < 3; k++) sumzdu_r[k] = 0.0; - // use zarray j1/j2 symmetry (optional) - - if (j1 >= j2) { - jjjzarray_r = zarray_r[j1][j2][j]; - jjjzarray_i = zarray_i[j1][j2][j]; - } else { - jjjzarray_r = zarray_r[j2][j1][j]; - jjjzarray_i = zarray_i[j2][j1][j]; - } - for(int mb = 0; 2*mb < j; mb++) for(int ma = 0; ma <= j; ma++) { - - dudr_r = duarray_r[j][ma][mb]; - dudr_i = duarray_i[j][ma][mb]; - jjjmambzarray_r = jjjzarray_r[ma][mb]; - jjjmambzarray_i = jjjzarray_i[ma][mb]; + dudr_r = dulist_r[jju]; + dudr_i = dulist_i[jju]; for(int k = 0; k < 3; k++) sumzdu_r[k] += - dudr_r[k] * jjjmambzarray_r + - dudr_i[k] * jjjmambzarray_i; - + dudr_r[k] * zlist_r[jjz] + + dudr_i[k] * zlist_i[jjz]; + jjz++; + jju++; } //end loop over ma mb // For j even, handle middle column @@ -888,24 +697,24 @@ void SNA::compute_dbidrj() if (j%2 == 0) { int mb = j/2; for(int ma = 0; ma < mb; ma++) { - dudr_r = duarray_r[j][ma][mb]; - dudr_i = duarray_i[j][ma][mb]; - jjjmambzarray_r = jjjzarray_r[ma][mb]; - jjjmambzarray_i = jjjzarray_i[ma][mb]; + dudr_r = dulist_r[jju]; + dudr_i = dulist_i[jju]; for(int k = 0; k < 3; k++) sumzdu_r[k] += - dudr_r[k] * jjjmambzarray_r + - dudr_i[k] * jjjmambzarray_i; + dudr_r[k] * zlist_r[jjz] + + dudr_i[k] * zlist_i[jjz]; + jjz++; + jju++; } int ma = mb; - dudr_r = duarray_r[j][ma][mb]; - dudr_i = duarray_i[j][ma][mb]; - jjjmambzarray_r = jjjzarray_r[ma][mb]; - jjjmambzarray_i = jjjzarray_i[ma][mb]; + dudr_r = dulist_r[jju]; + dudr_i = dulist_i[jju]; for(int k = 0; k < 3; k++) sumzdu_r[k] += - (dudr_r[k] * jjjmambzarray_r + - dudr_i[k] * jjjmambzarray_i)*0.5; + (dudr_r[k] * zlist_r[jjz] + + dudr_i[k] * zlist_i[jjz])*0.5; + jjz++; + jju++; } // end if jeven for(int k = 0; k < 3; k++) @@ -915,115 +724,97 @@ void SNA::compute_dbidrj() double j1fac = (j+1)/(j1+1.0); + jjz = idxz_block[j][j2][j1]; + jju = idxu_block[j1]; + for(int k = 0; k < 3; k++) sumzdu_r[k] = 0.0; - // use zarray j1/j2 symmetry (optional) - - if (j >= j2) { - jjjzarray_r = zarray_r[j][j2][j1]; - jjjzarray_i = zarray_i[j][j2][j1]; - } else { - jjjzarray_r = zarray_r[j2][j][j1]; - jjjzarray_i = zarray_i[j2][j][j1]; - } - - for(int mb1 = 0; 2*mb1 < j1; mb1++) - for(int ma1 = 0; ma1 <= j1; ma1++) { - - dudr_r = duarray_r[j1][ma1][mb1]; - dudr_i = duarray_i[j1][ma1][mb1]; - jjjmambzarray_r = jjjzarray_r[ma1][mb1]; - jjjmambzarray_i = jjjzarray_i[ma1][mb1]; + for(int mb = 0; 2*mb < j1; mb++) + for(int ma = 0; ma <= j1; ma++) { + dudr_r = dulist_r[jju]; + dudr_i = dulist_i[jju]; for(int k = 0; k < 3; k++) sumzdu_r[k] += - dudr_r[k] * jjjmambzarray_r + - dudr_i[k] * jjjmambzarray_i; - - } //end loop over ma1 mb1 + dudr_r[k] * zlist_r[jjz] + + dudr_i[k] * zlist_i[jjz]; + jjz++; + jju++; + } //end loop over ma mb // For j1 even, handle middle column if (j1%2 == 0) { - int mb1 = j1/2; - for(int ma1 = 0; ma1 < mb1; ma1++) { - dudr_r = duarray_r[j1][ma1][mb1]; - dudr_i = duarray_i[j1][ma1][mb1]; - jjjmambzarray_r = jjjzarray_r[ma1][mb1]; - jjjmambzarray_i = jjjzarray_i[ma1][mb1]; + int mb = j1/2; + for(int ma = 0; ma < mb; ma++) { + dudr_r = dulist_r[jju]; + dudr_i = dulist_i[jju]; for(int k = 0; k < 3; k++) sumzdu_r[k] += - dudr_r[k] * jjjmambzarray_r + - dudr_i[k] * jjjmambzarray_i; + dudr_r[k] * zlist_r[jjz] + + dudr_i[k] * zlist_i[jjz]; + jjz++; + jju++; } - int ma1 = mb1; - dudr_r = duarray_r[j1][ma1][mb1]; - dudr_i = duarray_i[j1][ma1][mb1]; - jjjmambzarray_r = jjjzarray_r[ma1][mb1]; - jjjmambzarray_i = jjjzarray_i[ma1][mb1]; + int ma = mb; + dudr_r = dulist_r[jju]; + dudr_i = dulist_i[jju]; for(int k = 0; k < 3; k++) sumzdu_r[k] += - (dudr_r[k] * jjjmambzarray_r + - dudr_i[k] * jjjmambzarray_i)*0.5; + (dudr_r[k] * zlist_r[jjz] + + dudr_i[k] * zlist_i[jjz])*0.5; + jjz++; + jju++; } // end if j1even for(int k = 0; k < 3; k++) dbdr[k] += 2.0*sumzdu_r[k]*j1fac; - // Sum over Conj(dudr(j2,ma2,mb2))*z(j1,j,j2,ma2,mb2) + // Sum over Conj(dudr(j2,ma2,mb2))*z(j,j1,j2,ma2,mb2) double j2fac = (j+1)/(j2+1.0); + jjz = idxz_block[j][j1][j2]; + jju = idxu_block[j2]; + for(int k = 0; k < 3; k++) sumzdu_r[k] = 0.0; - // use zarray j1/j2 symmetry (optional) - - if (j1 >= j) { - jjjzarray_r = zarray_r[j1][j][j2]; - jjjzarray_i = zarray_i[j1][j][j2]; - } else { - jjjzarray_r = zarray_r[j][j1][j2]; - jjjzarray_i = zarray_i[j][j1][j2]; - } - - for(int mb2 = 0; 2*mb2 < j2; mb2++) - for(int ma2 = 0; ma2 <= j2; ma2++) { - - dudr_r = duarray_r[j2][ma2][mb2]; - dudr_i = duarray_i[j2][ma2][mb2]; - jjjmambzarray_r = jjjzarray_r[ma2][mb2]; - jjjmambzarray_i = jjjzarray_i[ma2][mb2]; + for(int mb = 0; 2*mb < j2; mb++) + for(int ma = 0; ma <= j2; ma++) { + dudr_r = dulist_r[jju]; + dudr_i = dulist_i[jju]; for(int k = 0; k < 3; k++) sumzdu_r[k] += - dudr_r[k] * jjjmambzarray_r + - dudr_i[k] * jjjmambzarray_i; - - } //end loop over ma2 mb2 + dudr_r[k] * zlist_r[jjz] + + dudr_i[k] * zlist_i[jjz]; + jjz++; + jju++; + } //end loop over ma mb // For j2 even, handle middle column if (j2%2 == 0) { - int mb2 = j2/2; - for(int ma2 = 0; ma2 < mb2; ma2++) { - dudr_r = duarray_r[j2][ma2][mb2]; - dudr_i = duarray_i[j2][ma2][mb2]; - jjjmambzarray_r = jjjzarray_r[ma2][mb2]; - jjjmambzarray_i = jjjzarray_i[ma2][mb2]; + int mb = j2/2; + for(int ma = 0; ma < mb; ma++) { + dudr_r = dulist_r[jju]; + dudr_i = dulist_i[jju]; for(int k = 0; k < 3; k++) sumzdu_r[k] += - dudr_r[k] * jjjmambzarray_r + - dudr_i[k] * jjjmambzarray_i; + dudr_r[k] * zlist_r[jjz] + + dudr_i[k] * zlist_i[jjz]; + jjz++; + jju++; } - int ma2 = mb2; - dudr_r = duarray_r[j2][ma2][mb2]; - dudr_i = duarray_i[j2][ma2][mb2]; - jjjmambzarray_r = jjjzarray_r[ma2][mb2]; - jjjmambzarray_i = jjjzarray_i[ma2][mb2]; + int ma = mb; + dudr_r = dulist_r[jju]; + dudr_i = dulist_i[jju]; for(int k = 0; k < 3; k++) sumzdu_r[k] += - (dudr_r[k] * jjjmambzarray_r + - dudr_i[k] * jjjmambzarray_i)*0.5; + (dudr_r[k] * zlist_r[jjz] + + dudr_i[k] * zlist_i[jjz])*0.5; + jjz++; + jju++; } // end if j2even for(int k = 0; k < 3; k++) @@ -1031,84 +822,59 @@ void SNA::compute_dbidrj() } //end loop over j1 j2 j -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &endtime); - timers[4] += (endtime.tv_sec - starttime.tv_sec + 1.0 * - (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); -#endif - } /* ---------------------------------------------------------------------- - copy Bi derivatives into a vector + calculate derivative of Ui w.r.t. atom j ------------------------------------------------------------------------- */ -void SNA::copy_dbi2dbvec() +void SNA::compute_duidrj(double* rij, double wj, double rcut) { - int ncount, j1, j2, j; + double rsq, r, x, y, z, z0, theta0, cs, sn; + double dz0dr; - ncount = 0; + x = rij[0]; + y = rij[1]; + z = rij[2]; + rsq = x * x + y * y + z * z; + r = sqrt(rsq); + double rscale0 = rfac0 * MY_PI / (rcut - rmin0); + theta0 = (r - rmin0) * rscale0; + cs = cos(theta0); + sn = sin(theta0); + z0 = r * cs / sn; + dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq; - for(j1 = 0; j1 <= twojmax; j1++) { - if(diagonalstyle == 0) { - for(j2 = 0; j2 <= j1; j2++) - for(j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) { - dbvec[ncount][0] = dbarray[j1][j2][j][0]; - dbvec[ncount][1] = dbarray[j1][j2][j][1]; - dbvec[ncount][2] = dbarray[j1][j2][j][2]; - ncount++; - } - } else if(diagonalstyle == 1) { - j2 = j1; - for(j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) { - dbvec[ncount][0] = dbarray[j1][j2][j][0]; - dbvec[ncount][1] = dbarray[j1][j2][j][1]; - dbvec[ncount][2] = dbarray[j1][j2][j][2]; - ncount++; - } - } else if(diagonalstyle == 2) { - j = j2 = j1; - dbvec[ncount][0] = dbarray[j1][j2][j][0]; - dbvec[ncount][1] = dbarray[j1][j2][j][1]; - dbvec[ncount][2] = dbarray[j1][j2][j][2]; - ncount++; - } else if(diagonalstyle == 3) { - for(j2 = 0; j2 <= j1; j2++) - for(j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) - if (j >= j1) { - dbvec[ncount][0] = dbarray[j1][j2][j][0]; - dbvec[ncount][1] = dbarray[j1][j2][j][1]; - dbvec[ncount][2] = dbarray[j1][j2][j][2]; - ncount++; - } - } - } + compute_duarray(x, y, z, z0, r, dz0dr, wj, rcut); } /* ---------------------------------------------------------------------- */ void SNA::zero_uarraytot() { - for (int j = 0; j <= twojmax; j++) - for (int ma = 0; ma <= j; ma++) - for (int mb = 0; mb <= j; mb++) { - uarraytot_r[j][ma][mb] = 0.0; - uarraytot_i[j][ma][mb] = 0.0; + for (int j = 0; j <= twojmax; j++) { + int jju = idxu_block[j]; + for (int mb = 0; mb <= j; mb++) + for (int ma = 0; ma <= j; ma++) { + ulisttot_r[jju] = 0.0; + ulisttot_i[jju] = 0.0; + jju++; } + } } /* ---------------------------------------------------------------------- */ void SNA::addself_uarraytot(double wself_in) { - for (int j = 0; j <= twojmax; j++) + for (int j = 0; j <= twojmax; j++) { + int jju = idxu_block[j]; for (int ma = 0; ma <= j; ma++) { - uarraytot_r[j][ma][ma] = wself_in; - uarraytot_i[j][ma][ma] = 0.0; + ulisttot_r[jju] = wself_in; + ulisttot_i[jju] = 0.0; + jju += j+2; } + } } /* ---------------------------------------------------------------------- @@ -1123,35 +889,17 @@ void SNA::add_uarraytot(double r, double wj, double rcut) sfac *= wj; - for (int j = 0; j <= twojmax; j++) - for (int ma = 0; ma <= j; ma++) - for (int mb = 0; mb <= j; mb++) { - uarraytot_r[j][ma][mb] += - sfac * uarray_r[j][ma][mb]; - uarraytot_i[j][ma][mb] += - sfac * uarray_i[j][ma][mb]; - } -} - -void SNA::add_uarraytot_omp(double r, double wj, double rcut) -{ - double sfac; - - sfac = compute_sfac(r, rcut); - - sfac *= wj; - -#if defined(_OPENMP) -#pragma omp for -#endif - for (int j = 0; j <= twojmax; j++) - for (int ma = 0; ma <= j; ma++) - for (int mb = 0; mb <= j; mb++) { - uarraytot_r[j][ma][mb] += - sfac * uarray_r[j][ma][mb]; - uarraytot_i[j][ma][mb] += - sfac * uarray_i[j][ma][mb]; + for (int j = 0; j <= twojmax; j++) { + int jju = idxu_block[j]; + for (int mb = 0; mb <= j; mb++) + for (int ma = 0; ma <= j; ma++) { + ulisttot_r[jju] += + sfac * ulist_r[jju]; + ulisttot_i[jju] += + sfac * ulist_i[jju]; + jju++; } + } } /* ---------------------------------------------------------------------- @@ -1175,145 +923,72 @@ void SNA::compute_uarray(double x, double y, double z, // VMK Section 4.8.2 - uarray_r[0][0][0] = 1.0; - uarray_i[0][0][0] = 0.0; + ulist_r[0] = 1.0; + ulist_i[0] = 0.0; for (int j = 1; j <= twojmax; j++) { + int jju = idxu_block[j]; + int jjup = idxu_block[j-1]; // fill in left side of matrix layer from previous layer for (int mb = 0; 2*mb <= j; mb++) { - uarray_r[j][0][mb] = 0.0; - uarray_i[j][0][mb] = 0.0; + ulist_r[jju] = 0.0; + ulist_i[jju] = 0.0; for (int ma = 0; ma < j; ma++) { rootpq = rootpqarray[j - ma][j - mb]; - uarray_r[j][ma][mb] += + ulist_r[jju] += rootpq * - (a_r * uarray_r[j - 1][ma][mb] + - a_i * uarray_i[j - 1][ma][mb]); - uarray_i[j][ma][mb] += + (a_r * ulist_r[jjup] + + a_i * ulist_i[jjup]); + ulist_i[jju] += rootpq * - (a_r * uarray_i[j - 1][ma][mb] - - a_i * uarray_r[j - 1][ma][mb]); + (a_r * ulist_i[jjup] - + a_i * ulist_r[jjup]); rootpq = rootpqarray[ma + 1][j - mb]; - uarray_r[j][ma + 1][mb] = + ulist_r[jju+1] = -rootpq * - (b_r * uarray_r[j - 1][ma][mb] + - b_i * uarray_i[j - 1][ma][mb]); - uarray_i[j][ma + 1][mb] = + (b_r * ulist_r[jjup] + + b_i * ulist_i[jjup]); + ulist_i[jju+1] = -rootpq * - (b_r * uarray_i[j - 1][ma][mb] - - b_i * uarray_r[j - 1][ma][mb]); + (b_r * ulist_i[jjup] - + b_i * ulist_r[jjup]); + jju++; + jjup++; } + jju++; } // copy left side to right side with inversion symmetry VMK 4.4(2) // u[ma-j][mb-j] = (-1)^(ma-mb)*Conj([u[ma][mb]) - int mbpar = -1; + jju = idxu_block[j]; + jjup = jju+(j+1)*(j+1)-1; + int mbpar = 1; for (int mb = 0; 2*mb <= j; mb++) { - mbpar = -mbpar; - int mapar = -mbpar; + int mapar = mbpar; for (int ma = 0; ma <= j; ma++) { - mapar = -mapar; if (mapar == 1) { - uarray_r[j][j-ma][j-mb] = uarray_r[j][ma][mb]; - uarray_i[j][j-ma][j-mb] = -uarray_i[j][ma][mb]; + ulist_r[jjup] = ulist_r[jju]; + ulist_i[jjup] = -ulist_i[jju]; } else { - uarray_r[j][j-ma][j-mb] = -uarray_r[j][ma][mb]; - uarray_i[j][j-ma][j-mb] = uarray_i[j][ma][mb]; + ulist_r[jjup] = -ulist_r[jju]; + ulist_i[jjup] = ulist_i[jju]; } + mapar = -mapar; + jju++; + jjup--; } - } - } -} - -void SNA::compute_uarray_omp(double x, double y, double z, - double z0, double r, int /*sub_threads*/) -{ - double r0inv; - double a_r, b_r, a_i, b_i; - double rootpq; - - // compute Cayley-Klein parameters for unit quaternion - - r0inv = 1.0 / sqrt(r * r + z0 * z0); - a_r = r0inv * z0; - a_i = -r0inv * z; - b_r = r0inv * y; - b_i = -r0inv * x; - - // VMK Section 4.8.2 - - uarray_r[0][0][0] = 1.0; - uarray_i[0][0][0] = 0.0; - - for (int j = 1; j <= twojmax; j++) { -#if defined(_OPENMP) -#pragma omp for -#endif - for (int mb = 0; mb < j; mb++) { - uarray_r[j][0][mb] = 0.0; - uarray_i[j][0][mb] = 0.0; - - for (int ma = 0; ma < j; ma++) { - rootpq = rootpqarray[j - ma][j - mb]; - uarray_r[j][ma][mb] += - rootpq * - (a_r * uarray_r[j - 1][ma][mb] + - a_i * uarray_i[j - 1][ma][mb]); - uarray_i[j][ma][mb] += - rootpq * - (a_r * uarray_i[j - 1][ma][mb] - - a_i * uarray_r[j - 1][ma][mb]); - - rootpq = rootpqarray[ma + 1][j - mb]; - uarray_r[j][ma + 1][mb] = - -rootpq * - (b_r * uarray_r[j - 1][ma][mb] + - b_i * uarray_i[j - 1][ma][mb]); - uarray_i[j][ma + 1][mb] = - -rootpq * - (b_r * uarray_i[j - 1][ma][mb] - - b_i * uarray_r[j - 1][ma][mb]); - } - } - - int mb = j; - uarray_r[j][0][mb] = 0.0; - uarray_i[j][0][mb] = 0.0; - -#if defined(_OPENMP) -#pragma omp for -#endif - for (int ma = 0; ma < j; ma++) { - rootpq = rootpqarray[j - ma][mb]; - uarray_r[j][ma][mb] += - rootpq * - (b_r * uarray_r[j - 1][ma][mb - 1] - - b_i * uarray_i[j - 1][ma][mb - 1]); - uarray_i[j][ma][mb] += - rootpq * - (b_r * uarray_i[j - 1][ma][mb - 1] + - b_i * uarray_r[j - 1][ma][mb - 1]); - - rootpq = rootpqarray[ma + 1][mb]; - uarray_r[j][ma + 1][mb] = - rootpq * - (a_r * uarray_r[j - 1][ma][mb - 1] - - a_i * uarray_i[j - 1][ma][mb - 1]); - uarray_i[j][ma + 1][mb] = - rootpq * - (a_r * uarray_i[j - 1][ma][mb - 1] + - a_i * uarray_r[j - 1][ma][mb - 1]); + mbpar = -mbpar; } } } /* ---------------------------------------------------------------------- - compute derivatives of Wigner U-functions for one neighbor + Compute derivatives of Wigner U-functions for one neighbor see comments in compute_uarray() ------------------------------------------------------------------------- */ @@ -1363,93 +1038,105 @@ void SNA::compute_duarray(double x, double y, double z, db_i[0] += -r0inv; db_r[1] += r0inv; - uarray_r[0][0][0] = 1.0; - duarray_r[0][0][0][0] = 0.0; - duarray_r[0][0][0][1] = 0.0; - duarray_r[0][0][0][2] = 0.0; - uarray_i[0][0][0] = 0.0; - duarray_i[0][0][0][0] = 0.0; - duarray_i[0][0][0][1] = 0.0; - duarray_i[0][0][0][2] = 0.0; + ulist_r[0] = 1.0; + dulist_r[0][0] = 0.0; + dulist_r[0][1] = 0.0; + dulist_r[0][2] = 0.0; + ulist_i[0] = 0.0; + dulist_i[0][0] = 0.0; + dulist_i[0][1] = 0.0; + dulist_i[0][2] = 0.0; for (int j = 1; j <= twojmax; j++) { + int jju = idxu_block[j]; + int jjup = idxu_block[j-1]; for (int mb = 0; 2*mb <= j; mb++) { - uarray_r[j][0][mb] = 0.0; - duarray_r[j][0][mb][0] = 0.0; - duarray_r[j][0][mb][1] = 0.0; - duarray_r[j][0][mb][2] = 0.0; - uarray_i[j][0][mb] = 0.0; - duarray_i[j][0][mb][0] = 0.0; - duarray_i[j][0][mb][1] = 0.0; - duarray_i[j][0][mb][2] = 0.0; + ulist_r[jju] = 0.0; + dulist_r[jju][0] = 0.0; + dulist_r[jju][1] = 0.0; + dulist_r[jju][2] = 0.0; + ulist_i[jju] = 0.0; + dulist_i[jju][0] = 0.0; + dulist_i[jju][1] = 0.0; + dulist_i[jju][2] = 0.0; for (int ma = 0; ma < j; ma++) { rootpq = rootpqarray[j - ma][j - mb]; - uarray_r[j][ma][mb] += rootpq * - (a_r * uarray_r[j - 1][ma][mb] + - a_i * uarray_i[j - 1][ma][mb]); - uarray_i[j][ma][mb] += rootpq * - (a_r * uarray_i[j - 1][ma][mb] - - a_i * uarray_r[j - 1][ma][mb]); + ulist_r[jju] += rootpq * + (a_r * ulist_r[jjup] + + a_i * ulist_i[jjup]); + ulist_i[jju] += rootpq * + (a_r * ulist_i[jjup] - + a_i * ulist_r[jjup]); for (int k = 0; k < 3; k++) { - duarray_r[j][ma][mb][k] += - rootpq * (da_r[k] * uarray_r[j - 1][ma][mb] + - da_i[k] * uarray_i[j - 1][ma][mb] + - a_r * duarray_r[j - 1][ma][mb][k] + - a_i * duarray_i[j - 1][ma][mb][k]); - duarray_i[j][ma][mb][k] += - rootpq * (da_r[k] * uarray_i[j - 1][ma][mb] - - da_i[k] * uarray_r[j - 1][ma][mb] + - a_r * duarray_i[j - 1][ma][mb][k] - - a_i * duarray_r[j - 1][ma][mb][k]); + dulist_r[jju][k] += + rootpq * (da_r[k] * ulist_r[jjup] + + da_i[k] * ulist_i[jjup] + + a_r * dulist_r[jjup][k] + + a_i * dulist_i[jjup][k]); + dulist_i[jju][k] += + rootpq * (da_r[k] * ulist_i[jjup] - + da_i[k] * ulist_r[jjup] + + a_r * dulist_i[jjup][k] - + a_i * dulist_r[jjup][k]); } rootpq = rootpqarray[ma + 1][j - mb]; - uarray_r[j][ma + 1][mb] = - -rootpq * (b_r * uarray_r[j - 1][ma][mb] + - b_i * uarray_i[j - 1][ma][mb]); - uarray_i[j][ma + 1][mb] = - -rootpq * (b_r * uarray_i[j - 1][ma][mb] - - b_i * uarray_r[j - 1][ma][mb]); + ulist_r[jju+1] = + -rootpq * (b_r * ulist_r[jjup] + + b_i * ulist_i[jjup]); + ulist_i[jju+1] = + -rootpq * (b_r * ulist_i[jjup] - + b_i * ulist_r[jjup]); for (int k = 0; k < 3; k++) { - duarray_r[j][ma + 1][mb][k] = - -rootpq * (db_r[k] * uarray_r[j - 1][ma][mb] + - db_i[k] * uarray_i[j - 1][ma][mb] + - b_r * duarray_r[j - 1][ma][mb][k] + - b_i * duarray_i[j - 1][ma][mb][k]); - duarray_i[j][ma + 1][mb][k] = - -rootpq * (db_r[k] * uarray_i[j - 1][ma][mb] - - db_i[k] * uarray_r[j - 1][ma][mb] + - b_r * duarray_i[j - 1][ma][mb][k] - - b_i * duarray_r[j - 1][ma][mb][k]); + dulist_r[jju+1][k] = + -rootpq * (db_r[k] * ulist_r[jjup] + + db_i[k] * ulist_i[jjup] + + b_r * dulist_r[jjup][k] + + b_i * dulist_i[jjup][k]); + dulist_i[jju+1][k] = + -rootpq * (db_r[k] * ulist_i[jjup] - + db_i[k] * ulist_r[jjup] + + b_r * dulist_i[jjup][k] - + b_i * dulist_r[jjup][k]); } + jju++; + jjup++; } + jju++; } - int mbpar = -1; + // copy left side to right side with inversion symmetry VMK 4.4(2) + // u[ma-j][mb-j] = (-1)^(ma-mb)*Conj([u[ma][mb]) + + jju = idxu_block[j]; + jjup = jju+(j+1)*(j+1)-1; + int mbpar = 1; for (int mb = 0; 2*mb <= j; mb++) { - mbpar = -mbpar; - int mapar = -mbpar; + int mapar = mbpar; for (int ma = 0; ma <= j; ma++) { - mapar = -mapar; if (mapar == 1) { - uarray_r[j][j-ma][j-mb] = uarray_r[j][ma][mb]; - uarray_i[j][j-ma][j-mb] = -uarray_i[j][ma][mb]; + ulist_r[jjup] = ulist_r[jju]; + ulist_i[jjup] = -ulist_i[jju]; for (int k = 0; k < 3; k++) { - duarray_r[j][j-ma][j-mb][k] = duarray_r[j][ma][mb][k]; - duarray_i[j][j-ma][j-mb][k] = -duarray_i[j][ma][mb][k]; + dulist_r[jjup][k] = dulist_r[jju][k]; + dulist_i[jjup][k] = -dulist_i[jju][k]; } } else { - uarray_r[j][j-ma][j-mb] = -uarray_r[j][ma][mb]; - uarray_i[j][j-ma][j-mb] = uarray_i[j][ma][mb]; + ulist_r[jjup] = -ulist_r[jju]; + ulist_i[jjup] = ulist_i[jju]; for (int k = 0; k < 3; k++) { - duarray_r[j][j-ma][j-mb][k] = -duarray_r[j][ma][mb][k]; - duarray_i[j][j-ma][j-mb][k] = duarray_i[j][ma][mb][k]; + dulist_r[jjup][k] = -dulist_r[jju][k]; + dulist_i[jjup][k] = dulist_i[jju][k]; } } + mapar = -mapar; + jju++; + jjup--; } + mbpar = -mbpar; } } @@ -1458,23 +1145,25 @@ void SNA::compute_duarray(double x, double y, double z, sfac *= wj; dsfac *= wj; - - for (int j = 0; j <= twojmax; j++) - for (int ma = 0; ma <= j; ma++) - for (int mb = 0; mb <= j; mb++) { - duarray_r[j][ma][mb][0] = dsfac * uarray_r[j][ma][mb] * ux + - sfac * duarray_r[j][ma][mb][0]; - duarray_i[j][ma][mb][0] = dsfac * uarray_i[j][ma][mb] * ux + - sfac * duarray_i[j][ma][mb][0]; - duarray_r[j][ma][mb][1] = dsfac * uarray_r[j][ma][mb] * uy + - sfac * duarray_r[j][ma][mb][1]; - duarray_i[j][ma][mb][1] = dsfac * uarray_i[j][ma][mb] * uy + - sfac * duarray_i[j][ma][mb][1]; - duarray_r[j][ma][mb][2] = dsfac * uarray_r[j][ma][mb] * uz + - sfac * duarray_r[j][ma][mb][2]; - duarray_i[j][ma][mb][2] = dsfac * uarray_i[j][ma][mb] * uz + - sfac * duarray_i[j][ma][mb][2]; + for (int j = 0; j <= twojmax; j++) { + int jju = idxu_block[j]; + for (int mb = 0; 2*mb <= j; mb++) + for (int ma = 0; ma <= j; ma++) { + dulist_r[jju][0] = dsfac * ulist_r[jju] * ux + + sfac * dulist_r[jju][0]; + dulist_i[jju][0] = dsfac * ulist_i[jju] * ux + + sfac * dulist_i[jju][0]; + dulist_r[jju][1] = dsfac * ulist_r[jju] * uy + + sfac * dulist_r[jju][1]; + dulist_i[jju][1] = dsfac * ulist_i[jju] * uy + + sfac * dulist_i[jju][1]; + dulist_r[jju][2] = dsfac * ulist_r[jju] * uz + + sfac * dulist_r[jju][2]; + dulist_i[jju][2] = dsfac * ulist_i[jju] * uz + + sfac * dulist_i[jju][2]; + jju++; } + } } /* ---------------------------------------------------------------------- @@ -1483,87 +1172,96 @@ void SNA::compute_duarray(double x, double y, double z, double SNA::memory_usage() { + int jdimpq = twojmax + 2; int jdim = twojmax + 1; double bytes; - bytes = jdim * jdim * jdim * jdim * jdim * sizeof(double); - bytes += 2 * jdim * jdim * jdim * sizeof(complex); - bytes += 2 * jdim * jdim * jdim * sizeof(double); - bytes += jdim * jdim * jdim * 3 * sizeof(complex); - bytes += jdim * jdim * jdim * 3 * sizeof(double); - bytes += ncoeff * sizeof(double); - bytes += jdim * jdim * jdim * jdim * jdim * sizeof(complex); + + bytes = 0; + + bytes += jdimpq*jdimpq * sizeof(double); // pqarray + bytes += idxcg_max * sizeof(double); // cglist + + bytes += idxu_max * sizeof(double) * 2; // ulist + bytes += idxu_max * sizeof(double) * 2; // ulisttot + bytes += idxu_max * 3 * sizeof(double) * 2; // dulist + + bytes += idxz_max * sizeof(double) * 2; // zlist + bytes += idxb_max * sizeof(double); // blist + bytes += idxb_max * 3 * sizeof(double); // dblist + bytes += idxu_max * sizeof(double) * 2; // ylist + + bytes += jdim * jdim * jdim * sizeof(int); // idxcg_block + bytes += jdim * sizeof(int); // idxu_block + bytes += jdim * jdim * jdim * sizeof(int); // idxz_block + bytes += jdim * jdim * jdim * sizeof(int); // idxb_block + + bytes += idxz_max * sizeof(SNA_ZINDICES); // idxz + bytes += idxb_max * sizeof(SNA_BINDICES); // idxb + + bytes += jdim * sizeof(double); // bzero + + bytes += nmax * 3 * sizeof(double); // rij + bytes += nmax * sizeof(int); // inside + bytes += nmax * sizeof(double); // wj + bytes += nmax * sizeof(double); // rcutij + return bytes; } - /* ---------------------------------------------------------------------- */ void SNA::create_twojmax_arrays() { - int jdim = twojmax + 1; - - memory->create(cgarray, jdim, jdim, jdim, jdim, jdim, - "sna:cgarray"); - memory->create(rootpqarray, jdim+1, jdim+1, + int jdimpq = twojmax + 2; + memory->create(rootpqarray, jdimpq, jdimpq, "sna:rootpqarray"); - memory->create(barray, jdim, jdim, jdim, - "sna:barray"); - memory->create(dbarray, jdim, jdim, jdim, 3, - "sna:dbarray"); - - memory->create(duarray_r, jdim, jdim, jdim, 3, - "sna:duarray"); - memory->create(duarray_i, jdim, jdim, jdim, 3, - "sna:duarray"); - - memory->create(uarray_r, jdim, jdim, jdim, - "sna:uarray"); - memory->create(uarray_i, jdim, jdim, jdim, - "sna:uarray"); + memory->create(cglist, idxcg_max, "sna:cglist"); + memory->create(ulist_r, idxu_max, "sna:ulist"); + memory->create(ulist_i, idxu_max, "sna:ulist"); + memory->create(ulisttot_r, idxu_max, "sna:ulisttot"); + memory->create(ulisttot_i, idxu_max, "sna:ulisttot"); + memory->create(dulist_r, idxu_max, 3, "sna:dulist"); + memory->create(dulist_i, idxu_max, 3, "sna:dulist"); + memory->create(zlist_r, idxz_max, "sna:zlist"); + memory->create(zlist_i, idxz_max, "sna:zlist"); + memory->create(blist, idxb_max, "sna:blist"); + memory->create(dblist, idxb_max, 3, "sna:dblist"); + memory->create(ylist_r, idxu_max, "sna:ylist"); + memory->create(ylist_i, idxu_max, "sna:ylist"); if (bzero_flag) - memory->create(bzero, jdim,"sna:bzero"); + memory->create(bzero, twojmax+1,"sna:bzero"); else bzero = NULL; - - if(!use_shared_arrays) { - memory->create(uarraytot_r, jdim, jdim, jdim, - "sna:uarraytot"); - memory->create(zarray_r, jdim, jdim, jdim, jdim, jdim, - "sna:zarray"); - memory->create(uarraytot_i, jdim, jdim, jdim, - "sna:uarraytot"); - memory->create(zarray_i, jdim, jdim, jdim, jdim, jdim, - "sna:zarray"); - } - } /* ---------------------------------------------------------------------- */ void SNA::destroy_twojmax_arrays() { - memory->destroy(cgarray); memory->destroy(rootpqarray); - memory->destroy(barray); + memory->destroy(cglist); + memory->destroy(ulist_r); + memory->destroy(ulist_i); + memory->destroy(ulisttot_r); + memory->destroy(ulisttot_i); + memory->destroy(dulist_r); + memory->destroy(dulist_i); + memory->destroy(zlist_r); + memory->destroy(zlist_i); + memory->destroy(blist); + memory->destroy(dblist); + memory->destroy(ylist_r); + memory->destroy(ylist_i); - memory->destroy(dbarray); - - memory->destroy(duarray_r); - memory->destroy(duarray_i); - - memory->destroy(uarray_r); - memory->destroy(uarray_i); + memory->destroy(idxcg_block); + memory->destroy(idxu_block); + memory->destroy(idxz_block); + memory->destroy(idxb_block); if (bzero_flag) memory->destroy(bzero); - if(!use_shared_arrays) { - memory->destroy(uarraytot_r); - memory->destroy(zarray_r); - memory->destroy(uarraytot_i); - memory->destroy(zarray_i); - } } /* ---------------------------------------------------------------------- @@ -1779,28 +1477,33 @@ void SNA::init_clebsch_gordan() int m, aa2, bb2, cc2; int ifac; - for (int j1 = 0; j1 <= twojmax; j1++) - for (int j2 = 0; j2 <= twojmax; j2++) - for (int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) - for (int m1 = 0; m1 <= j1; m1 += 1) { + int idxcg_count = 0; + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { + for (int m1 = 0; m1 <= j1; m1++) { aa2 = 2 * m1 - j1; - for (int m2 = 0; m2 <= j2; m2 += 1) { + for (int m2 = 0; m2 <= j2; m2++) { // -c <= cc <= c bb2 = 2 * m2 - j2; m = (aa2 + bb2 + j) / 2; - if(m < 0 || m > j) continue; + if(m < 0 || m > j) { + cglist[idxcg_count] = 0.0; + idxcg_count++; + continue; + } sum = 0.0; for (int z = MAX(0, MAX(-(j - j2 + aa2) - / 2, -(j - j1 - bb2) / 2)); - z <= MIN((j1 + j2 - j) / 2, - MIN((j1 - aa2) / 2, (j2 + bb2) / 2)); - z++) { + / 2, -(j - j1 - bb2) / 2)); + z <= MIN((j1 + j2 - j) / 2, + MIN((j1 - aa2) / 2, (j2 + bb2) / 2)); + z++) { ifac = z % 2 ? -1 : 1; sum += ifac / (factorial(z) * @@ -1810,20 +1513,22 @@ void SNA::init_clebsch_gordan() factorial((j - j2 + aa2) / 2 + z) * factorial((j - j1 - bb2) / 2 + z)); } - + cc2 = 2 * m - j; dcg = deltacg(j1, j2, j); sfaccg = sqrt(factorial((j1 + aa2) / 2) * - factorial((j1 - aa2) / 2) * - factorial((j2 + bb2) / 2) * - factorial((j2 - bb2) / 2) * - factorial((j + cc2) / 2) * - factorial((j - cc2) / 2) * - (j + 1)); - - cgarray[j1][j2][j][m1][m2] = sum * dcg * sfaccg; + factorial((j1 - aa2) / 2) * + factorial((j2 + bb2) / 2) * + factorial((j2 - bb2) / 2) * + factorial((j + cc2) / 2) * + factorial((j - cc2) / 2) * + (j + 1)); + + cglist[idxcg_count] = sum * dcg * sfaccg; + idxcg_count++; } } + } } /* ---------------------------------------------------------------------- @@ -1838,74 +1543,6 @@ void SNA::init_rootpqarray() rootpqarray[p][q] = sqrt(static_cast(p)/q); } -/* ---------------------------------------------------------------------- - a = j/2 -------------------------------------------------------------------------- */ - -void SNA::jtostr(char* str, int j) -{ - if(j % 2 == 0) - sprintf(str, "%d", j / 2); - else - sprintf(str, "%d/2", j); -} - -/* ---------------------------------------------------------------------- - aa = m - j/2 -------------------------------------------------------------------------- */ - -void SNA::mtostr(char* str, int j, int m) -{ - if(j % 2 == 0) - sprintf(str, "%d", m - j / 2); - else - sprintf(str, "%d/2", 2 * m - j); -} - -/* ---------------------------------------------------------------------- - list values of Clebsch-Gordan coefficients - using notation of VMK Table 8.11 -------------------------------------------------------------------------- */ - -void SNA::print_clebsch_gordan(FILE* file) -{ - char stra[20], strb[20], strc[20], straa[20], strbb[20], strcc[20]; - int m, aa2, bb2; - - fprintf(file, "a, aa, b, bb, c, cc, c(a,aa,b,bb,c,cc) \n"); - - for (int j1 = 0; j1 <= twojmax; j1++) { - jtostr(stra, j1); - - for (int j2 = 0; j2 <= twojmax; j2++) { - jtostr(strb, j2); - - for (int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { - jtostr(strc, j); - - for (int m1 = 0; m1 <= j1; m1 += 1) { - mtostr(straa, j1, m1); - aa2 = 2 * m1 - j1; - - for (int m2 = 0; m2 <= j2; m2 += 1) { - bb2 = 2 * m2 - j2; - m = (aa2 + bb2 + j) / 2; - - if(m < 0 || m > j) continue; - - mtostr(strbb, j2, m2); - mtostr(strcc, j, m); - - fprintf(file, "%s\t%s\t%s\t%s\t%s\t%s\t%g\n", - stra, straa, strb, strbb, strc, strcc, - cgarray[j1][j2][j][m1][m2]); - } - } - } - } - } -} - /* ---------------------------------------------------------------------- */ int SNA::compute_ncoeff() @@ -1915,25 +1552,10 @@ int SNA::compute_ncoeff() ncount = 0; for (int j1 = 0; j1 <= twojmax; j1++) - if(diagonalstyle == 0) { - for (int j2 = 0; j2 <= j1; j2++) - for (int j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) - ncount++; - } else if(diagonalstyle == 1) { - int j2 = j1; - - for (int j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) - ncount++; - } else if(diagonalstyle == 2) { - ncount++; - } else if(diagonalstyle == 3) { - for (int j2 = 0; j2 <= j1; j2++) - for (int j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) - if (j >= j1) ncount++; - } + for (int j2 = 0; j2 <= j1; j2++) + for (int j = j1 - j2; + j <= MIN(twojmax, j1 + j2); j += 2) + if (j >= j1) ncount++; return ncount; } diff --git a/src/SNAP/sna.h b/src/SNAP/sna.h index d05ad0fb84..1e08ef123c 100644 --- a/src/SNAP/sna.h +++ b/src/SNAP/sna.h @@ -18,20 +18,22 @@ #ifndef LMP_SNA_H #define LMP_SNA_H -#include #include "pointers.h" -#include namespace LAMMPS_NS { -struct SNA_LOOPINDICES { +struct SNA_ZINDICES { + int j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, jju; +}; + +struct SNA_BINDICES { int j1, j2, j; }; class SNA : protected Pointers { public: - SNA(LAMMPS*, double, int, int, int, double, int, int); + SNA(LAMMPS*, double, int, double, int, int); SNA(LAMMPS* lmp) : Pointers(lmp) {}; ~SNA(); @@ -44,31 +46,21 @@ public: // functions for bispectrum coefficients void compute_ui(int); - void compute_ui_omp(int, int); void compute_zi(); - void compute_zi_omp(int); + void compute_yi(const double*); + void compute_yterm(int, int, int, const double*); void compute_bi(); - void copy_bi2bvec(); // functions for derivatives void compute_duidrj(double*, double, double); void compute_dbidrj(); - void compute_dbidrj_nonsymm(); - void copy_dbi2dbvec(); + void compute_deidrj(double*); double compute_sfac(double, double); double compute_dsfac(double, double); -#ifdef TIMING_INFO - double* timers; - timespec starttime, endtime; - int print; - int counter; -#endif - - //per sna class instance for OMP use - - double* bvec, ** dbvec; + double* blist; + double** dblist; double** rij; int* inside; double* wj; @@ -77,29 +69,32 @@ public: void grow_rij(int); - int twojmax, diagonalstyle; - double*** uarraytot_r, *** uarraytot_i; - double***** zarray_r, ***** zarray_i; - double*** uarraytot_r_b, *** uarraytot_i_b; - double***** zarray_r_b, ***** zarray_i_b; - double*** uarray_r, *** uarray_i; + int twojmax; private: double rmin0, rfac0; - //use indexlist instead of loops, constructor generates these - SNA_LOOPINDICES* idxj; - int idxj_max; // data for bispectrum coefficients - double***** cgarray; + SNA_ZINDICES* idxz; + SNA_BINDICES* idxb; + int idxcg_max, idxu_max, idxz_max, idxb_max; + double** rootpqarray; - double*** barray; + double* cglist; + int*** idxcg_block; - // derivatives of data + double* ulisttot_r, * ulisttot_i; + double* ulist_r, * ulist_i; + int* idxu_block; - double**** duarray_r, **** duarray_i; - double**** dbarray; + double* zlist_r, * zlist_i; + int*** idxz_block; + + int*** idxb_block; + + double** dulist_r, ** dulist_i; + double* ylist_r, * ylist_i; static const int nmaxfactorial = 167; static const double nfac_table[]; @@ -109,28 +104,16 @@ private: void destroy_twojmax_arrays(); void init_clebsch_gordan(); void init_rootpqarray(); - void jtostr(char*, int); - void mtostr(char*, int, int); - void print_clebsch_gordan(FILE*); void zero_uarraytot(); void addself_uarraytot(double); void add_uarraytot(double, double, double); - void add_uarraytot_omp(double, double, double); void compute_uarray(double, double, double, double, double); - void compute_uarray_omp(double, double, double, - double, double, int); double deltacg(int, int, int); int compute_ncoeff(); void compute_duarray(double, double, double, double, double, double, double, double); - // if number of atoms are small use per atom arrays - // for twojmax arrays, rij, inside, bvec - // this will increase the memory footprint considerably, - // but allows parallel filling and reuse of these arrays - int use_shared_arrays; - // Sets the style for the switching function // 0 = none // 1 = cosine @@ -140,7 +123,7 @@ private: double wself; int bzero_flag; // 1 if bzero subtracted from barray - double *bzero; // array of B values for isolated atoms + double* bzero; // array of B values for isolated atoms }; }