diff --git a/src/KOKKOS/pair_pod_kokkos.cpp b/src/KOKKOS/pair_pod_kokkos.cpp index 563eaf38d6..dee4f7a03b 100644 --- a/src/KOKKOS/pair_pod_kokkos.cpp +++ b/src/KOKKOS/pair_pod_kokkos.cpp @@ -13,10 +13,10 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Stan Moore (SNL) + Contributing author: Ngoc Cuong Nguyen (MIT) ------------------------------------------------------------------------- */ -#include "eapod.h" +#include "eapod.h" #include "pair_pod_kokkos.h" #include "atom_kokkos.h" @@ -55,12 +55,12 @@ PairPODKokkos::PairPODKokkos(LAMMPS *lmp) : PairPOD(lmp) ni = 0; nimax = 0; nij = 0; - nijmax = 0; + nijmax = 0; atomBlockSize = 2048; nAtomBlocks = 0; timing = 1; for (int i=0; i<100; i++) comptime[i] = 0; - + host_flag = (execution_space == Host); } @@ -70,17 +70,17 @@ PairPODKokkos::PairPODKokkos(LAMMPS *lmp) : PairPOD(lmp) template PairPODKokkos::~PairPODKokkos() -{ +{ if (timing == 1) { printf("\n begin timing \n"); - for (int i=0; i<10; i++) printf("%g ", comptime[i]); + for (int i=0; i<10; i++) printf("%g ", comptime[i]); printf("\n"); - for (int i=10; i<20; i++) printf("%g ", comptime[i]); + for (int i=10; i<20; i++) printf("%g ", comptime[i]); // printf("\n"); -// for (int i=20; i<30; i++) printf("%g ", comptime[i]); +// for (int i=20; i<30; i++) printf("%g ", comptime[i]); printf("\n end timing \n"); - } - + } + if (copymode) return; memoryKK->destroy_kokkos(k_eatom,eatom); @@ -141,10 +141,10 @@ template void PairPODKokkos::coeff(int narg, char **arg) { if (narg < 7) utils::missing_cmd_args(FLERR, "pair_coeff", error); - + PairPOD::coeff(narg,arg); // create a PairPOD object - - copy_from_pod_class(PairPOD::fastpodptr); // copy parameters and arrays from pod class + + copy_from_pod_class(PairPOD::fastpodptr); // copy parameters and arrays from pod class int n = atom->ntypes + 1; MemKK::realloc_kokkos(d_map, "pair_pod:map", n); @@ -154,15 +154,15 @@ void PairPODKokkos::coeff(int narg, char **arg) MemKK::realloc_kokkos(k_scale, "pair_pod:scale", n, n); d_scale = k_scale.template view(); - - // Set up element lists - + + // Set up element lists + auto h_map = Kokkos::create_mirror_view(d_map); - + for (int i = 1; i <= atom->ntypes; i++) h_map(i) = map[i]; - - Kokkos::deep_copy(d_map,h_map); + + Kokkos::deep_copy(d_map,h_map); } /* ---------------------------------------------------------------------- */ @@ -200,14 +200,14 @@ void PairPODKokkos::compute(int eflag_in, int vflag_in) // atomKK->modified(Host,F_MASK); // return; // } - + eflag = eflag_in; vflag = vflag_in; if (neighflag == FULL) no_virial_fdotr_compute = 1; ev_init(eflag,vflag,0); - + // reallocate per-atom arrays if necessary if (eflag_atom) { memoryKK->destroy_kokkos(k_eatom,eatom); @@ -220,41 +220,41 @@ void PairPODKokkos::compute(int eflag_in, int vflag_in) d_vatom = k_vatom.view(); } - copymode = 1; + copymode = 1; int newton_pair = force->newton_pair; if (newton_pair == false) error->all(FLERR,"PairPODKokkos requires 'newton on'"); - - atomKK->sync(execution_space,X_MASK|F_MASK|TYPE_MASK); - x = atomKK->k_x.view(); + + atomKK->sync(execution_space,X_MASK|F_MASK|TYPE_MASK); + x = atomKK->k_x.view(); f = atomKK->k_f.view(); type = atomKK->k_type.view(); //k_cutsq.template sync(); - + maxneigh = 0; - if (host_flag) { - inum = list->inum; + if (host_flag) { + inum = list->inum; d_numneigh = typename ArrayTypes::t_int_1d("pair_pod:numneigh",inum); - for (int i=0; inumneigh[i]; + for (int i=0; inumneigh[i]; d_ilist = typename ArrayTypes::t_int_1d("pair_pod:ilist",inum); - for (int i=0; iilist[i]; - + for (int i=0; iilist[i]; + int maxn = 0; - for (int i=0; inumneigh[i]) maxn = list->numneigh[i]; + for (int i=0; inumneigh[i]) maxn = list->numneigh[i]; MemoryKokkos::realloc_kokkos(d_neighbors,"neighlist:neighbors",inum,maxn); for (int i=0; iilist[i]; - int m = list->numneigh[gi]; + int m = list->numneigh[gi]; if (maxneighfirstneigh[gi][l]; } - } + } } else { - NeighListKokkos* k_list = static_cast*>(list); - d_numneigh = k_list->d_numneigh; + NeighListKokkos* k_list = static_cast*>(list); + d_numneigh = k_list->d_numneigh; d_neighbors = k_list->d_neighbors; d_ilist = k_list->d_ilist; inum = list->inum; @@ -263,67 +263,67 @@ void PairPODKokkos::compute(int eflag_in, int vflag_in) maxneigh = maxneighs; } - auto begin = std::chrono::high_resolution_clock::now(); + auto begin = std::chrono::high_resolution_clock::now(); auto end = std::chrono::high_resolution_clock::now(); - + // determine the number of atom blocks and divide atoms into blocks nAtomBlocks = calculateNumberOfIntervals(inum, atomBlockSize); - if (nAtomBlocks > 100) nAtomBlocks = 100; + if (nAtomBlocks > 100) nAtomBlocks = 100; divideInterval(atomBlocks, inum, nAtomBlocks); - + int nmax = 0; - for (int block=0; block(end-begin).count()/1e6; - - begin = std::chrono::high_resolution_clock::now(); - // obtain the neighbors within rcut - NeighborList(rij, numij, typeai, idxi, ai, aj, ti, tj, rcutsq, gi1, ni); + end = std::chrono::high_resolution_clock::now(); + comptime[0] += std::chrono::duration_cast(end-begin).count()/1e6; + + begin = std::chrono::high_resolution_clock::now(); + // obtain the neighbors within rcut + NeighborList(rij, numij, typeai, idxi, ai, aj, ti, tj, rcutsq, gi1, ni); Kokkos::fence(); - end = std::chrono::high_resolution_clock::now(); - comptime[1] += std::chrono::duration_cast(end-begin).count()/1e6; - + end = std::chrono::high_resolution_clock::now(); + comptime[1] += std::chrono::duration_cast(end-begin).count()/1e6; + // compute atomic energy and force for the current atom block - begin = std::chrono::high_resolution_clock::now(); + begin = std::chrono::high_resolution_clock::now(); blockatom_energyforce(ei, fij, ni, nij); Kokkos::fence(); - end = std::chrono::high_resolution_clock::now(); - comptime[2] += std::chrono::duration_cast(end-begin).count()/1e6; + end = std::chrono::high_resolution_clock::now(); + comptime[2] += std::chrono::duration_cast(end-begin).count()/1e6; - begin = std::chrono::high_resolution_clock::now(); + begin = std::chrono::high_resolution_clock::now(); // tally atomic energy to global energy tallyenergy(ei, gi1, ni); - + // tally atomic force to global force tallyforce(fij, ai, aj, nij); - + // tally atomic stress if (vflag) { tallystress(fij, rij, ai, aj, nij); - } + } Kokkos::fence(); - end = std::chrono::high_resolution_clock::now(); - comptime[3] += std::chrono::duration_cast(end-begin).count()/1e6; - + end = std::chrono::high_resolution_clock::now(); + comptime[3] += std::chrono::duration_cast(end-begin).count()/1e6; + //savedatafordebugging(); - } - + } + if (vflag_fdotr) pair_virial_fdotr_compute(this); if (eflag_atom) { @@ -342,9 +342,9 @@ void PairPODKokkos::compute(int eflag_in, int vflag_in) } template -void PairPODKokkos::copy_from_pod_class(EAPOD *podptr) +void PairPODKokkos::copy_from_pod_class(EAPOD *podptr) { - nelements = podptr->nelements; // number of elements + nelements = podptr->nelements; // number of elements onebody = podptr->onebody; // one-body descriptors besseldegree = podptr->besseldegree; // degree of Bessel functions inversedegree = podptr->inversedegree; // degree of inverse functions @@ -365,30 +365,30 @@ void PairPODKokkos::copy_from_pod_class(EAPOD *podptr) nrbf4 = podptr->nrbf4; nrbfmax = podptr->nrbfmax; // number of radial basis functions nabf3 = podptr->nabf3; // number of three-body angular basis functions - nabf4 = podptr->nabf4; // number of four-body angular basis functions + nabf4 = podptr->nabf4; // number of four-body angular basis functions K3 = podptr->K3; // number of three-body monomials K4 = podptr->K4; // number of four-body monomials Q4 = podptr->Q4; // number of four-body monomial coefficients nClusters = podptr->nClusters; // number of environment clusters nComponents = podptr->nComponents; // number of principal components - Mdesc = podptr->Mdesc; // number of base descriptors + Mdesc = podptr->Mdesc; // number of base descriptors rin = podptr->rin; rcut = podptr->rcut; - rmax = rcut - rin; - - MemKK::realloc_kokkos(besselparams, "pair_pod:besselparams", 3); - auto h_besselparams = Kokkos::create_mirror_view(besselparams); + rmax = rcut - rin; + + MemKK::realloc_kokkos(besselparams, "pair_pod:besselparams", 3); + auto h_besselparams = Kokkos::create_mirror_view(besselparams); h_besselparams[0] = podptr->besselparams[0]; h_besselparams[1] = podptr->besselparams[1]; - h_besselparams[2] = podptr->besselparams[2]; - Kokkos::deep_copy(besselparams, h_besselparams); - + h_besselparams[2] = podptr->besselparams[2]; + Kokkos::deep_copy(besselparams, h_besselparams); + MemKK::realloc_kokkos(elemindex, "pair_pod:elemindex", nelements*nelements); auto h_elemindex = Kokkos::create_mirror_view(elemindex); for (int i=0; ielemindex[i]; Kokkos::deep_copy(elemindex, h_elemindex); - + MemKK::realloc_kokkos(Phi, "pair_pod:Phi", ns*ns); auto h_Phi = Kokkos::create_mirror_view(Phi); for (int i=0; iPhi[i]; @@ -408,15 +408,15 @@ void PairPODKokkos::copy_from_pod_class(EAPOD *podptr) MemKK::realloc_kokkos(Centroids, "pair_pod:Centroids", nClusters * nComponents * nelements); auto h_Centroids = Kokkos::create_mirror_view(Centroids); for (int i=0; iCentroids[i]; - Kokkos::deep_copy(Centroids, h_Centroids); + Kokkos::deep_copy(Centroids, h_Centroids); } - + MemKK::realloc_kokkos(pn3, "pair_pod:pn3", nabf3+1); // array stores the number of monomials for each degree MemKK::realloc_kokkos(pq3, "pair_pod:pq3", K3*2); // array needed for the recursive computation of the angular basis functions MemKK::realloc_kokkos(pc3, "pair_pod:pc3", K3); // array needed for the computation of the three-body descriptors MemKK::realloc_kokkos(pa4, "pair_pod:pa4", nabf4+1); // this array is a subset of the array {0, 1, 4, 10, 19, 29, 47, 74, 89, 119, 155, 209, 230, 275, 335, 425, 533, 561, 624, 714, 849, 949, 1129, 1345} MemKK::realloc_kokkos(pb4, "pair_pod:pb4", Q4*3); // array stores the indices of the monomials needed for the computation of the angular basis functions - MemKK::realloc_kokkos(pc4, "pair_pod:pc4", Q4); // array of monomial coefficients needed for the computation of the four-body descriptors + MemKK::realloc_kokkos(pc4, "pair_pod:pc4", Q4); // array of monomial coefficients needed for the computation of the four-body descriptors auto h_pn3 = Kokkos::create_mirror_view(pn3); for (int i=0; ipn3[i]; @@ -448,7 +448,7 @@ void PairPODKokkos::copy_from_pod_class(EAPOD *podptr) MemKK::realloc_kokkos(ind34r, "pair_pod:ind34r", nl34); MemKK::realloc_kokkos(ind44l, "pair_pod:ind44l", nl44); MemKK::realloc_kokkos(ind44r, "pair_pod:ind44r", nl44); - + auto h_ind33l = Kokkos::create_mirror_view(ind33l); for (int i = 0; i < nl33; i++) h_ind33l[i] = podptr->ind33l[i]; Kokkos::deep_copy(ind33l, h_ind33l); @@ -471,11 +471,11 @@ void PairPODKokkos::copy_from_pod_class(EAPOD *podptr) auto h_ind44r = Kokkos::create_mirror_view(ind44r); for (int i = 0; i < nl44; i++) h_ind44r[i] = podptr->ind44r[i]; - Kokkos::deep_copy(ind44r, h_ind44r); + Kokkos::deep_copy(ind44r, h_ind44r); } template -void PairPODKokkos::divideInterval(int *intervals, int N, int M) +void PairPODKokkos::divideInterval(int *intervals, int N, int M) { int intervalSize = N / M; // Basic size of each interval int remainder = N % M; // Remainder to distribute @@ -485,11 +485,11 @@ void PairPODKokkos::divideInterval(int *intervals, int N, int M) if (remainder > 0) { remainder--; } - } + } } template -int PairPODKokkos::calculateNumberOfIntervals(int N, int intervalSize) +int PairPODKokkos::calculateNumberOfIntervals(int N, int intervalSize) { if (intervalSize <= 0) { printf("Interval size must be a positive integer.\n"); @@ -509,7 +509,7 @@ void PairPODKokkos::grow_atoms(int Ni) { if (Ni > nimax) { nimax = Ni; - MemKK::realloc_kokkos(numij, "pair_pod:numij", nimax+1); + MemKK::realloc_kokkos(numij, "pair_pod:numij", nimax+1); MemKK::realloc_kokkos(ei, "pair_pod:ei", nimax); MemKK::realloc_kokkos(typeai, "pair_pod:typeai", nimax); int n = nimax * nelements * K3 * nrbfmax; @@ -517,8 +517,8 @@ void PairPODKokkos::grow_atoms(int Ni) MemKK::realloc_kokkos(forcecoeff, "pair_pod:forcecoeff", n); MemKK::realloc_kokkos(bd, "pair_pod:bd", nimax * Mdesc); MemKK::realloc_kokkos(cb, "pair_pod:cb", nimax * Mdesc); - if (nClusters > 1)MemKK::realloc_kokkos(pd, "pair_pod:pd", nimax * (1 + nComponents + 3*nClusters)); - Kokkos::deep_copy(numij, 0); + if (nClusters > 1)MemKK::realloc_kokkos(pd, "pair_pod:pd", nimax * (1 + nComponents + 3*nClusters)); + Kokkos::deep_copy(numij, 0); } } @@ -528,7 +528,7 @@ void PairPODKokkos::grow_pairs(int Nij) if (Nij > nijmax) { nijmax = Nij; MemKK::realloc_kokkos(rij, "pair_pod:r_ij", 3 * nijmax); - MemKK::realloc_kokkos(fij, "pair_pod:f_ij", 3 * nijmax); + MemKK::realloc_kokkos(fij, "pair_pod:f_ij", 3 * nijmax); MemKK::realloc_kokkos(idxi, "pair_pod:idxi", nijmax); MemKK::realloc_kokkos(ai, "pair_pod:ai", nijmax); MemKK::realloc_kokkos(aj, "pair_pod:aj", nijmax); @@ -542,14 +542,14 @@ void PairPODKokkos::grow_pairs(int Nij) MemKK::realloc_kokkos(abf, "pair_pod:abf", nijmax * kmax); MemKK::realloc_kokkos(abfx, "pair_pod:abfx", nijmax * kmax); MemKK::realloc_kokkos(abfy, "pair_pod:abfy", nijmax * kmax); - MemKK::realloc_kokkos(abfz, "pair_pod:abfz", nijmax * kmax); + MemKK::realloc_kokkos(abfz, "pair_pod:abfz", nijmax * kmax); } } template int PairPODKokkos::NeighborCount(t_pod_1i l_numij, double l_rcutsq, int gi1, int Ni) { - // create local shadow views for KOKKOS_LAMBDA to pass them into parallel_for + // create local shadow views for KOKKOS_LAMBDA to pass them into parallel_for auto l_ilist = d_ilist; auto l_x = x; auto l_numneigh = d_numneigh; @@ -559,10 +559,10 @@ int PairPODKokkos::NeighborCount(t_pod_1i l_numij, double l_rcutsq, Kokkos::parallel_for("NeighborCount", Kokkos::TeamPolicy<>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) { int i = team.league_rank(); int gi = l_ilist(gi1 + i); - double xi0 = l_x(gi, 0); - double xi1 = l_x(gi, 1); - double xi2 = l_x(gi, 2); - int jnum = l_numneigh(gi); + double xi0 = l_x(gi, 0); + double xi1 = l_x(gi, 1); + double xi2 = l_x(gi, 2); + int jnum = l_numneigh(gi); int ncount = 0; Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,jnum), [&] (const int jj, int& count) { @@ -572,50 +572,50 @@ int PairPODKokkos::NeighborCount(t_pod_1i l_numij, double l_rcutsq, double dely = xi1 - l_x(j,1); double delz = xi2 - l_x(j,2); double rsq = delx*delx + dely*dely + delz*delz; - if (rsq < l_rcutsq) count++; + if (rsq < l_rcutsq) count++; },ncount); - l_numij(i+1) = ncount; + l_numij(i+1) = ncount; }); - + // accumalative sum Kokkos::parallel_scan("InclusivePrefixSum", Ni + 1, KOKKOS_LAMBDA(int i, int& update, const bool final) { - if (i > 0) { + if (i > 0) { update += l_numij(i); if (final) { l_numij(i) = update; } } - }); + }); int total_neighbors = 0; - Kokkos::deep_copy(Kokkos::View(&total_neighbors), Kokkos::subview(l_numij, Ni)); - + Kokkos::deep_copy(Kokkos::View(&total_neighbors), Kokkos::subview(l_numij, Ni)); + return total_neighbors; } template -void PairPODKokkos::NeighborList(t_pod_1d l_rij, t_pod_1i l_numij, t_pod_1i l_typeai, +void PairPODKokkos::NeighborList(t_pod_1d l_rij, t_pod_1i l_numij, t_pod_1i l_typeai, t_pod_1i l_idxi, t_pod_1i l_ai, t_pod_1i l_aj, t_pod_1i l_ti, t_pod_1i l_tj, double l_rcutsq, int gi1, int Ni) -{ - // create local shadow views for KOKKOS_LAMBDA to pass them into parallel_for +{ + // create local shadow views for KOKKOS_LAMBDA to pass them into parallel_for auto l_ilist = d_ilist; auto l_x = x; auto l_numneigh = d_numneigh; auto l_neighbors = d_neighbors; - auto l_map = d_map; - auto l_type = type; - + auto l_map = d_map; + auto l_type = type; + Kokkos::parallel_for("NeighborList", Kokkos::TeamPolicy<>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) { int i = team.league_rank(); int gi = l_ilist(gi1 + i); - double xi0 = l_x(gi, 0); - double xi1 = l_x(gi, 1); - double xi2 = l_x(gi, 2); + double xi0 = l_x(gi, 0); + double xi1 = l_x(gi, 1); + double xi2 = l_x(gi, 2); int itype = l_map(l_type(gi)) + 1; //map[atomtypes[gi]] + 1; - l_typeai(i) = itype; - int jnum = l_numneigh(gi); - int nij0 = l_numij(i); + l_typeai(i) = itype; + int jnum = l_numneigh(gi); + int nij0 = l_numij(i); Kokkos::parallel_scan(Kokkos::TeamThreadRange(team,jnum), [&] (const int jj, int& offset, bool final) { int gj = l_neighbors(gi,jj); @@ -634,17 +634,17 @@ void PairPODKokkos::NeighborList(t_pod_1d l_rij, t_pod_1i l_numij, l_ai(nij1) = gi; l_aj(nij1) = gj; l_ti(nij1) = itype; - l_tj(nij1) = l_map(l_type(gj)) + 1; //map[atomtypes[gj)) + 1; + l_tj(nij1) = l_map(l_type(gj)) + 1; //map[atomtypes[gj)) + 1; } offset++; - }); - }); + }); + }); } template -void PairPODKokkos::radialbasis(t_pod_1d rbft, t_pod_1d rbftx, t_pod_1d rbfty, t_pod_1d rbftz, - t_pod_1d l_rij, t_pod_1d l_besselparams, double l_rin, double l_rmax, int l_besseldegree, - int l_inversedegree, int l_nbesselpars, int l_ns, int Nij) +void PairPODKokkos::radialbasis(t_pod_1d rbft, t_pod_1d rbftx, t_pod_1d rbfty, t_pod_1d rbftz, + t_pod_1d l_rij, t_pod_1d l_besselparams, double l_rin, double l_rmax, int l_besseldegree, + int l_inversedegree, int l_nbesselpars, int l_ns, int Nij) { Kokkos::parallel_for("ComputeRadialBasis", Nij, KOKKOS_LAMBDA(int n) { double xij1 = l_rij(0+3*n); @@ -724,13 +724,13 @@ void PairPODKokkos::radialbasis(t_pod_1d rbft, t_pod_1d rbftx, t_pod rbfty(idxni) = drbftdr*dr2; rbftz(idxni) = drbftdr*dr3; } - + // Calculate fcut/dij and dfcut/dij f1 = fcut/dij; double a = 1.0; for (int i=0; i::radialbasis(t_pod_1d rbft, t_pod_1d rbftx, t_pod } template -void PairPODKokkos::matrixMultiply(t_pod_1d a, t_pod_1d b, t_pod_1d c, int r1, int c1, int c2) +void PairPODKokkos::matrixMultiply(t_pod_1d a, t_pod_1d b, t_pod_1d c, int r1, int c1, int c2) { Kokkos::parallel_for("MatrixMultiply", r1 * c2, KOKKOS_LAMBDA(int idx) { int j = idx / r1; // Calculate column index @@ -754,13 +754,13 @@ void PairPODKokkos::matrixMultiply(t_pod_1d a, t_pod_1d b, t_pod_1d sum += a(i + r1*k) * b(k + c1*j); // Manually calculate the 1D index } c(i + r1*j) = sum; // Manually calculate the 1D index for c - }); + }); } template void PairPODKokkos::angularbasis(t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, - t_pod_1d l_rij, t_pod_1i l_pq3, int l_K3, int N) -{ + t_pod_1d l_rij, t_pod_1i l_pq3, int l_K3, int N) +{ Kokkos::parallel_for("AngularBasis", N, KOKKOS_LAMBDA(int j) { double x = l_rij(j*3 + 0); double y = l_rij(j*3 + 1); @@ -795,12 +795,12 @@ void PairPODKokkos::angularbasis(t_pod_1d l_abf, t_pod_1d l_abfx, t_ l_abf(idxa) = 1.0; l_abfx(idxa) = 0.0; l_abfy(idxa) = 0.0; - l_abfz(idxa) = 0.0; - + l_abfz(idxa) = 0.0; + // Loop over all angular basis functions for (int n=1; n::angularbasis(t_pod_1d l_abf, t_pod_1d l_abfx, t_ l_abfx(idxa) = l_abfx(mj)*w; l_abfy(idxa) = l_abfy(mj)*w; l_abfz(idxa) = l_abfz(mj)*w + l_abf(mj); - } + } } - for (int n=1; n -void PairPODKokkos::radialangularsum(t_pod_1d l_sumU, t_pod_1d l_rbf, t_pod_1d l_abf, t_pod_1i l_tj, - t_pod_1i l_numij, const int l_nelements, const int l_nrbf3, const int l_K3, const int Ni, const int Nij) -{ +void PairPODKokkos::radialangularsum(t_pod_1d l_sumU, t_pod_1d l_rbf, t_pod_1d l_abf, t_pod_1i l_tj, + t_pod_1i l_numij, const int l_nelements, const int l_nrbf3, const int l_K3, const int Ni, const int Nij) +{ int totalIterations = l_nrbf3 * l_K3 * Ni; if (l_nelements==1) { Kokkos::parallel_for("RadialAngularSum", totalIterations, KOKKOS_LAMBDA(int idx) { @@ -849,14 +849,14 @@ void PairPODKokkos::radialangularsum(t_pod_1d l_sumU, t_pod_1d l_rbf int kmi = k + l_K3*m + l_K3*l_nrbf3*i; int start = l_numij(i); - int nj = l_numij(i+1)-start; + int nj = l_numij(i+1)-start; double sum=0.0; for (int j=0; j::radialangularsum(t_pod_1d l_sumU, t_pod_1d l_rbf int i = temp / l_nrbf3; int kmi = l_nelements*k + l_nelements*l_K3*m + l_nelements*l_K3*l_nrbf3*i; int start = l_numij(i); - int nj = l_numij(i+1)-start; + int nj = l_numij(i+1)-start; double tm[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; for (int j=0; j -void PairPODKokkos::twobodydesc(t_pod_1d d2, t_pod_1d l_rbf, t_pod_1i l_idxi, t_pod_1i l_tj, - int l_nrbf2, const int Ni, const int Nij) +void PairPODKokkos::twobodydesc(t_pod_1d d2, t_pod_1d l_rbf, t_pod_1i l_idxi, t_pod_1i l_tj, + int l_nrbf2, const int Ni, const int Nij) { - int totalIterations = l_nrbf2 * Nij; + int totalIterations = l_nrbf2 * Nij; Kokkos::parallel_for("twobodydesc", totalIterations, KOKKOS_LAMBDA(int idx) { int n = idx / l_nrbf2; // pair index int m = idx % l_nrbf2; // rbd index @@ -895,10 +895,10 @@ void PairPODKokkos::twobodydesc(t_pod_1d d2, t_pod_1d l_rbf, t_pod_ } template -void PairPODKokkos::twobody_forces(t_pod_1d fij, t_pod_1d cb2, t_pod_1d l_rbfx, t_pod_1d l_rbfy, - t_pod_1d l_rbfz, t_pod_1i l_idxi, t_pod_1i l_tj, int l_nrbf2, const int Ni, const int Nij) +void PairPODKokkos::twobody_forces(t_pod_1d fij, t_pod_1d cb2, t_pod_1d l_rbfx, t_pod_1d l_rbfy, + t_pod_1d l_rbfz, t_pod_1i l_idxi, t_pod_1i l_tj, int l_nrbf2, const int Ni, const int Nij) { - int totalIterations = l_nrbf2 * Nij; + int totalIterations = l_nrbf2 * Nij; Kokkos::parallel_for("twobody_forces", totalIterations, KOKKOS_LAMBDA(int idx) { int n = idx / l_nrbf2; // pair index int m = idx % l_nrbf2; // rbd index @@ -907,18 +907,18 @@ void PairPODKokkos::twobody_forces(t_pod_1d fij, t_pod_1d cb2, t_pod double c = cb2(l_idxi(n) + Ni*m + Ni*l_nrbf2*(l_tj(n) - 1)); Kokkos::atomic_add(&fij(0 + i1), c*l_rbfx(i2)); // Add the derivative with respect to x to the corresponding descriptor derivative Kokkos::atomic_add(&fij(1 + i1), c*l_rbfy(i2)); // Add the derivative with respect to y to the corresponding descriptor derivative - Kokkos::atomic_add(&fij(2 + i1), c*l_rbfz(i2)); // Add the derivative with respect to z to the corresponding descriptor derivative + Kokkos::atomic_add(&fij(2 + i1), c*l_rbfz(i2)); // Add the derivative with respect to z to the corresponding descriptor derivative }); } template -void PairPODKokkos::threebodydesc(t_pod_1d d3, t_pod_1d l_sumU, t_pod_1i l_pc3, t_pod_1i l_pn3, - int l_nelements, int l_nrbf3, int l_nabf3, int l_K3, const int Ni) +void PairPODKokkos::threebodydesc(t_pod_1d d3, t_pod_1d l_sumU, t_pod_1i l_pc3, t_pod_1i l_pn3, + int l_nelements, int l_nrbf3, int l_nabf3, int l_K3, const int Ni) { int totalIterations = l_nrbf3 * Ni; Kokkos::parallel_for("ThreeBodyDesc", totalIterations, KOKKOS_LAMBDA(int idx) { int m = idx % l_nrbf3; - int i = idx / l_nrbf3; + int i = idx / l_nrbf3; int nmi = l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3*i; for (int p = 0; p < l_nabf3; p++) { int n1 = l_pn3(p); @@ -926,24 +926,24 @@ void PairPODKokkos::threebodydesc(t_pod_1d d3, t_pod_1d l_sumU, t_po int nn = n2 - n1; int ipm = i + Ni * (p + l_nabf3 * m); int k = 0; - for (int i1 = 0; i1 < l_nelements; i1++) { + for (int i1 = 0; i1 < l_nelements; i1++) { for (int i2 = i1; i2 < l_nelements; i2++) { double tmp=0; - for (int q = 0; q < nn; q++) { - tmp += l_pc3(n1 + q) * l_sumU(i1 + l_nelements * (n1 + q) + nmi) * l_sumU(i2 + l_nelements * (n1 + q) + nmi); + for (int q = 0; q < nn; q++) { + tmp += l_pc3(n1 + q) * l_sumU(i1 + l_nelements * (n1 + q) + nmi) * l_sumU(i2 + l_nelements * (n1 + q) + nmi); } d3(ipm + totalIterations * l_nabf3 * k) = tmp; k += 1; } } } - }); + }); } template -void PairPODKokkos::threebody_forces(t_pod_1d fij, t_pod_1d cb3, t_pod_1d l_rbf, t_pod_1d l_rbfx, - t_pod_1d l_rbfy, t_pod_1d l_rbfz, t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, - t_pod_1d l_sumU, t_pod_1i l_idxi, t_pod_1i l_tj, t_pod_1i l_pc3, t_pod_1i l_pn3, t_pod_1i l_elemindex, +void PairPODKokkos::threebody_forces(t_pod_1d fij, t_pod_1d cb3, t_pod_1d l_rbf, t_pod_1d l_rbfx, + t_pod_1d l_rbfy, t_pod_1d l_rbfz, t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, + t_pod_1d l_sumU, t_pod_1i l_idxi, t_pod_1i l_tj, t_pod_1i l_pc3, t_pod_1i l_pn3, t_pod_1i l_elemindex, int l_nelements, int l_nrbf3, int l_nabf3, int l_K3, int Ni, int Nij) { int totalIterations = l_nrbf3 * Nij; @@ -958,89 +958,89 @@ void PairPODKokkos::threebody_forces(t_pod_1d fij, t_pod_1d cb3, t_p double rbfzBase = l_rbfz(idxR); double fx = 0; double fy = 0; - double fz = 0; + double fz = 0; for (int p = 0; p < l_nabf3; p++) { double c3 = 2.0 * cb3(l_idxi(j) + Ni*p + Ni*l_nabf3*m); - int n1 = l_pn3(p); - int nn = l_pn3(p + 1) - n1; + int n1 = l_pn3(p); + int nn = l_pn3(p + 1) - n1; int idxU = l_K3 * m + l_K3*l_nrbf3*l_idxi(j); - for (int q = 0; q < nn; q++) { - int idxNQ = n1 + q; // Combine n1 and q into a single index for pc3 and sumU - double f = c3 * l_pc3(idxNQ) * l_sumU(idxNQ + idxU); - int idxA = j + Nij*idxNQ; // Pre-compute the index for abf - double abfA = l_abf(idxA); + for (int q = 0; q < nn; q++) { + int idxNQ = n1 + q; // Combine n1 and q into a single index for pc3 and sumU + double f = c3 * l_pc3(idxNQ) * l_sumU(idxNQ + idxU); + int idxA = j + Nij*idxNQ; // Pre-compute the index for abf + double abfA = l_abf(idxA); // Use the pre-computed indices to update dd3 fx += f * (l_abfx(idxA) * rbfBase + rbfxBase * abfA); fy += f * (l_abfy(idxA) * rbfBase + rbfyBase * abfA); - fz += f * (l_abfz(idxA) * rbfBase + rbfzBase * abfA); + fz += f * (l_abfz(idxA) * rbfBase + rbfzBase * abfA); } } - int ii = 3 * j; // Pre-compute the base index for dd3 + int ii = 3 * j; // Pre-compute the base index for dd3 Kokkos::atomic_add(&fij(0 + ii), fx); // Add the derivative with respect to x to the corresponding descriptor derivative Kokkos::atomic_add(&fij(1 + ii), fy); // Add the derivative with respect to y to the corresponding descriptor derivative - Kokkos::atomic_add(&fij(2 + ii), fz); // Add the derivative with respect to z to the corresponding descriptor derivative + Kokkos::atomic_add(&fij(2 + ii), fz); // Add the derivative with respect to z to the corresponding descriptor derivative }); } else { int N3 = Ni * l_nabf3 * l_nrbf3; Kokkos::parallel_for("threebody_forces2", totalIterations, KOKKOS_LAMBDA(int idx) { int j = idx / l_nrbf3; // Derive the original j value - int m = idx % l_nrbf3; // Derive the original m value + int m = idx % l_nrbf3; // Derive the original m value int i2 = l_tj(j) - 1; - int idxK = l_nelements * l_K3 * m + l_nelements*l_K3*l_nrbf3*l_idxi(j); - int idxR = j + Nij * m; // Pre-compute the index for rbf + int idxK = l_nelements * l_K3 * m + l_nelements*l_K3*l_nrbf3*l_idxi(j); + int idxR = j + Nij * m; // Pre-compute the index for rbf double rbfBase = l_rbf(idxR); double rbfxBase = l_rbfx(idxR); double rbfyBase = l_rbfy(idxR); double rbfzBase = l_rbfz(idxR); double fx = 0; double fy = 0; - double fz = 0; + double fz = 0; for (int p = 0; p < l_nabf3; p++) { int n1 = l_pn3(p); - int nn = l_pn3(p + 1) - n1; + int nn = l_pn3(p + 1) - n1; int jmp = l_idxi(j) + Ni*(p + l_nabf3*m); - for (int i1 = 0; i1 < l_nelements; i1++) { - double c3 = (i1 == i2) ? 2.0 : 1.0; + for (int i1 = 0; i1 < l_nelements; i1++) { + double c3 = (i1 == i2) ? 2.0 : 1.0; c3 = c3 * cb3(jmp + N3*l_elemindex(i2 + l_nelements * i1)); for (int q = 0; q < nn; q++) { - int idxNQ = n1 + q; // Combine n1 and q into a single index - int idxA = j + Nij*idxNQ; // Pre-compute the index for abf - double abfA = l_abf(idxA); - double f = c3 * l_pc3(idxNQ) * l_sumU(i1 + l_nelements * idxNQ + idxK); + int idxNQ = n1 + q; // Combine n1 and q into a single index + int idxA = j + Nij*idxNQ; // Pre-compute the index for abf + double abfA = l_abf(idxA); + double f = c3 * l_pc3(idxNQ) * l_sumU(i1 + l_nelements * idxNQ + idxK); fx += f * (l_abfx(idxA) * rbfBase + rbfxBase * abfA); fy += f * (l_abfy(idxA) * rbfBase + rbfyBase * abfA); - fz += f * (l_abfz(idxA) * rbfBase + rbfzBase * abfA); - } + fz += f * (l_abfz(idxA) * rbfBase + rbfzBase * abfA); + } } } - int ii = 3 * j; // Pre-compute the base index for dd3 + int ii = 3 * j; // Pre-compute the base index for dd3 Kokkos::atomic_add(&fij(0 + ii), fx); // Add the derivative with respect to x to the corresponding descriptor derivative Kokkos::atomic_add(&fij(1 + ii), fy); // Add the derivative with respect to y to the corresponding descriptor derivative - Kokkos::atomic_add(&fij(2 + ii), fz); // Add the derivative with respect to z to the corresponding descriptor derivative - }); + Kokkos::atomic_add(&fij(2 + ii), fz); // Add the derivative with respect to z to the corresponding descriptor derivative + }); } } template -void PairPODKokkos::threebody_forcecoeff(t_pod_1d fb3, t_pod_1d cb3, - t_pod_1d l_sumU, t_pod_1i l_pc3, t_pod_1i l_pn3, t_pod_1i l_elemindex, +void PairPODKokkos::threebody_forcecoeff(t_pod_1d fb3, t_pod_1d cb3, + t_pod_1d l_sumU, t_pod_1i l_pc3, t_pod_1i l_pn3, t_pod_1i l_elemindex, int l_nelements, int l_nrbf3, int l_nabf3, int l_K3, int Ni) { int totalIterations = l_nrbf3 * Ni; if (l_nelements==1) { Kokkos::parallel_for("threebody_forcecoeff1", totalIterations, KOKKOS_LAMBDA(int idx) { int i = idx / l_nrbf3; // Calculate j using integer division - int m = idx % l_nrbf3; // Calculate m using modulo operation + int m = idx % l_nrbf3; // Calculate m using modulo operation for (int p = 0; p < l_nabf3; p++) { double c3 = 2.0 * cb3(i + Ni*p + Ni*l_nabf3*m); - int n1 = l_pn3(p); - int nn = l_pn3(p + 1) - n1; + int n1 = l_pn3(p); + int nn = l_pn3(p + 1) - n1; int idxU = l_K3 * m + l_K3*l_nrbf3*i; - for (int q = 0; q < nn; q++) { - int idxNQ = n1 + q; // Combine n1 and q into a single index for pc3 and sumU - fb3(idxNQ + idxU) += c3 * l_pc3(idxNQ) * l_sumU(idxNQ + idxU); + for (int q = 0; q < nn; q++) { + int idxNQ = n1 + q; // Combine n1 and q into a single index for pc3 and sumU + fb3(idxNQ + idxU) += c3 * l_pc3(idxNQ) * l_sumU(idxNQ + idxU); } } }); @@ -1049,76 +1049,76 @@ void PairPODKokkos::threebody_forcecoeff(t_pod_1d fb3, t_pod_1d cb3, int N3 = Ni * l_nabf3 * l_nrbf3; Kokkos::parallel_for("threebody_forcecoeff2", totalIterations, KOKKOS_LAMBDA(int idx) { int i = idx / l_nrbf3; // Derive the original j value - int m = idx % l_nrbf3; // Derive the original m value + int m = idx % l_nrbf3; // Derive the original m value for (int p = 0; p < l_nabf3; p++) { int n1 = l_pn3(p); - int nn = l_pn3(p + 1) - n1; + int nn = l_pn3(p + 1) - n1; int jmp = i + Ni*(p + l_nabf3*m); for (int q = 0; q < nn; q++) { - int k = n1 + q; // Combine n1 and q into a single index - int idxU = l_nelements * k + l_nelements * l_K3 * m + l_nelements*l_K3*l_nrbf3*i; + int k = n1 + q; // Combine n1 and q into a single index + int idxU = l_nelements * k + l_nelements * l_K3 * m + l_nelements*l_K3*l_nrbf3*i; for (int i1 = 0; i1 < l_nelements; i1++) { double tm = l_pc3[k] * l_sumU[i1 + idxU]; for (int i2 = i1; i2 < l_nelements; i2++) { - int em = l_elemindex[i2 + l_nelements * i1]; + int em = l_elemindex[i2 + l_nelements * i1]; double t1 = tm * cb3[jmp + N3*em]; // Ni * nabf3 * nrbf3 * nelements*(nelements+1)/2 - fb3[i2 + idxU] += t1; // K3*nrbf3*Ni + fb3[i2 + idxU] += t1; // K3*nrbf3*Ni fb3[i1 + idxU] += l_pc3[k] * cb3[jmp + N3*em] * l_sumU[i2 + idxU]; } - } + } } } - }); + }); } } template -void PairPODKokkos::fourbodydesc(t_pod_1d d4, t_pod_1d l_sumU, t_pod_1i l_pa4, t_pod_1i l_pb4, +void PairPODKokkos::fourbodydesc(t_pod_1d d4, t_pod_1d l_sumU, t_pod_1i l_pa4, t_pod_1i l_pb4, t_pod_1i l_pc4, int l_nelements, int l_nrbf3, int l_nrbf4, int l_nabf4, int l_K3, int l_Q4, int Ni) { int totalIterations = l_nrbf4 * Ni; Kokkos::parallel_for("fourbodydesc", totalIterations, KOKKOS_LAMBDA(int idx) { int m = idx % l_nrbf4; - int i = idx / l_nrbf4; + int i = idx / l_nrbf4; int idxU = l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3 * i; for (int p = 0; p < l_nabf4; p++) { int n1 = l_pa4(p); int n2 = l_pa4(p + 1); int nn = n2 - n1; int k = 0; - for (int i1 = 0; i1 < l_nelements; i1++) { - for (int i2 = i1; i2 < l_nelements; i2++) { - for (int i3 = i2; i3 < l_nelements; i3++) { + for (int i1 = 0; i1 < l_nelements; i1++) { + for (int i2 = i1; i2 < l_nelements; i2++) { + for (int i3 = i2; i3 < l_nelements; i3++) { double tmp = 0.0; - for (int q = 0; q < nn; q++) { + for (int q = 0; q < nn; q++) { int c = l_pc4(n1 + q); int j1 = l_pb4(n1 + q); int j2 = l_pb4(n1 + q + l_Q4); - int j3 = l_pb4(n1 + q + 2 * l_Q4); + int j3 = l_pb4(n1 + q + 2 * l_Q4); tmp += c * l_sumU(idxU + i1 + l_nelements * j1) * l_sumU(idxU + i2 + l_nelements * j2) * l_sumU(idxU + i3 + l_nelements * j3); } - int kk = p + l_nabf4 * m + l_nabf4 * l_nrbf4 * k; + int kk = p + l_nabf4 * m + l_nabf4 * l_nrbf4 * k; d4(i + Ni * kk) = tmp; - k += 1; + k += 1; } } } } - }); + }); } template -void PairPODKokkos::fourbody_forces(t_pod_1d fij, t_pod_1d cb4, t_pod_1d l_rbf, t_pod_1d l_rbfx, - t_pod_1d l_rbfy, t_pod_1d l_rbfz, t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, - t_pod_1d l_sumU, t_pod_1i l_idxi, t_pod_1i l_tj, t_pod_1i l_pa4, t_pod_1i l_pb4, t_pod_1i l_pc4, t_pod_1i l_elemindex, +void PairPODKokkos::fourbody_forces(t_pod_1d fij, t_pod_1d cb4, t_pod_1d l_rbf, t_pod_1d l_rbfx, + t_pod_1d l_rbfy, t_pod_1d l_rbfz, t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, + t_pod_1d l_sumU, t_pod_1i l_idxi, t_pod_1i l_tj, t_pod_1i l_pa4, t_pod_1i l_pb4, t_pod_1i l_pc4, t_pod_1i l_elemindex, int l_nelements, int l_nrbf3, int l_nrbf4, int l_nabf4, int l_K3, int l_Q4, int Ni, int Nij) { int totalIterations = l_nrbf4 * Nij; if (l_nelements==1) { Kokkos::parallel_for("fourbody_forces1", totalIterations, KOKKOS_LAMBDA(int idx) { int j = idx / l_nrbf4; // Derive the original j value - int m = idx % l_nrbf4; // Derive the original m value - int idxU = l_K3 * m + l_K3*l_nrbf3*l_idxi(j); + int m = idx % l_nrbf4; // Derive the original m value + int idxU = l_K3 * m + l_K3*l_nrbf3*l_idxi(j); int baseIdxJ = j + Nij * m; // Pre-compute the index for rbf double rbfBase = l_rbf(baseIdxJ); double rbfxBase = l_rbfx(baseIdxJ); @@ -1126,7 +1126,7 @@ void PairPODKokkos::fourbody_forces(t_pod_1d fij, t_pod_1d cb4, t_po double rbfzBase = l_rbfz(baseIdxJ); double fx = 0; double fy = 0; - double fz = 0; + double fz = 0; for (int p = 0; p < l_nabf4; p++) { int n1 = l_pa4(p); int n2 = l_pa4(p + 1); @@ -1134,22 +1134,22 @@ void PairPODKokkos::fourbody_forces(t_pod_1d fij, t_pod_1d cb4, t_po double c4 = cb4(l_idxi(j) + Ni*p + Ni*l_nabf4*m); for (int q = 0; q < nn; q++) { int idxNQ = n1 + q; // Combine n1 and q into a single index - double c = c4 * l_pc4[idxNQ]; + double c = c4 * l_pc4[idxNQ]; int j1 = l_pb4(idxNQ); int j2 = l_pb4(idxNQ + l_Q4); int j3 = l_pb4(idxNQ + 2 * l_Q4); double c1 = l_sumU(idxU + j1); double c2 = l_sumU(idxU + j2); - double c3 = l_sumU(idxU + j3); - double t12 = c * c1 * c2; + double c3 = l_sumU(idxU + j3); + double t12 = c * c1 * c2; double t13 = c * c1 * c3; double t23 = c * c2 * c3; - - // Pre-calculate commonly used indices + + // Pre-calculate commonly used indices int baseIdxJ3 = j + Nij * j3; // Common index for j3 terms int baseIdxJ2 = j + Nij * j2; // Common index for j2 terms int baseIdxJ1 = j + Nij * j1; // Common index for j1 terms - + // Temporary variables to store repeated calculations double abfBaseJ1 = l_abf(baseIdxJ1); double abfBaseJ2 = l_abf(baseIdxJ2); @@ -1166,54 +1166,54 @@ void PairPODKokkos::fourbody_forces(t_pod_1d fij, t_pod_1d cb4, t_po + t23 * (l_abfz(baseIdxJ1) * rbfBase + rbfzBase * abfBaseJ1); } } - int ii = 3 * j; // Pre-compute the base index for dd3 + int ii = 3 * j; // Pre-compute the base index for dd3 Kokkos::atomic_add(&fij(0 + ii), fx); // Add the derivative with respect to x to the corresponding descriptor derivative Kokkos::atomic_add(&fij(1 + ii), fy); // Add the derivative with respect to y to the corresponding descriptor derivative - Kokkos::atomic_add(&fij(2 + ii), fz); // Add the derivative with respect to z to the corresponding descriptor derivative + Kokkos::atomic_add(&fij(2 + ii), fz); // Add the derivative with respect to z to the corresponding descriptor derivative }); } - else { + else { int N3 = Ni * l_nabf4 * l_nrbf4; Kokkos::parallel_for("fourbody_forces2", totalIterations, KOKKOS_LAMBDA(int idx) { int j = idx / l_nrbf4; // Derive the original j value - int m = idx % l_nrbf4; // Derive the original m value + int m = idx % l_nrbf4; // Derive the original m value int idxM = j + Nij * m; double rbfM = l_rbf(idxM); double rbfxM = l_rbfx(idxM); double rbfyM = l_rbfy(idxM); double rbfzM = l_rbfz(idxM); - int typej = l_tj(j) - 1; + int typej = l_tj(j) - 1; double fx = 0; double fy = 0; - double fz = 0; + double fz = 0; for (int p = 0; p < l_nabf4; p++) { int n1 = l_pa4(p); int n2 = l_pa4(p + 1); int nn = n2 - n1; int jpm = l_idxi(j) + Ni*p + Ni*l_nabf4*m; - int k = 0; - for (int i1 = 0; i1 < l_nelements; i1++) { - for (int i2 = i1; i2 < l_nelements; i2++) { - for (int i3 = i2; i3 < l_nelements; i3++) { - for (int q = 0; q < nn; q++) { - double c = l_pc4(n1 + q) * cb4(jpm + N3*k); + int k = 0; + for (int i1 = 0; i1 < l_nelements; i1++) { + for (int i2 = i1; i2 < l_nelements; i2++) { + for (int i3 = i2; i3 < l_nelements; i3++) { + for (int q = 0; q < nn; q++) { + double c = l_pc4(n1 + q) * cb4(jpm + N3*k); int j1 = l_pb4(n1 + q); int j2 = l_pb4(n1 + q + l_Q4); int j3 = l_pb4(n1 + q + 2 * l_Q4); - + int idx1 = i1 + l_nelements * j1 + l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3 * l_idxi(j); int idx2 = i2 + l_nelements * j2 + l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3 * l_idxi(j); - int idx3 = i3 + l_nelements * j3 + l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3 * l_idxi(j); + int idx3 = i3 + l_nelements * j3 + l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3 * l_idxi(j); double c1 = l_sumU(idx1); double c2 = l_sumU(idx2 ); - double c3 = l_sumU(idx3); - double t12 = c*(c1 * c2); + double c3 = l_sumU(idx3); + double t12 = c*(c1 * c2); double t13 = c*(c1 * c3); - double t23 = c*(c2 * c3); - + double t23 = c*(c2 * c3); + int idxJ3 = j + Nij * j3; int idxJ2 = j + Nij * j2; - int idxJ1 = j + Nij * j1; + int idxJ1 = j + Nij * j1; double abfJ1 = l_abf(idxJ1); double abfJ2 = l_abf(idxJ2); double abfJ3 = l_abf(idxJ3); @@ -1226,7 +1226,7 @@ void PairPODKokkos::fourbody_forces(t_pod_1d fij, t_pod_1d cb4, t_po double abfzJ1 = l_abfz(idxJ1); double abfzJ2 = l_abfz(idxJ2); double abfzJ3 = l_abfz(idxJ3); - + // Compute contributions for each condition if (typej == i3) { fx += t12 * (abfxJ3 * rbfM + rbfxM * abfJ3); @@ -1242,32 +1242,32 @@ void PairPODKokkos::fourbody_forces(t_pod_1d fij, t_pod_1d cb4, t_po fx += t23 * (abfxJ1 * rbfM + rbfxM * abfJ1); fy += t23 * (abfyJ1 * rbfM + rbfyM * abfJ1); fz += t23 * (abfzJ1 * rbfM + rbfzM * abfJ1); - } + } } k += 1; } } } } - int ii = 3 * j; // Pre-compute the base index for dd3 + int ii = 3 * j; // Pre-compute the base index for dd3 Kokkos::atomic_add(&fij(0 + ii), fx); // Add the derivative with respect to x to the corresponding descriptor derivative Kokkos::atomic_add(&fij(1 + ii), fy); // Add the derivative with respect to y to the corresponding descriptor derivative - Kokkos::atomic_add(&fij(2 + ii), fz); // Add the derivative with respect to z to the corresponding descriptor derivative - }); + Kokkos::atomic_add(&fij(2 + ii), fz); // Add the derivative with respect to z to the corresponding descriptor derivative + }); } } template -void PairPODKokkos::fourbody_forcecoeff(t_pod_1d fb4, t_pod_1d cb4, - t_pod_1d l_sumU, t_pod_1i l_pa4, t_pod_1i l_pb4, t_pod_1i l_pc4, int l_nelements, +void PairPODKokkos::fourbody_forcecoeff(t_pod_1d fb4, t_pod_1d cb4, + t_pod_1d l_sumU, t_pod_1i l_pa4, t_pod_1i l_pb4, t_pod_1i l_pc4, int l_nelements, int l_nrbf3, int l_nrbf4, int l_nabf4, int l_K3, int l_Q4, int Ni) { int totalIterations = l_nrbf4 * Ni; if (l_nelements==1) { Kokkos::parallel_for("fourbody_forcecoeff1", totalIterations, KOKKOS_LAMBDA(int idx) { int i = idx / l_nrbf4; // Derive the original j value - int m = idx % l_nrbf4; // Derive the original m value - int idxU = l_K3 * m + l_K3*l_nrbf3*i; + int m = idx % l_nrbf4; // Derive the original m value + int idxU = l_K3 * m + l_K3*l_nrbf3*i; for (int p = 0; p < l_nabf4; p++) { int n1 = l_pa4(p); int n2 = l_pa4(p + 1); @@ -1275,62 +1275,62 @@ void PairPODKokkos::fourbody_forcecoeff(t_pod_1d fb4, t_pod_1d cb4, double c4 = cb4(i + Ni*p + Ni*l_nabf4*m); for (int q = 0; q < nn; q++) { int idxNQ = n1 + q; // Combine n1 and q into a single index - double c = c4 * l_pc4[idxNQ]; + double c = c4 * l_pc4[idxNQ]; int j1 = idxU + l_pb4(idxNQ); int j2 = idxU + l_pb4(idxNQ + l_Q4); int j3 = idxU + l_pb4(idxNQ + 2 * l_Q4); double c1 = l_sumU(j1); double c2 = l_sumU(j2); - double c3 = l_sumU(j3); - fb4[j3] += c * c1 * c2; + double c3 = l_sumU(j3); + fb4[j3] += c * c1 * c2; fb4[j2] += c * c1 * c3; - fb4[j1] += c * c2 * c3; + fb4[j1] += c * c2 * c3; } } }); } - else { + else { int N3 = Ni * l_nabf4 * l_nrbf4; Kokkos::parallel_for("fourbody_forcecoeff2", totalIterations, KOKKOS_LAMBDA(int idx) { int i = idx / l_nrbf4; // Derive the original j value - int m = idx % l_nrbf4; // Derive the original m value + int m = idx % l_nrbf4; // Derive the original m value for (int p = 0; p < l_nabf4; p++) { int n1 = l_pa4(p); int n2 = l_pa4(p + 1); int nn = n2 - n1; - int jpm = i + Ni*p + Ni*l_nabf4*m; - for (int q = 0; q < nn; q++) { - double c = l_pc4(n1 + q); + int jpm = i + Ni*p + Ni*l_nabf4*m; + for (int q = 0; q < nn; q++) { + double c = l_pc4(n1 + q); int j1 = l_pb4(n1 + q); int j2 = l_pb4(n1 + q + l_Q4); - int j3 = l_pb4(n1 + q + 2 * l_Q4); + int j3 = l_pb4(n1 + q + 2 * l_Q4); int idx1 = l_nelements * j1 + l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3 * i; int idx2 = l_nelements * j2 + l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3 * i; - int idx3 = l_nelements * j3 + l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3 * i; - int k = 0; - for (int i1 = 0; i1 < l_nelements; i1++) { + int idx3 = l_nelements * j3 + l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3 * i; + int k = 0; + for (int i1 = 0; i1 < l_nelements; i1++) { double c1 = l_sumU[idx1 + i1]; - for (int i2 = i1; i2 < l_nelements; i2++) { - double c2 = l_sumU[idx2 + i2]; - for (int i3 = i2; i3 < l_nelements; i3++) { - double c3 = l_sumU[idx3 + i3]; + for (int i2 = i1; i2 < l_nelements; i2++) { + double c2 = l_sumU[idx2 + i2]; + for (int i3 = i2; i3 < l_nelements; i3++) { + double c3 = l_sumU[idx3 + i3]; double c4 = c * cb4[jpm + N3*k]; - fb4[idx3 + i3] += c4*(c1 * c2); + fb4[idx3 + i3] += c4*(c1 * c2); fb4[idx2 + i2] += c4*(c1 * c3); - fb4[idx1 + i1] += c4*(c2 * c3); - k += 1; - } + fb4[idx1 + i1] += c4*(c2 * c3); + k += 1; + } } } } } - }); + }); } } template -void PairPODKokkos::allbody_forces(t_pod_1d fij, t_pod_1d l_forcecoeff, t_pod_1d l_rbf, t_pod_1d l_rbfx, - t_pod_1d l_rbfy, t_pod_1d l_rbfz, t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, +void PairPODKokkos::allbody_forces(t_pod_1d fij, t_pod_1d l_forcecoeff, t_pod_1d l_rbf, t_pod_1d l_rbfx, + t_pod_1d l_rbfy, t_pod_1d l_rbfz, t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, t_pod_1i l_idxi, t_pod_1i l_tj, int l_nelements, int l_nrbf3, int l_nabf3, int l_K3, int Nij) { int totalIterations = l_nrbf3 * Nij; @@ -1345,23 +1345,23 @@ void PairPODKokkos::allbody_forces(t_pod_1d fij, t_pod_1d l_forcecoe double rbfzBase = l_rbfz(idxR); double fx = 0; double fy = 0; - double fz = 0; + double fz = 0; for (int k = 0; k < l_K3; k++) { int idxU = l_nelements * k + l_nelements * l_K3 * m + l_nelements*l_K3*l_nrbf3*l_idxi[j]; double fc = l_forcecoeff[i2 + idxU]; - int idxA = j + Nij*k; // Pre-compute the index for abf - double abfA = l_abf[idxA]; + int idxA = j + Nij*k; // Pre-compute the index for abf + double abfA = l_abf[idxA]; double abfxA = l_abfx[idxA]; double abfyA = l_abfy[idxA]; - double abfzA = l_abfz[idxA]; - fx += fc * (abfxA * rbfBase + rbfxBase * abfA); // K3*nrbf3*Nij + double abfzA = l_abfz[idxA]; + fx += fc * (abfxA * rbfBase + rbfxBase * abfA); // K3*nrbf3*Nij fy += fc * (abfyA * rbfBase + rbfyBase * abfA); - fz += fc * (abfzA * rbfBase + rbfzBase * abfA); - } - int ii = 3 * j; // Pre-compute the base index for dd3 + fz += fc * (abfzA * rbfBase + rbfzBase * abfA); + } + int ii = 3 * j; // Pre-compute the base index for dd3 Kokkos::atomic_add(&fij(0 + ii), fx); // Add the derivative with respect to x to the corresponding descriptor derivative Kokkos::atomic_add(&fij(1 + ii), fy); // Add the derivative with respect to y to the corresponding descriptor derivative - Kokkos::atomic_add(&fij(2 + ii), fz); // Add the derivative with respect to z to the corresponding descriptor derivative + Kokkos::atomic_add(&fij(2 + ii), fz); // Add the derivative with respect to z to the corresponding descriptor derivative }); } @@ -1378,9 +1378,9 @@ void PairPODKokkos::crossdesc(t_pod_1d d12, t_pod_1d d1, t_pod_1d d2 } template -void PairPODKokkos::crossdesc_reduction(t_pod_1d cb1, t_pod_1d cb2, t_pod_1d c12, t_pod_1d d1, +void PairPODKokkos::crossdesc_reduction(t_pod_1d cb1, t_pod_1d cb2, t_pod_1d c12, t_pod_1d d1, t_pod_1d d2, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni) -{ +{ int totalIterations = n12 * Ni; Kokkos::parallel_for("crossdesc_reduction", totalIterations, KOKKOS_LAMBDA(int idx) { int n = idx % Ni; // Ni @@ -1389,9 +1389,9 @@ void PairPODKokkos::crossdesc_reduction(t_pod_1d cb1, t_pod_1d cb2, int k2 = ind2(m); // dd2 int m1 = n + Ni * k1; // d1 int m2 = n + Ni * k2; // d2 - double c = c12(n + Ni * m); - Kokkos::atomic_add(&cb1(m1), c * d2(m2)); - Kokkos::atomic_add(&cb2(m2), c * d1(m1)); + double c = c12(n + Ni * m); + Kokkos::atomic_add(&cb1(m1), c * d2(m2)); + Kokkos::atomic_add(&cb2(m2), c * d1(m1)); }); } @@ -1400,123 +1400,123 @@ void PairPODKokkos::set_array_to_zero(t_pod_1d a, int N) { Kokkos::parallel_for("initialize_array", N, KOKKOS_LAMBDA(int i) { a(i) = 0.0; - }); + }); } template void PairPODKokkos::blockatom_base_descriptors(t_pod_1d bd, int Ni, int Nij) -{ - auto begin = std::chrono::high_resolution_clock::now(); - auto end = std::chrono::high_resolution_clock::now(); - +{ + auto begin = std::chrono::high_resolution_clock::now(); + auto end = std::chrono::high_resolution_clock::now(); + auto d2 = Kokkos::subview(bd, std::make_pair(0, Ni * nl2)); auto d3 = Kokkos::subview(bd, std::make_pair(Ni * nl2, Ni * (nl2 + nl3))); auto d4 = Kokkos::subview(bd, std::make_pair(Ni * (nl2 + nl3), Ni * (nl2 + nl3 + nl4))); auto d33 = Kokkos::subview(bd, std::make_pair(Ni * (nl2 + nl3 + nl4), Ni * (nl2 + nl3 + nl4 + nl33))); auto d34 = Kokkos::subview(bd, std::make_pair(Ni * (nl2 + nl3 + nl4 + nl33), Ni * (nl2 + nl3 + nl4 + nl33 + nl34))); auto d44 = Kokkos::subview(bd, std::make_pair(Ni * (nl2 + nl3 + nl4 + nl33 + nl34), Ni * (nl2 + nl3 + nl4 + nl33 + nl34 + nl44))); - - begin = std::chrono::high_resolution_clock::now(); - radialbasis(abf, abfx, abfy, abfz, rij, besselparams, rin, rmax, + + begin = std::chrono::high_resolution_clock::now(); + radialbasis(abf, abfx, abfy, abfz, rij, besselparams, rin, rmax, besseldegree, inversedegree, nbesselpars, ns, Nij); Kokkos::fence(); - end = std::chrono::high_resolution_clock::now(); - comptime[10] += std::chrono::duration_cast(end-begin).count()/1e6; - - begin = std::chrono::high_resolution_clock::now(); - matrixMultiply(abf, Phi, rbf, Nij, ns, nrbfmax); - matrixMultiply(abfx, Phi, rbfx, Nij, ns, nrbfmax); - matrixMultiply(abfy, Phi, rbfy, Nij, ns, nrbfmax); + end = std::chrono::high_resolution_clock::now(); + comptime[10] += std::chrono::duration_cast(end-begin).count()/1e6; + + begin = std::chrono::high_resolution_clock::now(); + matrixMultiply(abf, Phi, rbf, Nij, ns, nrbfmax); + matrixMultiply(abfx, Phi, rbfx, Nij, ns, nrbfmax); + matrixMultiply(abfy, Phi, rbfy, Nij, ns, nrbfmax); matrixMultiply(abfz, Phi, rbfz, Nij, ns, nrbfmax); Kokkos::fence(); - end = std::chrono::high_resolution_clock::now(); - comptime[11] += std::chrono::duration_cast(end-begin).count()/1e6; + end = std::chrono::high_resolution_clock::now(); + comptime[11] += std::chrono::duration_cast(end-begin).count()/1e6; - begin = std::chrono::high_resolution_clock::now(); + begin = std::chrono::high_resolution_clock::now(); set_array_to_zero(d2, Ni*nl2); - twobodydesc(d2, rbf, idxi, tj, nrbf2, Ni, Nij); + twobodydesc(d2, rbf, idxi, tj, nrbf2, Ni, Nij); Kokkos::fence(); - end = std::chrono::high_resolution_clock::now(); - comptime[12] += std::chrono::duration_cast(end-begin).count()/1e6; - - if ((nl3 > 0) && (Nij>1)) { - begin = std::chrono::high_resolution_clock::now(); + end = std::chrono::high_resolution_clock::now(); + comptime[12] += std::chrono::duration_cast(end-begin).count()/1e6; + + if ((nl3 > 0) && (Nij>1)) { + begin = std::chrono::high_resolution_clock::now(); angularbasis(abf, abfx, abfy, abfz, rij, pq3, K3, Nij); Kokkos::fence(); - end = std::chrono::high_resolution_clock::now(); - comptime[13] += std::chrono::duration_cast(end-begin).count()/1e6; + end = std::chrono::high_resolution_clock::now(); + comptime[13] += std::chrono::duration_cast(end-begin).count()/1e6; - begin = std::chrono::high_resolution_clock::now(); - set_array_to_zero(sumU, nelements * nrbf3 * K3 * Ni); + begin = std::chrono::high_resolution_clock::now(); + set_array_to_zero(sumU, nelements * nrbf3 * K3 * Ni); radialangularsum(sumU, rbf, abf, tj, numij, nelements, nrbf3, K3, Ni, Nij); Kokkos::fence(); - end = std::chrono::high_resolution_clock::now(); - comptime[14] += std::chrono::duration_cast(end-begin).count()/1e6; + end = std::chrono::high_resolution_clock::now(); + comptime[14] += std::chrono::duration_cast(end-begin).count()/1e6; - begin = std::chrono::high_resolution_clock::now(); + begin = std::chrono::high_resolution_clock::now(); //set_array_to_zero(d3, Ni*nl3); threebodydesc(d3, sumU, pc3, pn3, nelements, nrbf3, nabf3, K3, Ni); Kokkos::fence(); - end = std::chrono::high_resolution_clock::now(); - comptime[15] += std::chrono::duration_cast(end-begin).count()/1e6; + end = std::chrono::high_resolution_clock::now(); + comptime[15] += std::chrono::duration_cast(end-begin).count()/1e6; } - + if ((nl4 > 0) && (Nij>2)) { - begin = std::chrono::high_resolution_clock::now(); - //set_array_to_zero(d4, Ni*nl4); + begin = std::chrono::high_resolution_clock::now(); + //set_array_to_zero(d4, Ni*nl4); fourbodydesc(d4, sumU, pa4, pb4, pc4, nelements, nrbf3, nrbf4, nabf4, K3, Q4, Ni); Kokkos::fence(); - end = std::chrono::high_resolution_clock::now(); - comptime[16] += std::chrono::duration_cast(end-begin).count()/1e6; + end = std::chrono::high_resolution_clock::now(); + comptime[16] += std::chrono::duration_cast(end-begin).count()/1e6; } - + if ((nl33>0) && (Nij>3)) { - begin = std::chrono::high_resolution_clock::now(); + begin = std::chrono::high_resolution_clock::now(); crossdesc(d33, d3, d3, ind33l, ind33r, nl33, Ni); Kokkos::fence(); - end = std::chrono::high_resolution_clock::now(); - comptime[17] += std::chrono::duration_cast(end-begin).count()/1e6; - } - + end = std::chrono::high_resolution_clock::now(); + comptime[17] += std::chrono::duration_cast(end-begin).count()/1e6; + } + if ((nl34>0) && (Nij>4)) { - begin = std::chrono::high_resolution_clock::now(); + begin = std::chrono::high_resolution_clock::now(); crossdesc(d34, d3, d4, ind34l, ind34r, nl34, Ni); Kokkos::fence(); - end = std::chrono::high_resolution_clock::now(); - comptime[18] += std::chrono::duration_cast(end-begin).count()/1e6; + end = std::chrono::high_resolution_clock::now(); + comptime[18] += std::chrono::duration_cast(end-begin).count()/1e6; } if ((nl44>0) && (Nij>5)) { - begin = std::chrono::high_resolution_clock::now(); + begin = std::chrono::high_resolution_clock::now(); crossdesc(d44, d4, d4, ind44l, ind44r, nl44, Ni); Kokkos::fence(); - end = std::chrono::high_resolution_clock::now(); - comptime[19] += std::chrono::duration_cast(end-begin).count()/1e6; - } + end = std::chrono::high_resolution_clock::now(); + comptime[19] += std::chrono::duration_cast(end-begin).count()/1e6; + } } template void PairPODKokkos::blockatom_base_coefficients(t_pod_1d ei, t_pod_1d cb, t_pod_1d B, int Ni) -{ - auto cefs = coefficients; - auto tyai = typeai; +{ + auto cefs = coefficients; + auto tyai = typeai; int nDes = Mdesc; int nCoeff = nCoeffPerElement; - - Kokkos::parallel_for("atomic_energies", Ni, KOKKOS_LAMBDA(int n) { + + Kokkos::parallel_for("atomic_energies", Ni, KOKKOS_LAMBDA(int n) { int nc = nCoeff*(tyai[n]-1); ei[n] = cefs[0 + nc]; - for (int m=0; m @@ -1527,35 +1527,35 @@ void PairPODKokkos::blockatom_environment_descriptors(t_pod_1d ei, t auto D = Kokkos::subview(pd, std::make_pair(2 * Ni * nClusters, 3 * Ni * nClusters)); auto pca = Kokkos::subview(pd, std::make_pair(3 * Ni * nClusters, 3 * Ni * nClusters + Ni * nComponents)); auto sumD = Kokkos::subview(pd, std::make_pair(3 * Ni * nClusters + Ni * nComponents, 3 * Ni * nClusters + Ni * nComponents + Ni)); - + auto proj = Proj; auto cent = Centroids; - auto cefs = coefficients; - auto tyai = typeai; - + auto cefs = coefficients; + auto tyai = typeai; + int nCom = nComponents; int nCls = nClusters; int nDes = Mdesc; int nCoeff = nCoeffPerElement; - + int totalIterations = Ni*nCom; - Kokkos::parallel_for("pca", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("pca", totalIterations, KOKKOS_LAMBDA(int idx) { int i = idx % Ni; - int k = idx / Ni; + int k = idx / Ni; double sum = 0.0; int typei = tyai[i]-1; for (int m = 0; m < nDes; m++) { sum += proj[k + nCom*m + nCom*nDes*typei] * B[i + Ni*m]; } - pca[i + Ni*k] = sum; + pca[i + Ni*k] = sum; }); - + totalIterations = Ni*nCls; - Kokkos::parallel_for("inverse_square_distances", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("inverse_square_distances", totalIterations, KOKKOS_LAMBDA(int idx) { int i = idx % Ni; - int j = idx / Ni; + int j = idx / Ni; int typei = tyai[i]-1; - double sum = 1e-20; + double sum = 1e-20; for (int k = 0; k < nCom; k++) { double c = cent[k + j * nCom + nCls*nCom*typei]; double p = pca[i + Ni*k]; @@ -1563,49 +1563,49 @@ void PairPODKokkos::blockatom_environment_descriptors(t_pod_1d ei, t } D[i + Ni*j] = 1.0 / sum; }); - - Kokkos::parallel_for("Probabilities", Ni, KOKKOS_LAMBDA(int i) { - double sum = 0; - for (int j = 0; j < nCls; j++) sum += D[i + Ni*j]; + + Kokkos::parallel_for("Probabilities", Ni, KOKKOS_LAMBDA(int i) { + double sum = 0; + for (int j = 0; j < nCls; j++) sum += D[i + Ni*j]; sumD[i] = sum; - for (int j = 0; j < nCls; j++) P[i + Ni*j] = D[i + Ni*j]/sum; + for (int j = 0; j < nCls; j++) P[i + Ni*j] = D[i + Ni*j]/sum; }); - - Kokkos::parallel_for("atomic_energies", Ni, KOKKOS_LAMBDA(int n) { + + Kokkos::parallel_for("atomic_energies", Ni, KOKKOS_LAMBDA(int n) { int nc = nCoeff*(tyai[n]-1); ei[n] = cefs[0 + nc]; for (int k = 0; k::blockatom_environment_descriptors(t_pod_1d ei, t double dD_dB = 0.0; double D2 = 2 * D[i + Ni*k] * D[i + Ni*k]; for (int n = 0; n < nCom; n++) { - double dD_dpca = D2 * (cent[n + k * nCom + nCls*nCom*typei] - pca[i + Ni*n]); + double dD_dpca = D2 * (cent[n + k * nCom + nCls*nCom*typei] - pca[i + Ni*n]); dD_dB += dD_dpca * proj[n + m * nCom + nCom*nDes*typei]; - } + } dP_dB += dP_dD * dD_dB; - } - sum += cp[i + Ni*j]*dP_dB; + } + sum += cp[i + Ni*j]*dP_dB; } cb[i + Ni*m] += sum; - }); + }); } template void PairPODKokkos::blockatom_energyforce(t_pod_1d l_ei, t_pod_1d l_fij, int Ni, int Nij) -{ - auto begin = std::chrono::high_resolution_clock::now(); - auto end = std::chrono::high_resolution_clock::now(); - +{ + auto begin = std::chrono::high_resolution_clock::now(); + auto end = std::chrono::high_resolution_clock::now(); + // calculate base descriptors and their derivatives with respect to atom coordinates - begin = std::chrono::high_resolution_clock::now(); - blockatom_base_descriptors(bd, Ni, Nij); + begin = std::chrono::high_resolution_clock::now(); + blockatom_base_descriptors(bd, Ni, Nij); Kokkos::fence(); - end = std::chrono::high_resolution_clock::now(); - comptime[4] += std::chrono::duration_cast(end-begin).count()/1e6; - - begin = std::chrono::high_resolution_clock::now(); - if (nClusters > 1) { + end = std::chrono::high_resolution_clock::now(); + comptime[4] += std::chrono::duration_cast(end-begin).count()/1e6; + + begin = std::chrono::high_resolution_clock::now(); + if (nClusters > 1) { blockatom_environment_descriptors(l_ei, cb, bd, Ni); } else { blockatom_base_coefficients(l_ei, cb, bd, Ni); - } + } Kokkos::fence(); - end = std::chrono::high_resolution_clock::now(); - comptime[5] += std::chrono::duration_cast(end-begin).count()/1e6; - - begin = std::chrono::high_resolution_clock::now(); + end = std::chrono::high_resolution_clock::now(); + comptime[5] += std::chrono::duration_cast(end-begin).count()/1e6; + + begin = std::chrono::high_resolution_clock::now(); auto d3 = Kokkos::subview(bd, std::make_pair(Ni * nl2, Ni * (nl2 + nl3))); - auto d4 = Kokkos::subview(bd, std::make_pair(Ni * (nl2 + nl3), Ni * (nl2 + nl3 + nl4))); + auto d4 = Kokkos::subview(bd, std::make_pair(Ni * (nl2 + nl3), Ni * (nl2 + nl3 + nl4))); auto cb2 = Kokkos::subview(cb, std::make_pair(0, Ni * nl2)); auto cb3 = Kokkos::subview(cb, std::make_pair(Ni * nl2, Ni * (nl2 + nl3))); auto cb4 = Kokkos::subview(cb, std::make_pair(Ni * (nl2 + nl3), Ni * (nl2 + nl3 + nl4))); auto cb33 = Kokkos::subview(cb, std::make_pair(Ni * (nl2 + nl3 + nl4), Ni * (nl2 + nl3 + nl4 + nl33))); auto cb34 = Kokkos::subview(cb, std::make_pair(Ni * (nl2 + nl3 + nl4 + nl33), Ni * (nl2 + nl3 + nl4 + nl33 + nl34))); auto cb44 = Kokkos::subview(cb, std::make_pair(Ni * (nl2 + nl3 + nl4 + nl33 + nl34), Ni * (nl2 + nl3 + nl4 + nl33 + nl34 + nl44))); - + if ((nl33>0) && (Nij>3)) { - crossdesc_reduction(cb3, cb3, cb33, d3, d3, ind33l, ind33r, nl33, Ni); + crossdesc_reduction(cb3, cb3, cb33, d3, d3, ind33l, ind33r, nl33, Ni); } if ((nl34>0) && (Nij>4)) { - crossdesc_reduction(cb3, cb4, cb34, d3, d4, ind34l, ind34r, nl34, Ni); - } + crossdesc_reduction(cb3, cb4, cb34, d3, d4, ind34l, ind34r, nl34, Ni); + } if ((nl44>0) && (Nij>5)) { - crossdesc_reduction(cb4, cb4, cb44, d4, d4, ind44l, ind44r, nl44, Ni); - } + crossdesc_reduction(cb4, cb4, cb44, d4, d4, ind44l, ind44r, nl44, Ni); + } Kokkos::fence(); - end = std::chrono::high_resolution_clock::now(); - comptime[6] += std::chrono::duration_cast(end-begin).count()/1e6; - - begin = std::chrono::high_resolution_clock::now(); - set_array_to_zero(l_fij, 3*Nij); - if ((nl2 > 0) && (Nij>0)) twobody_forces(l_fij, cb2, rbfx, rbfy, rbfz, idxi, tj, nrbf2, Ni, Nij); + end = std::chrono::high_resolution_clock::now(); + comptime[6] += std::chrono::duration_cast(end-begin).count()/1e6; + + begin = std::chrono::high_resolution_clock::now(); + set_array_to_zero(l_fij, 3*Nij); + if ((nl2 > 0) && (Nij>0)) twobody_forces(l_fij, cb2, rbfx, rbfy, rbfz, idxi, tj, nrbf2, Ni, Nij); Kokkos::fence(); - end = std::chrono::high_resolution_clock::now(); - comptime[7] += std::chrono::duration_cast(end-begin).count()/1e6; - - set_array_to_zero(forcecoeff, nelements * nrbf3 * K3 * Ni); - if ((nl3 > 0) && (Nij>1)) threebody_forcecoeff(forcecoeff, cb3, sumU, + end = std::chrono::high_resolution_clock::now(); + comptime[7] += std::chrono::duration_cast(end-begin).count()/1e6; + + set_array_to_zero(forcecoeff, nelements * nrbf3 * K3 * Ni); + if ((nl3 > 0) && (Nij>1)) threebody_forcecoeff(forcecoeff, cb3, sumU, pc3, pn3, elemindex, nelements, nrbf3, nabf3, K3, Ni); - if ((nl4 > 0) && (Nij>2)) fourbody_forcecoeff(forcecoeff, cb4, sumU, - pa4, pb4, pc4, nelements, nrbf3, nrbf4, nabf4, K3, Q4, Ni); - if ((nl3 > 0) && (Nij>1)) allbody_forces(l_fij, forcecoeff, rbf, rbfx, rbfy, rbfz, abf, abfx, abfy, abfz, + if ((nl4 > 0) && (Nij>2)) fourbody_forcecoeff(forcecoeff, cb4, sumU, + pa4, pb4, pc4, nelements, nrbf3, nrbf4, nabf4, K3, Q4, Ni); + if ((nl3 > 0) && (Nij>1)) allbody_forces(l_fij, forcecoeff, rbf, rbfx, rbfy, rbfz, abf, abfx, abfy, abfz, idxi, tj, nelements, nrbf3, nabf3, K3, Nij); - -// begin = std::chrono::high_resolution_clock::now(); -// if ((nl3 > 0) && (Nij>1)) threebody_forces(l_fij, cb3, rbf, rbfx, rbfy, rbfz, abf, abfx, abfy, abfz, sumU, + +// begin = std::chrono::high_resolution_clock::now(); +// if ((nl3 > 0) && (Nij>1)) threebody_forces(l_fij, cb3, rbf, rbfx, rbfy, rbfz, abf, abfx, abfy, abfz, sumU, // idxi, tj, pc3, pn3, elemindex, nelements, nrbf3, nabf3, K3, Ni, Nij); // Kokkos::fence(); -// end = std::chrono::high_resolution_clock::now(); -// comptime[8] += std::chrono::duration_cast(end-begin).count()/1e6; -// -// begin = std::chrono::high_resolution_clock::now(); -// if ((nl4 > 0) && (Nij>2)) fourbody_forces(l_fij, cb4, rbf, rbfx, rbfy, rbfz, abf, abfx, abfy, abfz, sumU, idxi, tj, -// pa4, pb4, pc4, elemindex, nelements, nrbf3, nrbf4, nabf4, K3, Q4, Ni, Nij); +// end = std::chrono::high_resolution_clock::now(); +// comptime[8] += std::chrono::duration_cast(end-begin).count()/1e6; +// +// begin = std::chrono::high_resolution_clock::now(); +// if ((nl4 > 0) && (Nij>2)) fourbody_forces(l_fij, cb4, rbf, rbfx, rbfy, rbfz, abf, abfx, abfy, abfz, sumU, idxi, tj, +// pa4, pb4, pc4, elemindex, nelements, nrbf3, nrbf4, nabf4, K3, Q4, Ni, Nij); // Kokkos::fence(); -// end = std::chrono::high_resolution_clock::now(); -// comptime[9] += std::chrono::duration_cast(end-begin).count()/1e6; +// end = std::chrono::high_resolution_clock::now(); +// comptime[9] += std::chrono::duration_cast(end-begin).count()/1e6; } template -void PairPODKokkos::tallyforce(t_pod_1d l_fij, t_pod_1i l_ai, t_pod_1i l_aj, int Nij) +void PairPODKokkos::tallyforce(t_pod_1d l_fij, t_pod_1i l_ai, t_pod_1i l_aj, int Nij) { auto l_f = f; Kokkos::parallel_for("TallyForce", Nij, KOKKOS_LAMBDA(int n) { @@ -1724,7 +1724,7 @@ void PairPODKokkos::tallyforce(t_pod_1d l_fij, t_pod_1i l_ai, t_pod_ } template -void PairPODKokkos::tallyenergy(t_pod_1d l_ei, int istart, int Ni) +void PairPODKokkos::tallyenergy(t_pod_1d l_ei, int istart, int Ni) { auto l_eatom = d_eatom; @@ -1748,40 +1748,40 @@ void PairPODKokkos::tallyenergy(t_pod_1d l_ei, int istart, int Ni) } template -void PairPODKokkos::tallystress(t_pod_1d l_fij, t_pod_1d l_rij, t_pod_1i l_ai, t_pod_1i l_aj, int Nij) -{ +void PairPODKokkos::tallystress(t_pod_1d l_fij, t_pod_1d l_rij, t_pod_1i l_ai, t_pod_1i l_aj, int Nij) +{ auto l_vatom = d_vatom; if (vflag_global) { for (int j=0; j<3; j++) { - F_FLOAT sum = 0.0; - Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) { + F_FLOAT sum = 0.0; + Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) { int k3 = 3*k; update += l_rij(j + k3) * l_fij(j + k3); }, sum); - virial[j] -= sum; + virial[j] -= sum; } - F_FLOAT sum = 0.0; + F_FLOAT sum = 0.0; Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) { int k3 = 3*k; update += l_rij(k3) * l_fij(1 + k3); }, sum); - virial[3] -= sum; - - sum = 0.0; + virial[3] -= sum; + + sum = 0.0; Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) { int k3 = 3*k; update += l_rij(k3) * l_fij(2 + k3); }, sum); - virial[4] -= sum; - - sum = 0.0; + virial[4] -= sum; + + sum = 0.0; Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) { int k3 = 3*k; update += l_rij(1+k3) * l_fij(2+k3); }, sum); - virial[5] -= sum; + virial[5] -= sum; } if (vflag_atom) { @@ -1812,9 +1812,9 @@ void PairPODKokkos::tallystress(t_pod_1d l_fij, t_pod_1d l_rij, t_po template void PairPODKokkos::savematrix2binfile(std::string filename, t_pod_1d d_A, int nrows, int ncols) { - auto A = Kokkos::create_mirror_view(d_A); - Kokkos::deep_copy(A, d_A); - + auto A = Kokkos::create_mirror_view(d_A); + Kokkos::deep_copy(A, d_A); + FILE *fp = fopen(filename.c_str(), "wb"); double sz[2]; sz[0] = (double) nrows; @@ -1827,9 +1827,9 @@ void PairPODKokkos::savematrix2binfile(std::string filename, t_pod_1 template void PairPODKokkos::saveintmatrix2binfile(std::string filename, t_pod_1i d_A, int nrows, int ncols) { - auto A = Kokkos::create_mirror_view(d_A); - Kokkos::deep_copy(A, d_A); - + auto A = Kokkos::create_mirror_view(d_A); + Kokkos::deep_copy(A, d_A); + FILE *fp = fopen(filename.c_str(), "wb"); int sz[2]; sz[0] = nrows; @@ -1842,28 +1842,28 @@ void PairPODKokkos::saveintmatrix2binfile(std::string filename, t_po template void PairPODKokkos::savedatafordebugging() { - saveintmatrix2binfile("podkktypeai.bin", typeai, ni, 1); - saveintmatrix2binfile("podkknumij.bin", numij, ni+1, 1); - saveintmatrix2binfile("podkkai.bin", ai, nij, 1); - saveintmatrix2binfile("podkkaj.bin", aj, nij, 1); - saveintmatrix2binfile("podkkti.bin", ti, nij, 1); - saveintmatrix2binfile("podkktj.bin", tj, nij, 1); - saveintmatrix2binfile("podkkidxi.bin", idxi, nij, 1); + saveintmatrix2binfile("podkktypeai.bin", typeai, ni, 1); + saveintmatrix2binfile("podkknumij.bin", numij, ni+1, 1); + saveintmatrix2binfile("podkkai.bin", ai, nij, 1); + saveintmatrix2binfile("podkkaj.bin", aj, nij, 1); + saveintmatrix2binfile("podkkti.bin", ti, nij, 1); + saveintmatrix2binfile("podkktj.bin", tj, nij, 1); + saveintmatrix2binfile("podkkidxi.bin", idxi, nij, 1); savematrix2binfile("podkkrbf.bin", rbf, nrbfmax, nij); savematrix2binfile("podkkrbfx.bin", rbfx, nrbfmax, nij); savematrix2binfile("podkkrbfy.bin", rbfy, nrbfmax, nij); - savematrix2binfile("podkkrbfz.bin", rbfz, nrbfmax, nij); + savematrix2binfile("podkkrbfz.bin", rbfz, nrbfmax, nij); int kmax = (K3 > ns) ? K3 : ns; savematrix2binfile("podkkabf.bin", abf, kmax, nij); savematrix2binfile("podkkabfx.bin", abfx, kmax, nij); savematrix2binfile("podkkabfy.bin", abfy, kmax, nij); - savematrix2binfile("podkkabfz.bin", abfz, kmax, nij); - savematrix2binfile("podkkbd.bin", bd, ni, Mdesc); - savematrix2binfile("podkksumU.bin", sumU, nelements * K3 * nrbfmax, ni); + savematrix2binfile("podkkabfz.bin", abfz, kmax, nij); + savematrix2binfile("podkkbd.bin", bd, ni, Mdesc); + savematrix2binfile("podkksumU.bin", sumU, nelements * K3 * nrbfmax, ni); savematrix2binfile("podkkrij.bin", rij, 3, nij); savematrix2binfile("podkkfij.bin", fij, 3, nij); - savematrix2binfile("podkkei.bin", ei, ni, 1); - + savematrix2binfile("podkkei.bin", ei, ni, 1); + error->all(FLERR, "Save data and stop the run for debugging"); } diff --git a/src/KOKKOS/pair_pod_kokkos.h b/src/KOKKOS/pair_pod_kokkos.h index 41dc89cd30..ab6cd3f827 100644 --- a/src/KOKKOS/pair_pod_kokkos.h +++ b/src/KOKKOS/pair_pod_kokkos.h @@ -32,7 +32,7 @@ namespace LAMMPS_NS { template class PairPODKokkos : public PairPOD { - public: + public: typedef DeviceType device_type; typedef ArrayTypes AT; @@ -43,7 +43,7 @@ class PairPODKokkos : public PairPOD { void coeff(int, char **) override; void init_style() override; double init_one(int, int) override; - + //protected: int inum, maxneigh; int host_flag; @@ -53,7 +53,7 @@ class PairPODKokkos : public PairPOD { typename AT::t_neighbors_2d d_neighbors; typename AT::t_int_1d d_ilist; - typename AT::t_int_1d d_numneigh; + typename AT::t_int_1d d_numneigh; // typename AT::t_int_1d_randomread d_ilist; // typename AT::t_int_1d_randomread d_numneigh; @@ -74,34 +74,34 @@ class PairPODKokkos : public PairPOD { friend void pair_virial_fdotr_compute(PairPODKokkos*); - void grow(int, int); + void grow(int, int); void copy_from_pod_class(EAPOD *podptr); void divideInterval(int *intervals, int N, int M); - int calculateNumberOfIntervals(int N, int intervalSize); + int calculateNumberOfIntervals(int N, int intervalSize); void grow_atoms(int Ni); void grow_pairs(int Nij); - + void allocate() override; double memory_usage() override; - typedef Kokkos::View t_pod_1i; + typedef Kokkos::View t_pod_1i; typedef Kokkos::View t_pod_1d; // typedef Kokkos::View t_pod_2i; // typedef Kokkos::View t_pod_2d; // typedef Kokkos::View t_pod_3d3; - + int atomBlockSize; // size of each atom block int nAtomBlocks; // number of atoms blocks int atomBlocks[101]; // atom blocks double comptime[100]; int timing; - - int ni; // number of atoms i in the current atom block - int nij; // number of pairs (i,j) in the current atom block + + int ni; // number of atoms i in the current atom block + int nij; // number of pairs (i,j) in the current atom block int nimax; // maximum number of atoms i - int nijmax; // maximum number of pairs (i,j) - - int nelements; // number of elements + int nijmax; // maximum number of pairs (i,j) + + int nelements; // number of elements int onebody; // one-body descriptors int besseldegree; // degree of Bessel functions int inversedegree; // degree of inverse functions @@ -110,116 +110,116 @@ class PairPODKokkos : public PairPOD { int ns; // number of snapshots for radial basis functions int nl1, nl2, nl3, nl4, nl23, nl33, nl34, nl44, nl; // number of local descriptors int nrbf2, nrbf3, nrbf4, nrbfmax; // number of radial basis functions - int nabf3, nabf4; // number of angular basis functions + int nabf3, nabf4; // number of angular basis functions int K3, K4, Q4; // number of monomials - + // environmental variables int nClusters; // number of environment clusters int nComponents; // number of principal components - int Mdesc; // number of base descriptors + int Mdesc; // number of base descriptors double rin; // inner cut-off radius double rcut; // outer cut-off radius - double rmax; // rcut - rin + double rmax; // rcut - rin double rcutsq; - + t_pod_1d rij; // (xj - xi) for all pairs (I, J) t_pod_1d fij; // force for all pairs (I, J) t_pod_1d ei; // energy for each atom I t_pod_1i typeai; // types of atoms I only - t_pod_1i numij; // number of pairs (I, J) for each atom I + t_pod_1i numij; // number of pairs (I, J) for each atom I t_pod_1i idxi; // storing linear indices of atom I for all pairs (I, J) t_pod_1i ai; // IDs of atoms I for all pairs (I, J) t_pod_1i aj; // IDs of atoms J for all pairs (I, J) t_pod_1i ti; // types of atoms I for all pairs (I, J) - t_pod_1i tj; // types of atoms J for all pairs (I, J) + t_pod_1i tj; // types of atoms J for all pairs (I, J) t_pod_1d besselparams; t_pod_1d Phi; // eigenvectors matrix ns x ns - t_pod_1d rbf; // radial basis functions nij x nrbfmax - t_pod_1d rbfx; // x-derivatives of radial basis functions nij x nrbfmax + t_pod_1d rbf; // radial basis functions nij x nrbfmax + t_pod_1d rbfx; // x-derivatives of radial basis functions nij x nrbfmax t_pod_1d rbfy; // y-derivatives of radial basis functions nij x nrbfmax - t_pod_1d rbfz; // z-derivatives of radial basis functions nij x nrbfmax + t_pod_1d rbfz; // z-derivatives of radial basis functions nij x nrbfmax t_pod_1d abf; // angular basis functions nij x K3 t_pod_1d abfx; // x-derivatives of angular basis functions nij x K3 - t_pod_1d abfy; // y-derivatives of angular basis functions nij x K3 + t_pod_1d abfy; // y-derivatives of angular basis functions nij x K3 t_pod_1d abfz; // z-derivatives of angular basis functions nij x K3 t_pod_1d sumU; // sum of radial basis functions ni x K3 x nrbfmax x nelements t_pod_1d forcecoeff; // force coefficients ni x K3 x nrbfmax x nelements t_pod_1d Proj; // PCA Projection matrix - t_pod_1d Centroids; // centroids of the clusters + t_pod_1d Centroids; // centroids of the clusters t_pod_1d bd; // base descriptors ni x Mdesc t_pod_1d cb; // force coefficients for base descriptors ni x Mdesc t_pod_1d pd; // environment probability descriptors ni x (1 + nComponents + 3*nClusters) t_pod_1d coefficients; // coefficients nCoeffPerElement x nelements t_pod_1i pq3, pn3, pc3; // arrays to compute 3-body angular basis functions - t_pod_1i pa4, pb4, pc4; // arrays to compute 4-body angular basis functions + t_pod_1i pa4, pb4, pc4; // arrays to compute 4-body angular basis functions t_pod_1i ind33l, ind33r; // nl33 t_pod_1i ind34l, ind34r; // nl34 t_pod_1i ind44l, ind44r; // nl44 - t_pod_1i elemindex; - + t_pod_1i elemindex; + void set_array_to_zero(t_pod_1d a, int N); - + int NeighborCount(t_pod_1i, double, int, int); - - void NeighborList(t_pod_1d l_rij, t_pod_1i l_numij, t_pod_1i l_typeai, t_pod_1i l_idxi, + + void NeighborList(t_pod_1d l_rij, t_pod_1i l_numij, t_pod_1i l_typeai, t_pod_1i l_idxi, t_pod_1i l_ai, t_pod_1i l_aj, t_pod_1i l_ti, t_pod_1i l_tj, double l_rcutsq, int gi1, int Ni); - - void radialbasis(t_pod_1d rbft, t_pod_1d rbftx, t_pod_1d rbfty, t_pod_1d rbftz, - t_pod_1d rij, t_pod_1d l_besselparams, double l_rin, double l_rmax, int l_besseldegree, - int l_inversedegree, int l_nbesselpars, int l_ns, int Nij); - - void matrixMultiply(t_pod_1d a, t_pod_1d b, t_pod_1d c, int r1, int c1, int c2); - + + void radialbasis(t_pod_1d rbft, t_pod_1d rbftx, t_pod_1d rbfty, t_pod_1d rbftz, + t_pod_1d rij, t_pod_1d l_besselparams, double l_rin, double l_rmax, int l_besseldegree, + int l_inversedegree, int l_nbesselpars, int l_ns, int Nij); + + void matrixMultiply(t_pod_1d a, t_pod_1d b, t_pod_1d c, int r1, int c1, int c2); + void angularbasis(t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, - t_pod_1d l_rij, t_pod_1i l_pq3, int l_K3, int N); - - void radialangularsum(t_pod_1d l_sumU, t_pod_1d l_rbf, t_pod_1d l_abf, t_pod_1i l_tj, + t_pod_1d l_rij, t_pod_1i l_pq3, int l_K3, int N); + + void radialangularsum(t_pod_1d l_sumU, t_pod_1d l_rbf, t_pod_1d l_abf, t_pod_1i l_tj, t_pod_1i l_numij, const int l_nelements, const int l_nrbf3, const int l_K3, const int Ni, const int Nij); - void twobodydesc(t_pod_1d d2, t_pod_1d l_rbf, t_pod_1i l_idxi, t_pod_1i l_tj, int l_nrbf2, const int Ni, const int Nij); - - void threebodydesc(t_pod_1d d3, t_pod_1d l_sumU, t_pod_1i l_pc3, t_pod_1i l_pn3, + void twobodydesc(t_pod_1d d2, t_pod_1d l_rbf, t_pod_1i l_idxi, t_pod_1i l_tj, int l_nrbf2, const int Ni, const int Nij); + + void threebodydesc(t_pod_1d d3, t_pod_1d l_sumU, t_pod_1i l_pc3, t_pod_1i l_pn3, int l_nelements, int l_nrbf3, int l_nabf3, int l_K3, const int Ni); - - void fourbodydesc(t_pod_1d d4, t_pod_1d l_sumU, t_pod_1i l_pa4, t_pod_1i l_pb4, t_pod_1i l_pc4, + + void fourbodydesc(t_pod_1d d4, t_pod_1d l_sumU, t_pod_1i l_pa4, t_pod_1i l_pb4, t_pod_1i l_pc4, int l_nelements, int l_nrbf3, int l_nrbf4, int l_nabf4, int l_K3, int l_Q4, int Ni); - + void crossdesc(t_pod_1d d12, t_pod_1d d1, t_pod_1d d2, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni); - - void crossdesc_reduction(t_pod_1d cb1, t_pod_1d cb2, t_pod_1d c12, t_pod_1d d1, - t_pod_1d d2, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni); + + void crossdesc_reduction(t_pod_1d cb1, t_pod_1d cb2, t_pod_1d c12, t_pod_1d d1, + t_pod_1d d2, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni); void blockatom_base_descriptors(t_pod_1d bd, int Ni, int Nij); void blockatom_base_coefficients(t_pod_1d ei, t_pod_1d cb, t_pod_1d B, int Ni); - void blockatom_environment_descriptors(t_pod_1d ei, t_pod_1d cb, t_pod_1d B, int Ni); - - void twobody_forces(t_pod_1d fij, t_pod_1d cb2, t_pod_1d l_rbfx, t_pod_1d l_rbfy, t_pod_1d l_rbfz, + void blockatom_environment_descriptors(t_pod_1d ei, t_pod_1d cb, t_pod_1d B, int Ni); + + void twobody_forces(t_pod_1d fij, t_pod_1d cb2, t_pod_1d l_rbfx, t_pod_1d l_rbfy, t_pod_1d l_rbfz, t_pod_1i l_idxi, t_pod_1i l_tj, int l_nrbf2, const int Ni, const int Nij); - void threebody_forces(t_pod_1d fij, t_pod_1d cb3, t_pod_1d l_rbf, t_pod_1d l_rbfx, - t_pod_1d l_rbfy, t_pod_1d l_rbfz, t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, - t_pod_1d l_sumU, t_pod_1i l_idxi, t_pod_1i l_tj, t_pod_1i l_pc3, t_pod_1i l_pn3, t_pod_1i l_elemindex, + void threebody_forces(t_pod_1d fij, t_pod_1d cb3, t_pod_1d l_rbf, t_pod_1d l_rbfx, + t_pod_1d l_rbfy, t_pod_1d l_rbfz, t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, + t_pod_1d l_sumU, t_pod_1i l_idxi, t_pod_1i l_tj, t_pod_1i l_pc3, t_pod_1i l_pn3, t_pod_1i l_elemindex, int l_nelements, int l_nrbf3, int l_nabf3, int l_K3, int Ni, int Nij); - void fourbody_forces(t_pod_1d fij, t_pod_1d cb4, t_pod_1d l_rbf, t_pod_1d l_rbfx, t_pod_1d l_rbfy, - t_pod_1d l_rbfz, t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, t_pod_1d l_sumU, - t_pod_1i l_idxi, t_pod_1i l_tj, t_pod_1i l_pa4, t_pod_1i l_pb4, t_pod_1i l_pc4, t_pod_1i l_elemindex, + void fourbody_forces(t_pod_1d fij, t_pod_1d cb4, t_pod_1d l_rbf, t_pod_1d l_rbfx, t_pod_1d l_rbfy, + t_pod_1d l_rbfz, t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, t_pod_1d l_sumU, + t_pod_1i l_idxi, t_pod_1i l_tj, t_pod_1i l_pa4, t_pod_1i l_pb4, t_pod_1i l_pc4, t_pod_1i l_elemindex, int l_nelements, int l_nrbf3, int l_nrbf4, int l_nabf4, int l_K3, int l_Q4, int Ni, int Nij); - - void threebody_forcecoeff(t_pod_1d fb3, t_pod_1d cb3, t_pod_1d l_sumU, t_pod_1i l_pc3, + + void threebody_forcecoeff(t_pod_1d fb3, t_pod_1d cb3, t_pod_1d l_sumU, t_pod_1i l_pc3, t_pod_1i l_pn3, t_pod_1i l_elemindex, int l_nelements, int l_nrbf3, int l_nabf3, int l_K3, int Ni); - - void fourbody_forcecoeff(t_pod_1d fb4, t_pod_1d cb4, t_pod_1d l_sumU, t_pod_1i l_pa4, + + void fourbody_forcecoeff(t_pod_1d fb4, t_pod_1d cb4, t_pod_1d l_sumU, t_pod_1i l_pa4, t_pod_1i l_pb4, t_pod_1i l_pc4, int l_nelements, int l_nrbf3, int l_nrbf4, int l_nabf4, int l_K3, int l_Q4, int Ni); - void allbody_forces(t_pod_1d fij, t_pod_1d l_forcecoeff, t_pod_1d l_rbf, t_pod_1d l_rbfx, - t_pod_1d l_rbfy, t_pod_1d l_rbfz, t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, + void allbody_forces(t_pod_1d fij, t_pod_1d l_forcecoeff, t_pod_1d l_rbf, t_pod_1d l_rbfx, + t_pod_1d l_rbfy, t_pod_1d l_rbfz, t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, t_pod_1i l_idxi, t_pod_1i l_tj, int l_nelements, int l_nrbf3, int l_nabf3, int l_K3, int Nij); - + void blockatom_energyforce(t_pod_1d l_ei, t_pod_1d l_fij, int Ni, int Nij); void tallyenergy(t_pod_1d l_ei, int istart, int Ni); - void tallyforce(t_pod_1d l_fij, t_pod_1i l_ai, t_pod_1i l_aj, int Nij); - void tallystress(t_pod_1d l_fij, t_pod_1d l_rij, t_pod_1i l_ai, t_pod_1i l_aj, int Nij); - + void tallyforce(t_pod_1d l_fij, t_pod_1i l_ai, t_pod_1i l_aj, int Nij); + void tallystress(t_pod_1d l_fij, t_pod_1d l_rij, t_pod_1i l_ai, t_pod_1i l_aj, int Nij); + void savematrix2binfile(std::string filename, t_pod_1d d_A, int nrows, int ncols); void saveintmatrix2binfile(std::string filename, t_pod_1i d_A, int nrows, int ncols); void savedatafordebugging(); diff --git a/src/ML-POD/compute_pod_atom.cpp b/src/ML-POD/compute_pod_atom.cpp index 3a2ef12b9a..29a6dc060a 100644 --- a/src/ML-POD/compute_pod_atom.cpp +++ b/src/ML-POD/compute_pod_atom.cpp @@ -34,33 +34,33 @@ enum{SCALAR,VECTOR,ARRAY}; ComputePODAtom::ComputePODAtom(LAMMPS *lmp, int narg, char **arg) : Compute(lmp, narg, arg), list(nullptr), map(nullptr), pod(nullptr), elements(nullptr) -{ +{ int nargmin = 7; if (narg < nargmin) error->all(FLERR, "Illegal compute {} command", style); if (comm->nprocs > 1) error->all(FLERR, "compute command does not support multi processors"); - + std::string pod_file = std::string(arg[3]); // pod input file std::string coeff_file = ""; // coefficient input file std::string proj_file = std::string(arg[4]); // coefficient input file - std::string centroid_file = std::string(arg[5]); // coefficient input file - podptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, centroid_file); - + std::string centroid_file = std::string(arg[5]); // coefficient input file + podptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, centroid_file); + int ntypes = atom->ntypes; memory->create(map, ntypes + 1, "compute_pod_global:map"); - map_element2type(narg - 6, arg + 6, podptr->nelements); - - //size_array_rows = 1 + 3*atom->natoms; + map_element2type(narg - 6, arg + 6, podptr->nelements); + + //size_array_rows = 1 + 3*atom->natoms; //size_array_cols = podptr->nCoeffAll; - + cutmax = podptr->rcut; - nmax = 0; + nmax = 0; nijmax = 0; pod = nullptr; - elements = nullptr; - + elements = nullptr; + size_peratom_cols = podptr->Mdesc * podptr->nClusters; peratom_flag = 1; } @@ -120,23 +120,23 @@ void ComputePODAtom::compute_peratom() for (int icoeff = 0; icoeff < size_peratom_cols; icoeff++) { pod[i][icoeff] = 0.0; } - + // invoke full neighbor list (will copy or build if necessary) neighbor->build_one(list); - + double **x = atom->x; int **firstneigh = list->firstneigh; int *numneigh = list->numneigh; int *type = atom->type; int *ilist = list->ilist; - int inum = list->inum; + int inum = list->inum; int nClusters = podptr->nClusters; int Mdesc = podptr->Mdesc; double rcutsq = podptr->rcut*podptr->rcut; - + for (int ii = 0; ii < inum; ii++) { - int i = ilist[ii]; + int i = ilist[ii]; int jnum = numneigh[i]; // allocate temporary memory @@ -145,32 +145,32 @@ void ComputePODAtom::compute_peratom() podptr->free_temp_memory(); podptr->allocate_temp_memory(nijmax); } - - rij = &podptr->tmpmem[0]; - tmpmem = &podptr->tmpmem[3*nijmax]; - ai = &podptr->tmpint[0]; - aj = &podptr->tmpint[nijmax]; + + rij = &podptr->tmpmem[0]; + tmpmem = &podptr->tmpmem[3*nijmax]; + ai = &podptr->tmpint[0]; + aj = &podptr->tmpint[nijmax]; ti = &podptr->tmpint[2*nijmax]; tj = &podptr->tmpint[3*nijmax]; // get neighbor list for atom i lammpsNeighborList(x, firstneigh, atom->tag, type, numneigh, rcutsq, i); - + if (nij > 0) { // peratom base descriptors double *bd = &podptr->bd[0]; double *bdd = &podptr->bdd[0]; - podptr->peratombase_descriptors(bd, bdd, rij, tmpmem, ti, tj, nij); + podptr->peratombase_descriptors(bd, bdd, rij, tmpmem, ti, tj, nij); if (nClusters>1) { // peratom env descriptors double *pd = &podptr->pd[0]; double *pdd = &podptr->pdd[0]; - podptr->peratomenvironment_descriptors(pd, pdd, bd, bdd, tmpmem, ti[0] - 1, nij); + podptr->peratomenvironment_descriptors(pd, pdd, bd, bdd, tmpmem, ti[0] - 1, nij); for (int k = 0; k < nClusters; k++) for (int m = 0; m < Mdesc; m++) { int mk = m + Mdesc*k; - pod[i][mk] = pd[k]*bd[m]; + pod[i][mk] = pd[k]*bd[m]; // for (int n=0; nall(FLERR, "Illegal compute {} command", style); + if (narg < nargmin) error->all(FLERR, "Illegal compute {} command", style); if (comm->nprocs > 1) error->all(FLERR, "compute command does not support multi processors"); - + std::string pod_file = std::string(arg[3]); // pod input file std::string coeff_file = ""; // coefficient input file std::string proj_file = std::string(arg[4]); // coefficient input file - std::string centroid_file = std::string(arg[5]); // coefficient input file - podptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, centroid_file); - + std::string centroid_file = std::string(arg[5]); // coefficient input file + podptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, centroid_file); + int ntypes = atom->ntypes; memory->create(map, ntypes + 1, "compute_pod_global:map"); - map_element2type(narg - 6, arg + 6, podptr->nelements); - - size_array_rows = 1 + 3*atom->natoms; + map_element2type(narg - 6, arg + 6, podptr->nelements); + + size_array_rows = 1 + 3*atom->natoms; size_array_cols = podptr->nCoeffAll; cutmax = podptr->rcut; - + nijmax = 0; pod = nullptr; - elements = nullptr; + elements = nullptr; } /* ---------------------------------------------------------------------- */ @@ -87,7 +87,7 @@ void ComputePODGlobal::init() if (modify->get_compute_by_style("pod").size() > 1 && comm->me == 0) error->warning(FLERR,"More than one compute pod"); - + // allocate memory for global array memory->create(pod,size_array_rows,size_array_cols, "compute_pod_global:pod"); @@ -105,7 +105,7 @@ void ComputePODGlobal::init_list(int /*id*/, NeighList *ptr) void ComputePODGlobal::compute_array() { - // int ntotal = atom->nlocal + atom->nghost; + // int ntotal = atom->nlocal + atom->nghost; invoked_peratom = update->ntimestep; // clear global array @@ -113,23 +113,23 @@ void ComputePODGlobal::compute_array() for (int irow = 0; irow < size_array_rows; irow++) for (int icoeff = 0; icoeff < size_array_cols; icoeff++) pod[irow][icoeff] = 0.0; - + // invoke full neighbor list (will copy or build if necessary) neighbor->build_one(list); - + double **x = atom->x; int **firstneigh = list->firstneigh; int *numneigh = list->numneigh; int *type = atom->type; int *ilist = list->ilist; - int inum = list->inum; + int inum = list->inum; int nClusters = podptr->nClusters; int Mdesc = podptr->Mdesc; int nCoeffPerElement = podptr->nCoeffPerElement; - + double rcutsq = podptr->rcut*podptr->rcut; - + for (int ii = 0; ii < inum; ii++) { int i = ilist[ii]; int jnum = numneigh[i]; @@ -140,22 +140,22 @@ void ComputePODGlobal::compute_array() podptr->free_temp_memory(); podptr->allocate_temp_memory(nijmax); } - - rij = &podptr->tmpmem[0]; - tmpmem = &podptr->tmpmem[3*nijmax]; - ai = &podptr->tmpint[0]; - aj = &podptr->tmpint[nijmax]; + + rij = &podptr->tmpmem[0]; + tmpmem = &podptr->tmpmem[3*nijmax]; + ai = &podptr->tmpint[0]; + aj = &podptr->tmpint[nijmax]; ti = &podptr->tmpint[2*nijmax]; tj = &podptr->tmpint[3*nijmax]; - + // get neighbor list for atom i lammpsNeighborList(x, firstneigh, atom->tag, type, numneigh, rcutsq, i); - + if (nij > 0) { // peratom base descriptors double *bd = &podptr->bd[0]; - double *bdd = &podptr->bdd[0]; - podptr->peratombase_descriptors(bd, bdd, rij, tmpmem, ti, tj, nij); + double *bdd = &podptr->bdd[0]; + podptr->peratombase_descriptors(bd, bdd, rij, tmpmem, ti, tj, nij); pod[0][nCoeffPerElement*(ti[0]-1)] += 1.0; // one-body descriptor @@ -163,7 +163,7 @@ void ComputePODGlobal::compute_array() // peratom env descriptors double *pd = &podptr->pd[0]; double *pdd = &podptr->pdd[0]; - podptr->peratomenvironment_descriptors(pd, pdd, bd, bdd, tmpmem, ti[0] - 1, nij); + podptr->peratomenvironment_descriptors(pd, pdd, bd, bdd, tmpmem, ti[0] - 1, nij); for (int j = 0; j < nClusters; j++) { for (int m=0; mall(FLERR, "Illegal compute {} command", style); if (comm->nprocs > 1) error->all(FLERR, "compute command does not support multi processors"); - + std::string pod_file = std::string(arg[3]); // pod input file std::string coeff_file = ""; // coefficient input file std::string proj_file = std::string(arg[4]); // coefficient input file - std::string centroid_file = std::string(arg[5]); // coefficient input file - podptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, centroid_file); - + std::string centroid_file = std::string(arg[5]); // coefficient input file + podptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, centroid_file); + int ntypes = atom->ntypes; memory->create(map, ntypes + 1, "compute_pod_local:map"); - - map_element2type(narg - 6, arg + 6, podptr->nelements); - + + map_element2type(narg - 6, arg + 6, podptr->nelements); + int numdesc = podptr->Mdesc * podptr->nClusters; - size_array_rows = 1 + 3*atom->natoms; + size_array_rows = 1 + 3*atom->natoms; size_array_cols = atom->natoms*numdesc; cutmax = podptr->rcut; - + nijmax = 0; pod = nullptr; - elements = nullptr; + elements = nullptr; } /* ---------------------------------------------------------------------- */ @@ -89,7 +89,7 @@ void ComputePODLocal::init() if (modify->get_compute_by_style("pod").size() > 1 && comm->me == 0) error->warning(FLERR,"More than one compute pod"); - + // allocate memory for global array memory->create(pod,size_array_rows,size_array_cols, "compute_pod_local:pod"); @@ -107,7 +107,7 @@ void ComputePODLocal::init_list(int /*id*/, NeighList *ptr) void ComputePODLocal::compute_array() { - // int ntotal = atom->nlocal + atom->nghost; + // int ntotal = atom->nlocal + atom->nghost; invoked_peratom = update->ntimestep; // clear global array @@ -115,22 +115,22 @@ void ComputePODLocal::compute_array() for (int irow = 0; irow < size_array_rows; irow++) for (int icoeff = 0; icoeff < size_array_cols; icoeff++) pod[irow][icoeff] = 0.0; - + // invoke full neighbor list (will copy or build if necessary) neighbor->build_one(list); - + double **x = atom->x; int **firstneigh = list->firstneigh; int *numneigh = list->numneigh; int *type = atom->type; int *ilist = list->ilist; - int inum = list->inum; + int inum = list->inum; int nClusters = podptr->nClusters; int Mdesc = podptr->Mdesc; - + double rcutsq = podptr->rcut*podptr->rcut; - + for (int ii = 0; ii < inum; ii++) { int i = ilist[ii]; int jnum = numneigh[i]; @@ -141,32 +141,32 @@ void ComputePODLocal::compute_array() podptr->free_temp_memory(); podptr->allocate_temp_memory(nijmax); } - - rij = &podptr->tmpmem[0]; - tmpmem = &podptr->tmpmem[3*nijmax]; - ai = &podptr->tmpint[0]; - aj = &podptr->tmpint[nijmax]; + + rij = &podptr->tmpmem[0]; + tmpmem = &podptr->tmpmem[3*nijmax]; + ai = &podptr->tmpint[0]; + aj = &podptr->tmpint[nijmax]; ti = &podptr->tmpint[2*nijmax]; tj = &podptr->tmpint[3*nijmax]; - + // get neighbor list for atom i lammpsNeighborList(x, firstneigh, atom->tag, type, numneigh, rcutsq, i); - + if (nij > 0) { // peratom base descriptors double *bd = &podptr->bd[0]; - double *bdd = &podptr->bdd[0]; - podptr->peratombase_descriptors(bd, bdd, rij, tmpmem, ti, tj, nij); + double *bdd = &podptr->bdd[0]; + podptr->peratombase_descriptors(bd, bdd, rij, tmpmem, ti, tj, nij); if (nClusters>1) { // peratom env descriptors double *pd = &podptr->pd[0]; double *pdd = &podptr->pdd[0]; - podptr->peratomenvironment_descriptors(pd, pdd, bd, bdd, tmpmem, ti[0] - 1, nij); + podptr->peratomenvironment_descriptors(pd, pdd, bd, bdd, tmpmem, ti[0] - 1, nij); for (int k = 0; k < nClusters; k++) for (int m = 0; m < Mdesc; m++) { int imk = m + Mdesc*k + Mdesc*nClusters*i; - pod[0][imk] = pd[k]*bd[m]; + pod[0][imk] = pd[k]*bd[m]; for (int n=0; nall(FLERR, "Illegal compute {} command", style); if (comm->nprocs > 1) error->all(FLERR, "compute command does not support multi processors"); - + std::string pod_file = std::string(arg[3]); // pod input file std::string coeff_file = ""; // coefficient input file std::string proj_file = std::string(arg[4]); // coefficient input file - std::string centroid_file = std::string(arg[5]); // coefficient input file - podptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, centroid_file); - + std::string centroid_file = std::string(arg[5]); // coefficient input file + podptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, centroid_file); + int ntypes = atom->ntypes; memory->create(map, ntypes + 1, "compute_pod_global:map"); - map_element2type(narg - 6, arg + 6, podptr->nelements); - + map_element2type(narg - 6, arg + 6, podptr->nelements); + cutmax = podptr->rcut; - nmax = 0; + nmax = 0; nijmax = 0; pod = nullptr; - elements = nullptr; - + elements = nullptr; + size_peratom_cols = podptr->Mdesc * podptr->nClusters*3*atom->natoms; peratom_flag = 1; } @@ -117,23 +117,23 @@ void ComputePODDAtom::compute_peratom() for (int icoeff = 0; icoeff < size_peratom_cols; icoeff++) { pod[i][icoeff] = 0.0; } - + // invoke full neighbor list (will copy or build if necessary) neighbor->build_one(list); - + double **x = atom->x; int **firstneigh = list->firstneigh; int *numneigh = list->numneigh; int *type = atom->type; int *ilist = list->ilist; - int inum = list->inum; + int inum = list->inum; int nClusters = podptr->nClusters; int Mdesc = podptr->Mdesc; double rcutsq = podptr->rcut*podptr->rcut; - + for (int ii = 0; ii < inum; ii++) { - int i = ilist[ii]; + int i = ilist[ii]; int jnum = numneigh[i]; // allocate temporary memory @@ -142,34 +142,34 @@ void ComputePODDAtom::compute_peratom() podptr->free_temp_memory(); podptr->allocate_temp_memory(nijmax); } - - rij = &podptr->tmpmem[0]; - tmpmem = &podptr->tmpmem[3*nijmax]; - ai = &podptr->tmpint[0]; - aj = &podptr->tmpint[nijmax]; + + rij = &podptr->tmpmem[0]; + tmpmem = &podptr->tmpmem[3*nijmax]; + ai = &podptr->tmpint[0]; + aj = &podptr->tmpint[nijmax]; ti = &podptr->tmpint[2*nijmax]; tj = &podptr->tmpint[3*nijmax]; // get neighbor list for atom i lammpsNeighborList(x, firstneigh, atom->tag, type, numneigh, rcutsq, i); - + if (nij > 0) { // peratom base descriptors double *bd = &podptr->bd[0]; double *bdd = &podptr->bdd[0]; - podptr->peratombase_descriptors(bd, bdd, rij, tmpmem, ti, tj, nij); + podptr->peratombase_descriptors(bd, bdd, rij, tmpmem, ti, tj, nij); if (nClusters>1) { // peratom env descriptors double *pd = &podptr->pd[0]; double *pdd = &podptr->pdd[0]; - podptr->peratomenvironment_descriptors(pd, pdd, bd, bdd, tmpmem, ti[0] - 1, nij); + podptr->peratomenvironment_descriptors(pd, pdd, bd, bdd, tmpmem, ti[0] - 1, nij); for (int n=0; nall(FLERR,"number of coefficients in the coefficient file is not correct"); } if (nClusters > 1) { - // read projection matrix file to podstruct + // read projection matrix file to podstruct if (proj_file != "") { nproj = read_projection_matrix(proj_file); if (nproj != nComponents*Mdesc*nelements) error->all(FLERR,"number of coefficients in the projection file is not correct"); } - + // read centroids file to podstruct if (centroids_file != "") { ncentroids = read_centroids(centroids_file); if (ncentroids != nComponents*nClusters*nelements) error->all(FLERR,"number of coefficients in the projection file is not correct"); } - } + } } // destructor @@ -129,7 +129,7 @@ EAPOD::~EAPOD() memory->destroy(bdd); memory->destroy(pd); memory->destroy(pdd); - memory->destroy(coeff); + memory->destroy(coeff); memory->destroy(tmpmem); memory->destroy(tmpint); memory->destroy(pn3); @@ -150,7 +150,7 @@ EAPOD::~EAPOD() memory->destroy(ind44l); memory->destroy(ind33r); memory->destroy(ind34r); - memory->destroy(ind44r); + memory->destroy(ind44r); } void EAPOD::read_pod_file(std::string pod_file) @@ -268,7 +268,7 @@ void EAPOD::read_pod_file(std::string pod_file) if (P4 < P44) error->all(FLERR,"seven-body angular degree must be equal or less than four-body angular degree"); if (P3 > 12) error->all(FLERR,"three-body angular degree must be equal or less than 12"); - if (P4 > 6) error->all(FLERR,"four-body angular degree must be equal or less than 6"); + if (P4 > 6) error->all(FLERR,"four-body angular degree must be equal or less than 6"); int Ne = nelements; memory->create(elemindex, Ne*Ne, "elemindex"); @@ -416,7 +416,7 @@ void EAPOD::read_pod_file(std::string pod_file) utils::logmesg(lmp, "number of local descriptors per element for five-body potential: {}\n", nl33); utils::logmesg(lmp, "number of local descriptors per element for six-body potential: {}\n", nl34); utils::logmesg(lmp, "number of local descriptors per element for seven-body potential: {}\n", nl44); - utils::logmesg(lmp, "number of local descriptors per element for all potentials: {}\n", nl); + utils::logmesg(lmp, "number of local descriptors per element for all potentials: {}\n", nl); utils::logmesg(lmp, "number of global descriptors: {}\n", nCoeffAll); utils::logmesg(lmp, "**************** End of POD Potentials ****************\n\n"); } @@ -702,7 +702,7 @@ void EAPOD::peratombase_descriptors(double *bd1, double *bdd1, double *rij, doub for (int i=0; i<3*Nj*Mdesc; i++) bdd1[i] = 0.0; if (Nj == 0) return; - + double *d2 = &bd1[0]; // nl2 double *d3 = &bd1[nl2]; // nl3 double *d4 = &bd1[nl2 + nl3]; // nl4 @@ -711,8 +711,8 @@ void EAPOD::peratombase_descriptors(double *bd1, double *bdd1, double *rij, doub double *d34 = &bd1[nl2 + nl3 + nl4 + nl23 + nl33]; // nl34 double *d44 = &bd1[nl2 + nl3 + nl4 + nl23 + nl33 + nl34]; // nl44 - double *dd2 = &bdd1[0]; // 3*Nj*nl2 - double *dd3 = &bdd1[3*Nj*nl2]; // 3*Nj*nl3 + double *dd2 = &bdd1[0]; // 3*Nj*nl2 + double *dd3 = &bdd1[3*Nj*nl2]; // 3*Nj*nl3 double *dd4 = &bdd1[3*Nj*(nl2+nl3)]; // 3*Nj*nl4 double *dd23 = &bdd1[3*Nj*(nl2+nl3+nl4)]; // 3*Nj*nl23 double *dd33 = &bdd1[3*Nj*(nl2+nl3+nl4+nl23)]; // 3*Nj*nl33 @@ -797,7 +797,7 @@ void EAPOD::peratombase_descriptors(double *bd1, double *bdd1, double *rij, doub } } fourbodydescderiv(d4, dd4, sumU, Ux, Uy, Uz, tj, Nj); - + if ((nl34>0) && (Nj>4)) { crossdesc(d34, d3, d4, ind34l, ind34r, nl34); crossdescderiv(dd34, d3, d4, dd3, dd4, ind34l, ind34r, nl34, 3*Nj); @@ -812,39 +812,39 @@ void EAPOD::peratombase_descriptors(double *bd1, double *bdd1, double *rij, doub } double EAPOD::peratombase_coefficients(double *cb, double *bd, int *ti) -{ +{ int nc = nCoeffPerElement*(ti[0]-1); - + double ei = coeff[0 + nc]; - for (int m=0; m 0) && (Nj>2)) { fourbodydesc(d4, sumU); - + if ((nl34>0) && (Nj>4)) { crossdesc(d34, d3, d4, ind34l, ind34r, nl34); } @@ -1174,47 +1174,47 @@ double EAPOD::peratomenergyforce2(double *fij, double *rij, double *temp, } } } - - double *cb = &bdd[0]; - if (nClusters > 1) { + + double *cb = &bdd[0]; + if (nClusters > 1) { e += peratom_environment_descriptors(cb, bd, &temp[4*n1 + n5 + 4*n2], ti); } else { e += peratombase_coefficients(cb, bd, ti); - } - + } + double *cb2 = &cb[0]; // nl3 double *cb3 = &cb[nl2]; // nl3 double *cb4 = &cb[(nl2 + nl3)]; // nl4 double *cb33 = &cb[(nl2 + nl3 + nl4)]; // nl33 double *cb34 = &cb[(nl2 + nl3 + nl4 + nl33)]; // nl34 double *cb44 = &cb[(nl2 + nl3 + nl4 + nl33 + nl34)]; // nl44 - + if ((nl33>0) && (Nj>3)) { - crossdesc_reduction(cb3, cb3, cb33, d3, d3, ind33l, ind33r, nl33); + crossdesc_reduction(cb3, cb3, cb33, d3, d3, ind33l, ind33r, nl33); } if ((nl34>0) && (Nj>4)) { - crossdesc_reduction(cb3, cb4, cb34, d3, d4, ind34l, ind34r, nl34); - } + crossdesc_reduction(cb3, cb4, cb34, d3, d4, ind34l, ind34r, nl34); + } if ((nl44>0) && (Nj>5)) { - crossdesc_reduction(cb4, cb4, cb44, d4, d4, ind44l, ind44r, nl44); - } - - if ((nl2 > 0) && (Nj>0)) twobody_forces(fij, cb2, rbfx, rbfy, rbfz, tj, Nj); + crossdesc_reduction(cb4, cb4, cb44, d4, d4, ind44l, ind44r, nl44); + } + + if ((nl2 > 0) && (Nj>0)) twobody_forces(fij, cb2, rbfx, rbfy, rbfz, tj, Nj); // print_matrix("cb2", 1, nl2, cb2, 1); // print_matrix("rbfx", Nj, nrbf2, rbfx, Nj); // print_matrix("rbfy", Nj, nrbf2, rbfy, Nj); // print_matrix("rbfz", Nj, nrbf2, rbfz, Nj); // print_matrix("fij", 3, Nj, fij, 3); // error->all(FLERR,"stop"); - - // Initialize forcecoeff to zero + + // Initialize forcecoeff to zero double *forcecoeff = &cb[(nl2 + nl3 + nl4)]; // nl33 std::fill(forcecoeff, forcecoeff + nelements * K3 * nrbf3, 0.0); if ((nl3 > 0) && (Nj>1)) threebody_forcecoeff(forcecoeff, cb3, sumU); if ((nl4 > 0) && (Nj>2)) fourbody_forcecoeff(forcecoeff, cb4, sumU); - if ((nl3 > 0) && (Nj>1)) allbody_forces(fij, forcecoeff, Ux, Uy, Uz, tj, Nj); - + if ((nl3 > 0) && (Nj>1)) allbody_forces(fij, forcecoeff, Ux, Uy, Uz, tj, Nj); + return e; } @@ -1222,51 +1222,51 @@ double EAPOD::peratomenergyforce(double *fij, double *rij, double *temp, int *ti, int *tj, int Nj) { if (Nj==0) { - return coeff[nCoeffPerElement*(ti[0]-1)]; + return coeff[nCoeffPerElement*(ti[0]-1)]; } - + int N = 3*Nj; for (int n=0; n 1) { // multi-environment descriptors // calculate multi-environment descriptors and their derivatives with respect to atom coordinates peratomenvironment_descriptors(pd, pdd, bd, bdd, temp, ti[0] - 1, Nj); - + for (int j = 0; jcreate(bd, Mdesc, "bdd"); memory->create(bdd, 3*Nj*Mdesc, "bdd"); memory->create(pd, nClusters, "bdd"); - memory->create(pdd, 3*Nj*nClusters, "bdd"); + memory->create(pdd, 3*Nj*nClusters, "bdd"); } void EAPOD::free_temp_memory() @@ -2684,7 +2684,7 @@ void EAPOD::MatMul(double *c, double *a, double *b, int r1, int c1, int c2) c[i + r1*j] += a[i + r1*k] * b[k + c1*j]; } -void EAPOD::peratomenvironment_descriptors(double *P, double *dP_dR, double *B, double *dB_dR, double *tmp, int elem, int nNeighbors) +void EAPOD::peratomenvironment_descriptors(double *P, double *dP_dR, double *B, double *dB_dR, double *tmp, int elem, int nNeighbors) { double *ProjMat = &Proj[nComponents*Mdesc*elem]; double *centroids = &Centroids[nComponents*nClusters*elem]; @@ -2693,7 +2693,7 @@ void EAPOD::peratomenvironment_descriptors(double *P, double *dP_dR, double *B, double *dD_dpca = &tmp[nComponents + nClusters]; double *dD_dB = &tmp[nComponents + nClusters + nClusters*nComponents]; double *dP_dD = &tmp[nComponents + nClusters + nClusters*nComponents + nClusters*Mdesc]; - double *dP_dB = &tmp[nComponents + nClusters + nClusters*nComponents + nClusters*Mdesc + nClusters*nClusters]; + double *dP_dB = &tmp[nComponents + nClusters + nClusters*nComponents + nClusters*Mdesc + nClusters*nClusters]; // calculate principal components for (int k = 0; k < nComponents; k++) { @@ -2731,7 +2731,7 @@ void EAPOD::peratomenvironment_descriptors(double *P, double *dP_dR, double *B, char cht = 'T'; double alpha = 1.0, beta = 0.0; DGEMM(&chn, &chn, &nClusters, &Mdesc, &nComponents, &alpha, dD_dpca, &nClusters, ProjMat, &nComponents, &beta, dD_dB, &nClusters); - + // calculate dP_dD double S1 = 1 / sumD; double S2 = sumD * sumD; @@ -2744,12 +2744,12 @@ void EAPOD::peratomenvironment_descriptors(double *P, double *dP_dR, double *B, dP_dD[j + j * nClusters] += S1; } - // calculate dP_dB = dP_dD * dD_dB, which are derivatives of probabilities with respect to local descriptors + // calculate dP_dB = dP_dD * dD_dB, which are derivatives of probabilities with respect to local descriptors DGEMM(&chn, &chn, &nClusters, &Mdesc, &nClusters, &alpha, dP_dD, &nClusters, dD_dB, &nClusters, &beta, dP_dB, &nClusters); - - // calculate dP_dR = dB_dR * dP_dB , which are derivatives of probabilities with respect to atomic coordinates + + // calculate dP_dR = dB_dR * dP_dB , which are derivatives of probabilities with respect to atomic coordinates int N = 3*nNeighbors; - DGEMM(&chn, &cht, &N, &nClusters, &Mdesc, &alpha, dB_dR, &N, dP_dB, &nClusters, &beta, dP_dR, &N); + DGEMM(&chn, &cht, &N, &nClusters, &Mdesc, &alpha, dB_dR, &N, dP_dB, &nClusters, &beta, dP_dR, &N); } void EAPOD::init3bodyarray(int *np, int *pq, int *pc, int Pa) diff --git a/src/ML-POD/eapod.h b/src/ML-POD/eapod.h index b58114987d..0addbf416f 100644 --- a/src/ML-POD/eapod.h +++ b/src/ML-POD/eapod.h @@ -86,7 +86,7 @@ public: double rin; double rcut; int true4BodyDesc; - + int nelements; // number of elements int pbc[3]; @@ -110,8 +110,8 @@ public: int nClusters; // number of environment clusters int nComponents; // number of principal components //int nNeighbors; // numbe of neighbors - int Mdesc; // number of base descriptors - + int Mdesc; // number of base descriptors + double *Proj; // PCA Projection matrix double *Centroids; // centroids of the clusters double *bd; // base descriptors @@ -125,7 +125,7 @@ public: int Njmax; int nCoeffPerElement; // number of coefficients per element = (nl1 + Mdesc*nClusters) int nCoeffAll; // number of coefficients for all elements = (nl1 + Mdesc*nClusters)*nelements - int ncoeff; // number of coefficients in the input file + int ncoeff; // number of coefficients in the input file int ns; // number of snapshots for radial basis functions int nd1, nd2, nd3, nd4, nd5, nd6, nd7, nd; // number of global descriptors int nl1, nl2, nl3, nl4, nl5, nl6, nl7, nl; // number of local descriptors @@ -167,19 +167,19 @@ public: int read_projection_matrix(std::string proj_file); int read_centroids(std::string centroids_file); - int estimate_temp_memory(int Nj); - void free_temp_memory(); + int estimate_temp_memory(int Nj); + void free_temp_memory(); void allocate_temp_memory(int Nj); //void mknewcoeff(); void mknewcoeff(double *c, int nc); - void twobodydesc(double *d2, double *rbf, int *tj, int N); + void twobodydesc(double *d2, double *rbf, int *tj, int N); void twobodydescderiv(double *d2, double *dd2, double *rbf, double *rbfx, double *rbfy, double *rbfz, int *tj, int N); void twobody_forces(double *fij, double *cb2, double *rbfx, double *rbfy, double *rbfz, int *tj, int Nj); - + void threebodydesc(double *d3, double *sumU); void threebodydescderiv(double *dd3, double *sumU, double *Ux, double *Uy, double *Uz, int *atomtype, int N); @@ -189,7 +189,7 @@ public: void fourbodydescderiv(double *d4, double *dd4, double *sumU, double *Ux, double *Uy, double *Uz, int *atomtype, int N); void fourbody_forcecoeff(double *fb4, double *cb4, double *sumU); - + void allbody_forces(double *fij, double *forcecoeff, double *rbf, double *rbfx, double *rbfy, double *rbfz, double *abf, double *abfx, double *abfy, double *abfz, int *tj, int Nj); void allbody_forces(double *fij, double *forcecoeff, double *Ux, double *Uy, double *Uz, int *tj, int Nj); @@ -204,7 +204,7 @@ public: int *ti, int *tj, int Nj); double peratombase_coefficients(double *cb, double *bd, int *ti); double peratom_environment_descriptors(double *cb, double *bd, double *tm, int *ti); - + void peratomenvironment_descriptors(double *P, double *dP_dR, double *B, double *dB_dR, double *tmp, int elem, int nNeighbors); void base_descriptors(double *basedesc, double *x, int *atomtype, int *alist, @@ -212,7 +212,7 @@ public: void descriptors(double *basedesc, double *probdesc, double *x, int *atomtype, int *alist, int *jlist, int *pairnumsum, int natom); - + double peratomenergyforce(double *fij, double *rij, double *temp, int *ti, int *tj, int Nj); double peratomenergyforce2(double *fij, double *rij, double *temp, int *ti, int *tj, int Nj); @@ -226,8 +226,8 @@ public: void crossdesc(double *d12, double *d1, double *d2, int *ind1, int *ind2, int n12); void crossdescderiv(double *dd12, double *d1, double *d2, double *dd1, double *dd2, - int *ind1, int *ind2, int n12, int N); - void crossdesc_reduction(double *cb1, double *cb2, double *c12, double *d1, + int *ind1, int *ind2, int n12, int N); + void crossdesc_reduction(double *cb1, double *cb2, double *c12, double *d1, double *d2, int *ind1, int *ind2, int n12); }; diff --git a/src/ML-POD/fitpod_command.cpp b/src/ML-POD/fitpod_command.cpp index ca1b27e260..930843dc51 100644 --- a/src/ML-POD/fitpod_command.cpp +++ b/src/ML-POD/fitpod_command.cpp @@ -64,9 +64,9 @@ void FitPOD::command(int narg, char **arg) cent_file = std::string(arg[4]); // centroid input file else cent_file = ""; - - fastpodptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, cent_file); - + + fastpodptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, cent_file); + desc.nCoeffAll = fastpodptr->nCoeffAll; desc.nClusters = fastpodptr->nClusters; read_data_files(data_file, fastpodptr->species); @@ -76,11 +76,11 @@ void FitPOD::command(int narg, char **arg) if (desc.nClusters > 1) estimate_memory_neighborstruct(envdata, fastpodptr->pbc, fastpodptr->rcut, fastpodptr->nelements); allocate_memory_neighborstruct(); estimate_memory_fastpod(traindata); - estimate_memory_fastpod(testdata); + estimate_memory_fastpod(testdata); allocate_memory_descriptorstruct(fastpodptr->nCoeffAll); if (coeff_file != "") podArrayCopy(desc.c, fastpodptr->coeff, fastpodptr->nCoeffAll); - + if (compute_descriptors==0) { if (((int) envdata.data_path.size() > 1) && (desc.nClusters > 1)) { @@ -631,7 +631,7 @@ void FitPOD::get_data(datastruct &data, const std::vector &species) // Group weights have same size as energy. memory->create(data.we, n, "fitpod:we"); memory->create(data.wf, n, "fitpod:wf"); - + n = data.num_atom_sum; memory->create(data.position, 3*n, "fitpod:position"); memory->create(data.force, 3*n, "fitpod:force"); @@ -811,7 +811,7 @@ void FitPOD::select_data(datastruct &newdata, const datastruct &data) // Group weights have same size as energy. memory->create(newdata.we, n, "fitpod:we"); memory->create(newdata.wf, n, "fitpod:wf"); - + n = newdata.num_atom_sum; memory->create(newdata.position, 3*n, "fitpod:newdata_position"); memory->create(newdata.force, 3*n, "fitpod:newdata_force"); @@ -942,7 +942,7 @@ void FitPOD::read_data_files(const std::string& data_file, const std::vectorme == 0) utils::logmesg(lmp, "**************** Begin of Environment Configuration Set ****************\n"); - get_data(envdata, species); + get_data(envdata, species); if (comm->me == 0) utils::logmesg(lmp, "**************** End of Environment Configuration Set ****************\n"); compute_descriptors = tmp; @@ -993,7 +993,7 @@ void FitPOD::read_data_files(const std::string& data_file, const std::vectorcreate(desc.bd, nb.natom_max*fastpodptr->Mdesc, "fitpod:desc_ld"); memory->create(desc.pd, nb.natom_max*fastpodptr->nClusters, "fitpod:desc_ld"); memory->create(desc.gd, nCoeffAll, "fitpod:desc_gd"); @@ -1280,21 +1280,21 @@ void FitPOD::environment_cluster_calculation(const datastruct &data) { if (comm->me == 0) utils::logmesg(lmp, "**************** Begin Calculating Environment Descriptor Matrix ****************\n"); - - //printf("number of configurations = %d\n", (int) data.num_atom.size()); + + //printf("number of configurations = %d\n", (int) data.num_atom.size()); int nComponents = fastpodptr->nComponents; int Mdesc = fastpodptr->Mdesc; int nClusters = fastpodptr->nClusters; - int nelements = fastpodptr->nelements; + int nelements = fastpodptr->nelements; memory->create(fastpodptr->Centroids, nClusters*nComponents*nelements, "fitpod:centroids"); memory->create(fastpodptr->Proj, Mdesc*nComponents*nelements, "fitpod:P"); int nAtoms = 0; - int nTotalAtoms = 0; + int nTotalAtoms = 0; for (int ci=0; ci < (int) data.num_atom.size(); ci++) { - if ((ci % comm->nprocs) == comm->me) nAtoms += data.num_atom[ci]; - nTotalAtoms += data.num_atom[ci]; + if ((ci % comm->nprocs) == comm->me) nAtoms += data.num_atom[ci]; + nTotalAtoms += data.num_atom[ci]; } double *basedescmatrix = (double *) malloc(nAtoms*Mdesc*sizeof(double)); @@ -1314,19 +1314,19 @@ void FitPOD::environment_cluster_calculation(const datastruct &data) char chu = 'U'; double alpha = 1.0, beta = 0.0; - for (int elem=0; elem < nelements; elem++) { + for (int elem=0; elem < nelements; elem++) { nElemAtoms[elem] = 0; // number of atoms for this element } for (int ci=0; ci < (int) data.num_atom.size(); ci++) { - if ((ci % comm->nprocs) == comm->me) { + if ((ci % comm->nprocs) == comm->me) { int natom = data.num_atom[ci]; int natom_cumsum = data.num_atom_cumsum[ci]; int *atomtype = &data.atomtype[natom_cumsum]; - for (int n=0; nme, nElemAtoms[0], nElemAtoms[1]); // printf("%d %d %d\n",comm->me, nElemAtomsCount[0], nElemAtomsCount[1]); // printf("%d %d %d %d\n",comm->me, nElemAtomsCumSum[0], nElemAtomsCumSum[1], nElemAtomsCumSum[2]); - + MPI_Barrier(MPI_COMM_WORLD); // loop over each configuration in the data set @@ -1352,45 +1352,45 @@ void FitPOD::environment_cluster_calculation(const datastruct &data) base_descriptors_fastpod(data, ci); // basedescmatrix is a Mdesc x nAtoms matrix - int natom = data.num_atom[ci]; + int natom = data.num_atom[ci]; int natom_cumsum = data.num_atom_cumsum[ci]; int *atomtype = &data.atomtype[natom_cumsum]; for (int n=0; nme, nElemAtomsCount[0], nElemAtomsCount[1]); // MPI_Barrier(MPI_COMM_WORLD); -// -// for (int elem=0; elem < nelements; elem++) { // loop over each element +// +// for (int elem=0; elem < nelements; elem++) { // loop over each element // nAtoms = nElemAtoms[elem]; // nTotalAtoms = nAtoms; -// +// // MPI_Barrier(MPI_COMM_WORLD); // MPI_Allreduce(MPI_IN_PLACE, &nTotalAtoms, 1, MPI_INT, MPI_SUM, world); -// +// // double *descmatrix = &basedescmatrix[Mdesc*nElemAtomsCumSum[elem]]; // DGEMM(&chn, &cht, &Mdesc, &Mdesc, &nAtoms, &alpha, descmatrix, &Mdesc, descmatrix, &Mdesc, &beta, A, &Mdesc); -// -// MPI_Barrier(MPI_COMM_WORLD); -// MPI_Allreduce(MPI_IN_PLACE, A, Mdesc*Mdesc, MPI_DOUBLE, MPI_SUM, world); -// +// +// MPI_Barrier(MPI_COMM_WORLD); +// MPI_Allreduce(MPI_IN_PLACE, A, Mdesc*Mdesc, MPI_DOUBLE, MPI_SUM, world); +// // if (comm->me == 0) print_matrix("A", Mdesc, Mdesc, A, Mdesc); // } -// +// // MPI_Barrier(MPI_COMM_WORLD); // error->all(FLERR, "stop for debugging"); - + int save = 0; - for (int elem=0; elem < nelements; elem++) { // loop over each element + for (int elem=0; elem < nelements; elem++) { // loop over each element nAtoms = nElemAtoms[elem]; nTotalAtoms = nAtoms; MPI_Allreduce(MPI_IN_PLACE, &nTotalAtoms, 1, MPI_INT, MPI_SUM, world); @@ -1400,11 +1400,11 @@ void FitPOD::environment_cluster_calculation(const datastruct &data) double *centroids = &fastpodptr->Centroids[nComponents*nClusters*elem]; // Calculate covariance matrix A = basedescmatrix*basedescmatrix'. A is a Mdesc x Mdesc matrix - DGEMM(&chn, &cht, &Mdesc, &Mdesc, &nAtoms, &alpha, descmatrix, &Mdesc, descmatrix, &Mdesc, &beta, A, &Mdesc); - MPI_Allreduce(MPI_IN_PLACE, A, Mdesc*Mdesc, MPI_DOUBLE, MPI_SUM, world); - + DGEMM(&chn, &cht, &Mdesc, &Mdesc, &nAtoms, &alpha, descmatrix, &Mdesc, descmatrix, &Mdesc, &beta, A, &Mdesc); + MPI_Allreduce(MPI_IN_PLACE, A, Mdesc*Mdesc, MPI_DOUBLE, MPI_SUM, world); + //if (comm->me == 0) print_matrix("A", Mdesc, Mdesc, A, Mdesc); - + if ((comm->me == 0) && (save==1)) savematrix2binfile(data.filenametag + "_covariance_matrix_elem" + std::to_string(elem+1) + ".bin", A, Mdesc, Mdesc); @@ -1427,15 +1427,15 @@ void FitPOD::environment_cluster_calculation(const datastruct &data) // Calculate principal compoment analysis matrix pca = P*descmatrix. pca is a nComponents x nAtoms matrix DGEMM(&chn, &chn, &nComponents, &nAtoms, &Mdesc, &alpha, Proj, &nComponents, descmatrix, &Mdesc, &beta, pca, &nComponents); - // initialize centroids - for (int i = 0; i < nClusters * nComponents; i++) centroids[i] = 0.0; - for (int i=0; i < nAtoms; i++) { + // initialize centroids + for (int i = 0; i < nClusters * nComponents; i++) centroids[i] = 0.0; + for (int i=0; i < nAtoms; i++) { int m = (i*nClusters)/nAtoms; for (int j=0; j < nComponents; j++) centroids[j + nComponents*m] += pca[j + nComponents*i]; } - - MPI_Allreduce(MPI_IN_PLACE, centroids, nClusters * nComponents, MPI_DOUBLE, MPI_SUM, world); + + MPI_Allreduce(MPI_IN_PLACE, centroids, nClusters * nComponents, MPI_DOUBLE, MPI_SUM, world); double fac = ((double) nClusters)/((double) nTotalAtoms); for (int i = 0; i < nClusters * nComponents; i++) centroids[i] = centroids[i]*fac; //for (int i = 0; i < desc.nClusters * nComponents; i++) printf("centroids[%d] = %f\n", i, centroids[i]); @@ -1449,21 +1449,21 @@ void FitPOD::environment_cluster_calculation(const datastruct &data) savematrix2binfile(data.filenametag + "_eigenvector_matrix_elem" + std::to_string(elem+1) + ".bin", A, Mdesc, Mdesc); savematrix2binfile(data.filenametag + "_eigenvalues_elem" + std::to_string(elem+1) + ".bin", b, Mdesc, 1); } - savematrix2binfile(data.filenametag + "_desc_matrix_elem" + std::to_string(elem+1) + "_proc" + std::to_string(comm->me+1) + ".bin", descmatrix, Mdesc, nAtoms); + savematrix2binfile(data.filenametag + "_desc_matrix_elem" + std::to_string(elem+1) + "_proc" + std::to_string(comm->me+1) + ".bin", descmatrix, Mdesc, nAtoms); savematrix2binfile(data.filenametag + "_pca_matrix_elem" + std::to_string(elem+1) + "_proc" + std::to_string(comm->me+1) + ".bin", pca, nComponents, nAtoms); saveintmatrix2binfile(data.filenametag + "_cluster_assignments_elem" + std::to_string(elem+1) + "_proc" + std::to_string(comm->me+1) + ".bin", assignments, nAtoms, 1); } //savematrix2binfile(data.filenametag + "_pca_matrix_elem" + std::to_string(elem+1) + "_proc" + std::to_string(comm->me+1) + ".bin", pca, nComponents, nAtoms); - //saveintmatrix2binfile(data.filenametag + "_cluster_assignments_elem" + std::to_string(elem+1) + "_proc" + std::to_string(comm->me+1) + ".bin", assignments, nAtoms, 1); - + //saveintmatrix2binfile(data.filenametag + "_cluster_assignments_elem" + std::to_string(elem+1) + "_proc" + std::to_string(comm->me+1) + ".bin", assignments, nAtoms, 1); + MPI_Barrier(MPI_COMM_WORLD); } if (comm->me == 0) { savedata2textfile(data.filenametag + "_projection_matrix" + ".pod", "projection_matrix: {}\n ", fastpodptr->Proj, nComponents*Mdesc*nelements, 1, 1); - savedata2textfile(data.filenametag + "_centroids" + ".pod", "centroids: {} \n", fastpodptr->Centroids, nComponents*nClusters*nelements, 1, 1); + savedata2textfile(data.filenametag + "_centroids" + ".pod", "centroids: {} \n", fastpodptr->Centroids, nComponents*nClusters*nelements, 1, 1); } - + free(basedescmatrix); free(pca); free(A); @@ -1827,9 +1827,9 @@ void FitPOD::error_analysis(const datastruct &data, double *coeff) int natom = data.num_atom[ci]; int nforce = dim*natom; - emae += outarray[4 + m*ci]; // sum_c |ePOD_c - eDFT_c|/natom_c - essr += outarray[4 + m*ci]*outarray[4 + m*ci]; // sum_c |ePOD_c - eDFT_c|^2/(natom_c)^2 - fmae += outarray[7 + m*ci]*nforce; // sum_c |fPOD_c - fDFT_c| + emae += outarray[4 + m*ci]; // sum_c |ePOD_c - eDFT_c|/natom_c + essr += outarray[4 + m*ci]*outarray[4 + m*ci]; // sum_c |ePOD_c - eDFT_c|^2/(natom_c)^2 + fmae += outarray[7 + m*ci]*nforce; // sum_c |fPOD_c - fDFT_c| fssr += ssrarray[ci]; nforceall += nforce; ci += 1; @@ -1844,15 +1844,15 @@ void FitPOD::error_analysis(const datastruct &data, double *coeff) errors[3 + 4*q] = sqrt(fssr/nforceall); nf += nforceall; - errors[0] += emae; // sum_c |ePOD_c - eDFT_c|/natom_c - errors[1] += essr; // sum_c |ePOD_c - eDFT_c|^2/(natom_c)^2 + errors[0] += emae; // sum_c |ePOD_c - eDFT_c|/natom_c + errors[1] += essr; // sum_c |ePOD_c - eDFT_c|^2/(natom_c)^2 errors[2] += fmae; errors[3] += fssr; } if (nc == 0) nc = 1; if (nf == 0) nf = 1; - errors[0] = errors[0]/nc; // (1/Nc) * sum_c |ePOD_c - eDFT_c|/natom_c + errors[0] = errors[0]/nc; // (1/Nc) * sum_c |ePOD_c - eDFT_c|/natom_c errors[1] = sqrt(errors[1]/nc); // sqrt { (1/Nc) * sum_c |ePOD_c - eDFT_c|^2/(natom_c)^2 } errors[2] = errors[2]/nf; errors[3] = sqrt(errors[3]/nf); @@ -2210,13 +2210,13 @@ void FitPOD::saveintmatrix2binfile(std::string filename, int *A, int nrows, int void FitPOD::savedata2textfile(std::string filename, std::string text, double *A, int n, int m, int dim) { - if (comm->me == 0) { + if (comm->me == 0) { int precision = 15; - FILE *fp = fopen(filename.c_str(), "w"); + FILE *fp = fopen(filename.c_str(), "w"); if (dim==1) { fmt::print(fp, text, n); for (int i = 0; i < n; i++) - fmt::print(fp, "{:<10.{}f} \n", A[i], precision); + fmt::print(fp, "{:<10.{}f} \n", A[i], precision); } else if (dim==2) { fmt::print(fp, text, n); diff --git a/src/ML-POD/fitpod_command.h b/src/ML-POD/fitpod_command.h index c6a2e167c5..8f6ac11177 100644 --- a/src/ML-POD/fitpod_command.h +++ b/src/ML-POD/fitpod_command.h @@ -124,14 +124,14 @@ private: double *gdd=nullptr; // derivatives of global descriptors and peratom descriptors double *A=nullptr; // least-square matrix for all descriptors double *b=nullptr; // least-square vector for all descriptors - double *c=nullptr; // coefficents of descriptors - int szd = 0; + double *c=nullptr; // coefficents of descriptors + int szd = 0; int nCoeffAll = 0; // number of global descriptors - int nClusters = 0; // number of environment clusters + int nClusters = 0; // number of environment clusters }; int save_descriptors = 0; - int compute_descriptors = 0; + int compute_descriptors = 0; datastruct traindata; datastruct testdata; datastruct envdata; @@ -162,7 +162,7 @@ private: void matrix33_multiplication(double *xrot, double *Rmat, double *x, int natom); void matrix33_inverse(double *invA, double *A1, double *A2, double *A3); - double squareDistance(const double *a, const double *b, int DIMENSIONS); + double squareDistance(const double *a, const double *b, int DIMENSIONS); void assignPointsToClusters(double *points, double *centroids, int *assignments, int *clusterSizes, int NUM_POINTS, int NUM_CLUSTERS, int DIMENSION); void updateCentroids(double *points, double *centroids, int *assignments, int *clusterSizes, int NUM_POINTS, int NUM_CLUSTERS, int DIMENSIONS); void KmeansClustering(double *points, double *centroids, int *assignments, int *clusterSizes, int NUM_POINTS, int NUM_CLUSTERS, int DIMENSIONS, int MAX_ITER); @@ -196,7 +196,7 @@ private: void estimate_memory_neighborstruct(const datastruct &data, int *pbc, double rcut, int nelements); void allocate_memory_neighborstruct(); void allocate_memory_descriptorstruct(int nd); - void estimate_memory_fastpod(const datastruct &data); + void estimate_memory_fastpod(const datastruct &data); void local_descriptors_fastpod(const datastruct &data, int ci); void base_descriptors_fastpod(const datastruct &data, int ci); void least_squares_matrix(const datastruct &data, int ci); @@ -204,7 +204,7 @@ private: void descriptors_calculation(const datastruct &data); void environment_cluster_calculation(const datastruct &data); void print_analysis(const datastruct &data, double *outarray, double *errors); - void error_analysis(const datastruct &data, double *coeff); + void error_analysis(const datastruct &data, double *coeff); double energyforce_calculation_fastpod(double *force, const datastruct &data, int ci); void energyforce_calculation(const datastruct &data, double *coeff); }; diff --git a/src/ML-POD/pair_pod.cpp b/src/ML-POD/pair_pod.cpp index 66f720273b..9b79e82416 100644 --- a/src/ML-POD/pair_pod.cpp +++ b/src/ML-POD/pair_pod.cpp @@ -54,7 +54,7 @@ PairPOD::PairPOD(LAMMPS *lmp) : Pair(lmp), fastpodptr(nullptr) ni = 0; nimax = 0; nij = 0; - nijmax = 0; + nijmax = 0; atomBlockSize = 10; nAtomBlocks = 0; @@ -81,7 +81,7 @@ PairPOD::PairPOD(LAMMPS *lmp) : Pair(lmp), fastpodptr(nullptr) forcecoeff = nullptr; Centroids = nullptr; Proj = nullptr; - bd = nullptr; + bd = nullptr; cb = nullptr; bdd = nullptr; pd = nullptr; @@ -98,7 +98,7 @@ PairPOD::PairPOD(LAMMPS *lmp) : Pair(lmp), fastpodptr(nullptr) ind34r = nullptr; ind44l = nullptr; ind44r = nullptr; - elemindex = nullptr; + elemindex = nullptr; } /* ---------------------------------------------------------------------- */ @@ -148,7 +148,7 @@ PairPOD::~PairPOD() memory->destroy(elemindex); delete fastpodptr; - + if (allocated) { memory->destroy(setflag); memory->destroy(cutsq); @@ -181,8 +181,8 @@ void PairPOD::compute(int eflag, int vflag) double rcutsq = rcut*rcut; double evdwl = 0.0; - int blockMode = 0; - if (blockMode==0) { + int blockMode = 0; + if (blockMode==0) { for (int ii = 0; ii < inum; ii++) { int i = ilist[ii]; int jnum = numneigh[i]; @@ -194,17 +194,17 @@ void PairPOD::compute(int eflag, int vflag) fastpodptr->allocate_temp_memory(nijmax); } - double *rij1 = &fastpodptr->tmpmem[0]; - double *fij1 = &fastpodptr->tmpmem[3*nijmax]; - double *tmp = &fastpodptr->tmpmem[6*nijmax]; - int *ai1 = &fastpodptr->tmpint[0]; - int *aj1 = &fastpodptr->tmpint[nijmax]; + double *rij1 = &fastpodptr->tmpmem[0]; + double *fij1 = &fastpodptr->tmpmem[3*nijmax]; + double *tmp = &fastpodptr->tmpmem[6*nijmax]; + int *ai1 = &fastpodptr->tmpint[0]; + int *aj1 = &fastpodptr->tmpint[nijmax]; int *ti1 = &fastpodptr->tmpint[2*nijmax]; - int *tj1 = &fastpodptr->tmpint[3*nijmax]; + int *tj1 = &fastpodptr->tmpint[3*nijmax]; lammpsNeighborList(rij1, ai1, aj1, ti1, tj1, x, firstneigh, type, map, numneigh, rcutsq, i); - + evdwl = fastpodptr->peratomenergyforce2(fij1, rij1, tmp, ti1, tj1, nij); - + // tally atomic energy to global energy ev_tally_full(i,2.0*evdwl,0.0,0.0,0.0,0.0,0.0); @@ -219,34 +219,34 @@ void PairPOD::compute(int eflag, int vflag) fij1[0 + 3*jj],fij1[1 + 3*jj],fij1[2 + 3*jj], -rij1[0 + 3*jj], -rij1[1 + 3*jj], -rij1[2 + 3*jj]); } - } - } + } + } } else if (blockMode == 1) { // determine the number of atom blocks and divide atoms into blocks nAtomBlocks = calculateNumberOfIntervals(inum, atomBlockSize); - if (nAtomBlocks > 100) nAtomBlocks = 100; + if (nAtomBlocks > 100) nAtomBlocks = 100; divideInterval(atomBlocks, inum, nAtomBlocks); int nmax = 0; - for (int block =0; blocknClusters > 1) { - if (proj_file == "") error->all(FLERR,"The projection file name can not be empty when the number of clusters is greater than 1."); + if (proj_file == "") error->all(FLERR,"The projection file name can not be empty when the number of clusters is greater than 1."); if (centroid_file == "") error->all(FLERR,"The centroids file name can not be empty when the number of clusters is greater than 1."); } - + copy_data_from_pod_class(); rcut = fastpodptr->rcut; - + memory->destroy(fastpodptr->tmpmem); memory->destroy(fastpodptr->tmpint); @@ -361,7 +361,7 @@ double PairPOD::memory_usage() return bytes; } -void PairPOD::lammpsNeighborList(double *rij1, int *ai1, int *aj1, int *ti1, int *tj1, +void PairPOD::lammpsNeighborList(double *rij1, int *ai1, int *aj1, int *ti1, int *tj1, double **x, int **firstneigh, int *atomtypes, int *map, int *numneigh, double rcutsq, int gi) { @@ -389,12 +389,12 @@ void PairPOD::lammpsNeighborList(double *rij1, int *ai1, int *aj1, int *ti1, int } void PairPOD::NeighborCount(double **x, int **firstneigh, int *ilist, int *numneigh, double rcutsq, int gi1, int gi2) -{ +{ for (int i=0; i 1e-20) n++; + if (rsq < rcutsq && rsq > 1e-20) n++; } numij[1+i] = n; - } + } } int PairPOD::numberOfNeighbors() { int n = 0; for (int i=1; i<=ni; i++) { - n += numij[i]; - numij[i] += numij[i-1]; + n += numij[i]; + numij[i] += numij[i-1]; } return n; } void PairPOD::NeighborList(double **x, int **firstneigh, int *atomtypes, int *map, int *ilist, int *numneigh, double rcutsq, int gi1, int gi2) -{ +{ for (int i=0; inelements; // number of elements + nelements = fastpodptr->nelements; // number of elements onebody = fastpodptr->onebody; // one-body descriptors besseldegree = fastpodptr->besseldegree; // degree of Bessel functions inversedegree = fastpodptr->inversedegree; // degree of inverse functions @@ -560,13 +560,13 @@ void PairPOD::copy_data_from_pod_class() nrbf4 = fastpodptr->nrbf4; nrbfmax = fastpodptr->nrbfmax; // number of radial basis functions nabf3 = fastpodptr->nabf3; // number of three-body angular basis functions - nabf4 = fastpodptr->nabf4; // number of four-body angular basis functions + nabf4 = fastpodptr->nabf4; // number of four-body angular basis functions K3 = fastpodptr->K3; // number of three-body monomials K4 = fastpodptr->K4; // number of four-body monomials Q4 = fastpodptr->Q4; // number of four-body monomial coefficients nClusters = fastpodptr->nClusters; // number of environment clusters nComponents = fastpodptr->nComponents; // number of principal components - Mdesc = fastpodptr->Mdesc; // number of base descriptors + Mdesc = fastpodptr->Mdesc; // number of base descriptors rin = fastpodptr->rin; rcut = fastpodptr->rcut; @@ -574,13 +574,13 @@ void PairPOD::copy_data_from_pod_class() besselparams[0] = fastpodptr->besselparams[0]; besselparams[1] = fastpodptr->besselparams[1]; besselparams[2] = fastpodptr->besselparams[2]; - + memory->create(abftm, 4*K3, "abftm"); memory->create(elemindex, nelements*nelements, "elemindex"); for (int i=0; ielemindex[i]; memory->create(Phi, ns * ns, "pair_pod:Phi"); - for (int i=0; iPhi[i]; memory->create(coefficients, nCoeffPerElement * nelements, "pair_pod:coefficients"); @@ -596,7 +596,7 @@ void PairPOD::copy_data_from_pod_class() for (int i=0; iCentroids[i]; } - + memory->create(pn3, nabf3+1, "pn3"); // array stores the number of monomials for each degree memory->create(pq3, K3*2, "pq3"); // array needed for the recursive computation of the angular basis functions memory->create(pc3, K3, "pc3"); // array needed for the computation of the three-body descriptors @@ -608,8 +608,8 @@ void PairPOD::copy_data_from_pod_class() for (int i=0; ipq3[i]; for (int i=0; ipa4[i]; for (int i=0; ipb4[i]; - for (int i=0; ipc4[i]; - + for (int i=0; ipc4[i]; + memory->create(ind33l, nl33, "pair_pod:ind33l"); memory->create(ind33r, nl33, "pair_pod:ind33r"); memory->create(ind34l, nl34, "pair_pod:ind34l"); @@ -621,7 +621,7 @@ void PairPOD::copy_data_from_pod_class() for (int i=0; iind34l[i]; for (int i=0; iind34r[i]; for (int i=0; iind44l[i]; - for (int i=0; iind44r[i]; + for (int i=0; iind44r[i]; } void PairPOD::grow_atoms(int Ni) @@ -639,13 +639,13 @@ void PairPOD::grow_atoms(int Ni) memory->create(ei, nimax, "pair_pod:ei"); memory->create(typeai, nimax, "pair_pod:typeai"); memory->create(numij, nimax+1, "pair_pod:typeai"); - int n = nimax * nelements * K3 * nrbfmax; + int n = nimax * nelements * K3 * nrbfmax; memory->create(sumU, n , "pair_pod:sumU"); memory->create(forcecoeff, n , "pair_pod:forcecoeff"); memory->create(bd, nimax * Mdesc, "pair_pod:bd"); memory->create(cb, nimax * Mdesc, "pair_pod:bd"); - if (nClusters > 1) memory->create(pd, nimax * (1 + nComponents + 3*nClusters), "pair_pod:pd"); - + if (nClusters > 1) memory->create(pd, nimax * (1 + nComponents + 3*nClusters), "pair_pod:pd"); + for (int i=0; i<=nimax; i++) numij[i] = 0; } } @@ -667,12 +667,12 @@ void PairPOD::grow_pairs(int Nij) memory->destroy(abf); memory->destroy(abfx); memory->destroy(abfy); - memory->destroy(abfz); + memory->destroy(abfz); // memory->destroy(bdd); -// memory->destroy(pdd); +// memory->destroy(pdd); nijmax = Nij; memory->create(rij, 3 * nijmax, "pair_pod:r_ij"); - memory->create(fij, 3 * nijmax, "pair_pod:f_ij"); + memory->create(fij, 3 * nijmax, "pair_pod:f_ij"); memory->create(idxi, nijmax, "pair_pod:idxi"); memory->create(ai, nijmax, "pair_pod:ai"); memory->create(aj, nijmax, "pair_pod:aj"); @@ -686,13 +686,13 @@ void PairPOD::grow_pairs(int Nij) memory->create(abf, nijmax * kmax, "pair_pod:abf"); memory->create(abfx, nijmax * kmax, "pair_pod:abfx"); memory->create(abfy, nijmax * kmax, "pair_pod:abfy"); - memory->create(abfz, nijmax * kmax, "pair_pod:abfz"); + memory->create(abfz, nijmax * kmax, "pair_pod:abfz"); // memory->create(bdd, 3 * nijmax * Mdesc, "pair_pod:bdd"); -// memory->create(pdd, 3 * nijmax * nClusters, "pair_pod:pdd"); +// memory->create(pdd, 3 * nijmax * nClusters, "pair_pod:pdd"); } } -void PairPOD::divideInterval(int *intervals, int N, int M) +void PairPOD::divideInterval(int *intervals, int N, int M) { int intervalSize = N / M; // Basic size of each interval int remainder = N % M; // Remainder to distribute @@ -702,10 +702,10 @@ void PairPOD::divideInterval(int *intervals, int N, int M) if (remainder > 0) { remainder--; } - } + } } -int PairPOD::calculateNumberOfIntervals(int N, int intervalSize) +int PairPOD::calculateNumberOfIntervals(int N, int intervalSize) { if (intervalSize <= 0) { printf("Interval size must be a positive integer.\n"); @@ -805,12 +805,12 @@ void PairPOD::radialbasis(double *rbft, double *rbftx, double *rbfty, double *rb rbfty[idxni] = drbftdr*dr2; rbftz[idxni] = drbftdr*dr3; } - + // Calculate fcut/dij and dfcut/dij f1 = fcut/dij; for (int i=0; i0) && (Nij>0)) { @@ -1805,9 +1805,9 @@ void PairPOD::blockatom_base_descriptors(double *bd1, int Ni, int Nij) if ((nl33>0) && (Nij>3)) { crossdesc(d33, d3, d3, ind33l, ind33r, nl33, Ni); } - + if ((nl4 > 0) && (Nij>2)) { - if (K4 < K3) { + if (K4 < K3) { fourbodydesc(d4, Ni); } @@ -1834,8 +1834,8 @@ void PairPOD::blockatombase_descriptors(double *bd1, double *bdd1, int Ni, int N double *d34 = &bd1[Ni*(nl2 + nl3 + nl4 + nl33)]; // nl34 double *d44 = &bd1[Ni*(nl2 + nl3 + nl4 + nl33 + nl34)]; // nl44 - double *dd2 = &bdd1[0]; // 3*Nj*nl2 - double *dd3 = &bdd1[3*Nij*nl2]; // 3*Nj*nl3 + double *dd2 = &bdd1[0]; // 3*Nj*nl2 + double *dd3 = &bdd1[3*Nij*nl2]; // 3*Nj*nl3 double *dd4 = &bdd1[3*Nij*(nl2+nl3)]; // 3*Nj*nl4 double *dd33 = &bdd1[3*Nij*(nl2+nl3+nl4)]; // 3*Nj*nl33 double *dd34 = &bdd1[3*Nij*(nl2+nl3+nl4+nl33)]; // 3*Nj*nl34 @@ -1858,11 +1858,11 @@ void PairPOD::blockatombase_descriptors(double *bd1, double *bdd1, int Ni, int N crossdesc(d33, d3, d3, ind33l, ind33r, nl33, Ni); crossdescderiv(dd33, d3, d3, dd3, dd3, ind33l, ind33r, idxi, nl33, Ni, Nij); } - + if ((nl4 > 0) && (Nij>2)) { - if (K4 < K3) { + if (K4 < K3) { fourbodydesc(d4, Ni); - fourbodydescderiv(dd4, Ni, Nij); + fourbodydescderiv(dd4, Ni, Nij); } if ((nl34>0) && (Nij>4)) { @@ -1879,62 +1879,62 @@ void PairPOD::blockatombase_descriptors(double *bd1, double *bdd1, int Ni, int N } void PairPOD::blockatom_base_coefficients(double *ei, double *cb, double *B, int Ni) -{ - double *cefs = &coefficients[0]; - int *tyai = &typeai[0]; - +{ + double *cefs = &coefficients[0]; + int *tyai = &typeai[0]; + int nDes = Mdesc; int nCoeff = nCoeffPerElement; - + for (int n=0; n 1) { + blockatom_base_descriptors(bd, Ni, Nij); + + if (nClusters > 1) { blockatom_environment_descriptors(ei, cb, bd, Ni); } else { blockatom_base_coefficients(ei, cb, bd, Ni); - } + } } void PairPOD::blockatom_forces(double *fij, int Ni, int Nij) { - + int nld = nl2 + nl3 + nl4; for (int i=0; i<3*Nij*nld; i++) bdd[i] = 0.0; - + // double *d3 = &bd[Ni*nl2]; // nl3 -// double *d4 = &bd[Ni*(nl2 + nl3)]; // nl4 - double *dd2 = &bdd[0]; // 3*Nj*nl2 - double *dd3 = &bdd[3*Nij*nl2]; // 3*Nj*nl3 +// double *d4 = &bd[Ni*(nl2 + nl3)]; // nl4 + double *dd2 = &bdd[0]; // 3*Nj*nl2 + double *dd3 = &bdd[3*Nij*nl2]; // 3*Nj*nl3 double *dd4 = &bdd[3*Nij*(nl2+nl3)]; // 3*Nj*nl4 // double *dd33 = &bdd[3*Nij*(nl2+nl3+nl4)]; // 3*Nj*nl33 // double *dd34 = &bdd[3*Nij*(nl2+nl3+nl4+nl33)]; // 3*Nj*nl34 // double *dd44 = &bdd[3*Nij*(nl2+nl3+nl4+nl33+nl34)]; // 3*Nj*nl44 if ((nl2 > 0) && (Nij>0)) twobodydescderiv(dd2, Ni, Nij); - if ((nl3 > 0) && (Nij>1)) threebodydescderiv(dd3, Ni, Nij); - if ((nl4 > 0) && (Nij>2)) fourbodydescderiv(dd4, Ni, Nij); - + if ((nl3 > 0) && (Nij>1)) threebodydescderiv(dd3, Ni, Nij); + if ((nl4 > 0) && (Nij>2)) fourbodydescderiv(dd4, Ni, Nij); + double *d3 = &bd[Ni*nl2]; // nl3 - double *d4 = &bd[Ni*(nl2 + nl3)]; // nl4 + double *d4 = &bd[Ni*(nl2 + nl3)]; // nl4 double *cb2 = &cb[0]; // nl3 double *cb3 = &cb[Ni*nl2]; // nl3 double *cb4 = &cb[Ni*(nl2 + nl3)]; // nl4 double *cb33 = &cb[Ni*(nl2 + nl3 + nl4)]; // nl33 double *cb34 = &cb[Ni*(nl2 + nl3 + nl4 + nl33)]; // nl34 double *cb44 = &cb[Ni*(nl2 + nl3 + nl4 + nl33 + nl34)]; // nl44 - + if (nl33>0) crossdesc_reduction(cb3, cb3, cb33, d3, d3, ind33l, ind33r, nl33, Ni); - + if (nl34>0) crossdesc_reduction(cb3, cb4, cb34, d3, d4, ind34l, ind34r, nl34, Ni); - - if (nl44>0) crossdesc_reduction(cb4, cb4, cb44, d4, d4, ind44l, ind44r, nl44, Ni); - -// for (int n=0; n<3*Nij; n++) fij[n] = 0; + + if (nl44>0) crossdesc_reduction(cb4, cb4, cb44, d4, d4, ind44l, ind44r, nl44, Ni); + +// for (int n=0; n<3*Nij; n++) fij[n] = 0; // twobody_forces(fij, cb2, Ni, Nij); // threebody_forces(fij, cb3, Ni, Nij); // fourbody_forces(fij, cb4, Ni, Nij); - - -// if ((nl33>0) && (Nij>3)) { + + +// if ((nl33>0) && (Nij>3)) { // crossdescderiv(dd33, d3, d3, dd3, dd3, ind33l, ind33r, idxi, nl33, Ni, Nij); // } -// +// // if ((nl34>0) && (Nij>4)) { // crossdescderiv(dd34, d3, d4, dd3, dd4, ind34l, ind34r, idxi, nl34, Ni, Nij); // } -// +// // if ((nl44>0) && (Nij>5)) { // crossdescderiv(dd44, d4, d4, dd4, dd4, ind44l, ind44r, idxi, nl44, Ni, Nij); -// } - +// } + int N3 = 3*Nij; for (int n=0; n 1) { + blockatom_base_descriptors(bd, Ni, Nij); + + if (nClusters > 1) { blockatom_environment_descriptors(ei, cb, bd, Ni); } else { blockatom_base_coefficients(ei, cb, bd, Ni); - } - + } + double *d3 = &bd[Ni*nl2]; // nl3 - double *d4 = &bd[Ni*(nl2 + nl3)]; // nl4 + double *d4 = &bd[Ni*(nl2 + nl3)]; // nl4 double *cb2 = &cb[0]; // nl3 double *cb3 = &cb[Ni*nl2]; // nl3 double *cb4 = &cb[Ni*(nl2 + nl3)]; // nl4 double *cb33 = &cb[Ni*(nl2 + nl3 + nl4)]; // nl33 double *cb34 = &cb[Ni*(nl2 + nl3 + nl4 + nl33)]; // nl34 double *cb44 = &cb[Ni*(nl2 + nl3 + nl4 + nl33 + nl34)]; // nl44 - + if ((nl33>0) && (Nij>3)) { - crossdesc_reduction(cb3, cb3, cb33, d3, d3, ind33l, ind33r, nl33, Ni); + crossdesc_reduction(cb3, cb3, cb33, d3, d3, ind33l, ind33r, nl33, Ni); } if ((nl34>0) && (Nij>4)) { - crossdesc_reduction(cb3, cb4, cb34, d3, d4, ind34l, ind34r, nl34, Ni); - } + crossdesc_reduction(cb3, cb4, cb34, d3, d4, ind34l, ind34r, nl34, Ni); + } if ((nl44>0) && (Nij>5)) { - crossdesc_reduction(cb4, cb4, cb44, d4, d4, ind44l, ind44r, nl44, Ni); - } - - for (int n=0; n<3*Nij; n++) fij[n] = 0; - if ((nl2 > 0) && (Nij>0)) twobody_forces(fij, cb2, Ni, Nij); + crossdesc_reduction(cb4, cb4, cb44, d4, d4, ind44l, ind44r, nl44, Ni); + } + + for (int n=0; n<3*Nij; n++) fij[n] = 0; + if ((nl2 > 0) && (Nij>0)) twobody_forces(fij, cb2, Ni, Nij); //if ((nl3 > 0) && (Nij>1)) threebody_forces(fij, cb3, Ni, Nij); - //if ((nl4 > 0) && (Nij>2)) fourbody_forces(fij, cb4, Ni, Nij); - + //if ((nl4 > 0) && (Nij>2)) fourbody_forces(fij, cb4, Ni, Nij); + // Initialize forcecoeff to zero std::fill(forcecoeff, forcecoeff + Ni * nelements * K3 * nrbf3, 0.0); if ((nl3 > 0) && (Nij>1)) threebody_forcecoeff(forcecoeff, cb3, Ni); if ((nl4 > 0) && (Nij>2)) fourbody_forcecoeff(forcecoeff, cb4, Ni); - if ((nl3 > 0) && (Nij>1)) allbody_forces(fij, forcecoeff, Nij); + if ((nl3 > 0) && (Nij>1)) allbody_forces(fij, forcecoeff, Nij); } void PairPOD::blockatomenergyforce(double *ei, double *fij, int Ni, int Nij) -{ +{ blockatom_energyforce(ei, fij, Ni, Nij); - - //blockatom_energies(ei, Ni, Nij); - //blockatom_forces(fij, Ni, Nij); - + + //blockatom_energies(ei, Ni, Nij); + //blockatom_forces(fij, Ni, Nij); + // // calculate base descriptors and their derivatives with respect to atom coordinates -// blockatombase_descriptors(bd, bdd, Ni, Nij); -// -// if (nClusters > 1) { +// blockatombase_descriptors(bd, bdd, Ni, Nij); +// +// if (nClusters > 1) { // //double *cb = &sumU[0]; // blockatom_environment_descriptors(ei, cb, bd, Ni); -// +// // int N3 = 3*Nij; // for (int n=0; n ns) ? K3 : ns; savematrix2binfile("podabf.bin", abf, kmax, nij); savematrix2binfile("podabfx.bin", abfx, kmax, nij); savematrix2binfile("podabfy.bin", abfy, kmax, nij); - savematrix2binfile("podabfz.bin", abfz, kmax, nij); - savematrix2binfile("podbdd.bin", bdd, 3*nij, Mdesc); - savematrix2binfile("podbd.bin", bd, ni, Mdesc); - savematrix2binfile("podsumU.bin", sumU, nelements * K3 * nrbfmax, ni); + savematrix2binfile("podabfz.bin", abfz, kmax, nij); + savematrix2binfile("podbdd.bin", bdd, 3*nij, Mdesc); + savematrix2binfile("podbd.bin", bd, ni, Mdesc); + savematrix2binfile("podsumU.bin", sumU, nelements * K3 * nrbfmax, ni); savematrix2binfile("podrij.bin", rij, 3, nij); savematrix2binfile("podfij.bin", fij, 3, nij); - savematrix2binfile("podei.bin", ei, ni, 1); + savematrix2binfile("podei.bin", ei, ni, 1); error->all(FLERR, "Save data and stop the run for debugging"); } diff --git a/src/ML-POD/pair_pod.h b/src/ML-POD/pair_pod.h index 9859d7e77c..00a0f32564 100644 --- a/src/ML-POD/pair_pod.h +++ b/src/ML-POD/pair_pod.h @@ -41,13 +41,13 @@ public: void NeighborCount(double **x, int **firstneigh, int *ilist, int *numneigh, double rcutsq, int i1, int i2); void NeighborList(double **x, int **firstneigh, int *atomtype, int *map, int *ilist, int *numneigh, double rcutsq, int i1, int i2); - void tallyenergy(double *ei, int istart, int Ni); - void tallystress(double *fij, double *rij, int *ai, int *aj, int nlocal, int N); + void tallyenergy(double *ei, int istart, int Ni); + void tallystress(double *fij, double *rij, int *ai, int *aj, int nlocal, int N); void tallyforce(double **force, double *fij, int *ai, int *aj, int N); void divideInterval(int *intervals, int N, int M); - int calculateNumberOfIntervals(int N, int intervalSize); + int calculateNumberOfIntervals(int N, int intervalSize); int numberOfNeighbors(); - + void copy_data_from_pod_class(); void radialbasis(double *rbft, double *rbftx, double *rbfty, double *rbftz, double *rij, int Nij); void orthogonalradialbasis(int Nij); @@ -65,32 +65,32 @@ public: void crossdesc(double *d12, double *d1, double *d2, int *ind1, int *ind2, int n12, int Ni); void crossdescderiv(double *dd12, double *d1, double *d2, double *dd1, double *dd2, int *ind1, int *ind2, int *idxi, int n12, int Ni, int Nij); - void blockatombase_descriptors(double *bd1, double *bdd1, int Ni, int Nij); + void blockatombase_descriptors(double *bd1, double *bdd1, int Ni, int Nij); void blockatomenergyforce(double *ei, double *fij, int Ni, int Nij); - void crossdesc_reduction(double *cb1, double *cb2, double *c12, double *d1, - double *d2, int *ind1, int *ind2, int n12, int Ni); + void crossdesc_reduction(double *cb1, double *cb2, double *c12, double *d1, + double *d2, int *ind1, int *ind2, int n12, int Ni); void blockatom_base_descriptors(double *bd1, int Ni, int Nij); void blockatom_base_coefficients(double *ei, double *cb, double *B, int Ni); void blockatom_environment_descriptors(double *ei, double *cb, double *B, int Ni); void blockatom_energyforce(double *ei, double *fij, int Ni, int Nij); void blockatom_energies(double *ei, int Ni, int Nij); - void blockatom_forces(double *fij, int Ni, int Nij); - + void blockatom_forces(double *fij, int Ni, int Nij); + void twobody_forces(double *fij, double *cb2, int Ni, int Nij); void threebody_forces(double *fij, double *cb3, int Ni, int Nij); void fourbody_forces(double *fij, double *cb4, int Ni, int Nij); - + void threebody_forcecoeff(double *fb3, double *cb3, int Ni); void fourbody_forcecoeff(double *fb4, double *cb4, int Ni); void allbody_forces(double *fij, double *forcecoeff, int Nij); - + void savematrix2binfile(std::string filename, double *A, int nrows, int ncols); - void saveintmatrix2binfile(std::string filename, int *A, int nrows, int ncols); - void savedatafordebugging(); + void saveintmatrix2binfile(std::string filename, int *A, int nrows, int ncols); + void savedatafordebugging(); protected: - class EAPOD *fastpodptr; + class EAPOD *fastpodptr; virtual void allocate(); void grow_atoms(int Ni); void grow_pairs(int Nij); @@ -98,13 +98,13 @@ protected: int atomBlockSize; // size of each atom block int nAtomBlocks; // number of atoms blocks int atomBlocks[101]; // atom blocks - + int ni; // total number of atoms i int nij; // total number of pairs (i,j) int nimax; // maximum number of atoms i int nijmax; // maximum number of pairs (i,j) - int nelements; // number of elements + int nelements; // number of elements int onebody; // one-body descriptors int besseldegree; // degree of Bessel functions int inversedegree; // degree of inverse functions @@ -113,57 +113,57 @@ protected: int ns; // number of snapshots for radial basis functions int nl1, nl2, nl3, nl4, nl23, nl33, nl34, nl44, nl; // number of local descriptors int nrbf2, nrbf3, nrbf4, nrbfmax; // number of radial basis functions - int nabf3, nabf4; // number of angular basis functions + int nabf3, nabf4; // number of angular basis functions int K3, K4, Q4; // number of monomials - + // environmental variables int nClusters; // number of environment clusters int nComponents; // number of principal components - int Mdesc; // number of base descriptors + int Mdesc; // number of base descriptors double rin; // inner cut-off radius double rcut; // outer cut-off radius double rmax; // rcut - rin - + double *rij; // (xj - xi) for all pairs (I, J) double *fij; // force for all pairs (I, J) double *ei; // energy for each atom I int *typeai; // types of atoms I only - int *numij; // number of pairs (I, J) for each atom I + int *numij; // number of pairs (I, J) for each atom I int *idxi; // storing linear indices of atom I for all pairs (I, J) int *ai; // IDs of atoms I for all pairs (I, J) int *aj; // IDs of atoms J for all pairs (I, J) int *ti; // types of atoms I for all pairs (I, J) - int *tj; // types of atoms J for all pairs (I, J) + int *tj; // types of atoms J for all pairs (I, J) double besselparams[3]; double *Phi ; // eigenvectors matrix ns x ns - double *rbf; // radial basis functions nij x nrbfmax - double *rbfx; // x-derivatives of radial basis functions nij x nrbfmax + double *rbf; // radial basis functions nij x nrbfmax + double *rbfx; // x-derivatives of radial basis functions nij x nrbfmax double *rbfy; // y-derivatives of radial basis functions nij x nrbfmax - double *rbfz; // z-derivatives of radial basis functions nij x nrbfmax + double *rbfz; // z-derivatives of radial basis functions nij x nrbfmax double *abf; // angular basis functions nij x K3 double *abfx; // x-derivatives of angular basis functions nij x K3 - double *abfy; // y-derivatives of angular basis functions nij x K3 + double *abfy; // y-derivatives of angular basis functions nij x K3 double *abfz; // z-derivatives of angular basis functions nij x K3 double *abftm ; // angular basis functions 4 x K3 double *sumU; // sum of radial basis functions ni x K3 x nrbfmax x nelements double *forcecoeff; // force coefficients ni x K3 x nrbfmax x nelements double *Proj; // PCA Projection matrix - double *Centroids; // centroids of the clusters + double *Centroids; // centroids of the clusters double *bd; // base descriptors ni x Mdesc double *cb; // force coefficients for base descriptors ni x Mdesc double *pd; // environment probability descriptors ni x nClusters - double *bdd; // base descriptors derivatives 3 x nij x Mdesc + double *bdd; // base descriptors derivatives 3 x nij x Mdesc double *pdd; // environment probability descriptors derivatives 3 x nij x nClusters double *coefficients; // coefficients nCoeffPerElement x nelements int *pq3, *pn3, *pc3; // arrays to compute 3-body angular basis functions - int *pa4, *pb4, *pc4; // arrays to compute 4-body angular basis functions + int *pa4, *pb4, *pc4; // arrays to compute 4-body angular basis functions int *ind33l, *ind33r; // nl33 int *ind34l, *ind34r; // nl34 int *ind44l, *ind44r; // nl44 int *elemindex; - + bool peratom_warn; // print warning about missing per-atom energies or stresses };