diff --git a/.github/workflows/unittest-arm64.yml b/.github/workflows/unittest-arm64.yml new file mode 100644 index 0000000000..094c5fb0c1 --- /dev/null +++ b/.github/workflows/unittest-arm64.yml @@ -0,0 +1,81 @@ +# GitHub action to build LAMMPS on Linux with ARM64 and run standard unit tests +name: "Unittest for Linux on ARM64" + +on: + push: + branches: [develop] + + workflow_dispatch: + +concurrency: + group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{github.event_name == 'pull_request'}} + +jobs: + build: + name: Linux ARM64 Unit Test + if: ${{ github.repository == 'lammps/lammps' }} + runs-on: ubuntu-22.04-arm + env: + CCACHE_DIR: ${{ github.workspace }}/.ccache + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 2 + + - name: Install extra packages + run: | + sudo apt-get update + sudo apt-get install -y ccache \ + libeigen3-dev \ + libcurl4-openssl-dev \ + mold \ + ninja-build \ + python3-dev + + - name: Create Build Environment + run: mkdir build + + - name: Set up ccache + uses: actions/cache@v4 + with: + path: ${{ env.CCACHE_DIR }} + key: linux-unit-ccache-${{ github.sha }} + restore-keys: linux-unit-ccache- + + - name: Building LAMMPS via CMake + shell: bash + run: | + ccache -z + python3 -m venv linuxenv + source linuxenv/bin/activate + python3 -m pip install numpy + python3 -m pip install pyyaml + cmake -S cmake -B build \ + -C cmake/presets/gcc.cmake \ + -C cmake/presets/most.cmake \ + -D CMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -D CMAKE_C_COMPILER_LAUNCHER=ccache \ + -D BUILD_SHARED_LIBS=on \ + -D DOWNLOAD_POTENTIALS=off \ + -D ENABLE_TESTING=on \ + -D MLIAP_ENABLE_ACE=on \ + -D MLIAP_ENABLE_PYTHON=off \ + -D PKG_MANIFOLD=on \ + -D PKG_ML-PACE=on \ + -D PKG_ML-RANN=on \ + -D PKG_RHEO=on \ + -D PKG_PTM=on \ + -D PKG_PYTHON=on \ + -D PKG_QTB=on \ + -D PKG_SMTBQ=on \ + -G Ninja + cmake --build build + ccache -s + + - name: Run Tests + working-directory: build + shell: bash + run: ctest -V -LE unstable diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index 2fa5a449fb..2731b0df14 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -117,7 +117,6 @@ set(KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/atom_vec_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/comm_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/comm_tiled_kokkos.cpp - ${KOKKOS_PKG_SOURCES_DIR}/group_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/min_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/min_linesearch_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/neighbor_kokkos.cpp diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 1ec3646de2..4269e64189 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -230,7 +230,6 @@ action fix_wall_region_kokkos.cpp action fix_wall_region_kokkos.h action grid3d_kokkos.cpp fft3d.h action grid3d_kokkos.h fft3d.h -action group_kokkos.cpp action group_kokkos.h action improper_class2_kokkos.cpp improper_class2.cpp action improper_class2_kokkos.h improper_class2.h diff --git a/src/KOKKOS/angle_harmonic_kokkos.cpp b/src/KOKKOS/angle_harmonic_kokkos.cpp index 2b3c283732..26c70a2760 100644 --- a/src/KOKKOS/angle_harmonic_kokkos.cpp +++ b/src/KOKKOS/angle_harmonic_kokkos.cpp @@ -72,14 +72,14 @@ void AngleHarmonicKokkos::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - if(k_eatom.extent(0) < maxeatom) { + if ((int)k_eatom.extent(0) < maxeatom) { memoryKK->destroy_kokkos(k_eatom,eatom); memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"angle:eatom"); d_eatom = k_eatom.template view(); } else Kokkos::deep_copy(d_eatom,0.0); } if (vflag_atom) { - if(k_vatom.extent(0) < maxvatom) { + if ((int)k_vatom.extent(0) < maxvatom) { memoryKK->destroy_kokkos(k_vatom,vatom); memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"angle:vatom"); d_vatom = k_vatom.template view(); diff --git a/src/KOKKOS/angle_hybrid_kokkos.cpp b/src/KOKKOS/angle_hybrid_kokkos.cpp index 06b2845545..cbdf7bd942 100644 --- a/src/KOKKOS/angle_hybrid_kokkos.cpp +++ b/src/KOKKOS/angle_hybrid_kokkos.cpp @@ -76,7 +76,7 @@ void AngleHybridKokkos::compute(int eflag, int vflag) Kokkos::parallel_for(nanglelist_orig,LAMMPS_LAMBDA(int i) { const int m = d_map[d_anglelist_orig(i,3)]; - if (m >= 0) Kokkos::atomic_increment(&d_nanglelist[m]); + if (m >= 0) Kokkos::atomic_inc(&d_nanglelist[m]); }); k_nanglelist.modify_device(); @@ -87,7 +87,7 @@ void AngleHybridKokkos::compute(int eflag, int vflag) if (h_nanglelist[m] > maxangle_all) maxangle_all = h_nanglelist[m] + EXTRA; - if (k_anglelist.d_view.extent(1) < maxangle_all) + if ((int)k_anglelist.d_view.extent(1) < maxangle_all) MemKK::realloc_kokkos(k_anglelist, "angle_hybrid:anglelist", nstyles, maxangle_all, 4); auto d_anglelist = k_anglelist.d_view; diff --git a/src/KOKKOS/bond_harmonic_kokkos.cpp b/src/KOKKOS/bond_harmonic_kokkos.cpp index 7e12400c9b..488b461bc2 100644 --- a/src/KOKKOS/bond_harmonic_kokkos.cpp +++ b/src/KOKKOS/bond_harmonic_kokkos.cpp @@ -67,14 +67,14 @@ void BondHarmonicKokkos::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - if (k_eatom.extent(0) < maxeatom) { + if ((int)k_eatom.extent(0) < maxeatom) { memoryKK->destroy_kokkos(k_eatom,eatom); memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"improper:eatom"); d_eatom = k_eatom.template view(); } else Kokkos::deep_copy(d_eatom,0.0); } if (vflag_atom) { - if (k_vatom.extent(0) < maxvatom) { + if ((int)k_vatom.extent(0) < maxvatom) { memoryKK->destroy_kokkos(k_vatom,vatom); memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"improper:vatom"); d_vatom = k_vatom.template view(); diff --git a/src/KOKKOS/bond_hybrid_kokkos.cpp b/src/KOKKOS/bond_hybrid_kokkos.cpp index db247c7100..4fa3abff4f 100644 --- a/src/KOKKOS/bond_hybrid_kokkos.cpp +++ b/src/KOKKOS/bond_hybrid_kokkos.cpp @@ -76,7 +76,7 @@ void BondHybridKokkos::compute(int eflag, int vflag) Kokkos::parallel_for(nbondlist_orig,LAMMPS_LAMBDA(int i) { const int m = d_map[d_bondlist_orig(i,2)]; - if (m >= 0) Kokkos::atomic_increment(&d_nbondlist[m]); + if (m >= 0) Kokkos::atomic_inc(&d_nbondlist[m]); }); k_nbondlist.modify_device(); @@ -87,7 +87,7 @@ void BondHybridKokkos::compute(int eflag, int vflag) if (h_nbondlist[m] > maxbond_all) maxbond_all = h_nbondlist[m] + EXTRA; - if (k_bondlist.d_view.extent(1) < maxbond_all) + if ((int)k_bondlist.d_view.extent(1) < maxbond_all) MemKK::realloc_kokkos(k_bondlist, "bond_hybrid:bondlist", nstyles, maxbond_all, 3); auto d_bondlist = k_bondlist.d_view; diff --git a/src/KOKKOS/comm_tiled_kokkos.cpp b/src/KOKKOS/comm_tiled_kokkos.cpp index afddc079f4..7222ed4fb2 100644 --- a/src/KOKKOS/comm_tiled_kokkos.cpp +++ b/src/KOKKOS/comm_tiled_kokkos.cpp @@ -37,6 +37,8 @@ static constexpr int BUFEXTRA = 1000; CommTiledKokkos::CommTiledKokkos(LAMMPS *_lmp) : CommTiled(_lmp) { sendlist = nullptr; + maxsendlist = nullptr; + nprocmaxtot = 0; } /* ---------------------------------------------------------------------- */ @@ -49,6 +51,8 @@ CommTiledKokkos::CommTiledKokkos(LAMMPS *_lmp) : CommTiled(_lmp) CommTiledKokkos::CommTiledKokkos(LAMMPS *_lmp, Comm *oldcomm) : CommTiled(_lmp,oldcomm) { sendlist = nullptr; + maxsendlist = nullptr; + nprocmaxtot = 0; } /* ---------------------------------------------------------------------- */ @@ -56,7 +60,9 @@ CommTiledKokkos::CommTiledKokkos(LAMMPS *_lmp, Comm *oldcomm) : CommTiled(_lmp,o CommTiledKokkos::~CommTiledKokkos() { memoryKK->destroy_kokkos(k_sendlist,sendlist); + memory->destroy(maxsendlist); sendlist = nullptr; + maxsendlist = nullptr; buf_send = nullptr; buf_recv = nullptr; } @@ -657,12 +663,11 @@ void CommTiledKokkos::grow_list(int iswap, int iwhich, int n) k_sendlist.sync(); k_sendlist.modify(); - if (size > (int)k_sendlist.extent(2)) { - memoryKK->grow_kokkos(k_sendlist,sendlist,maxswap,maxsend,size,"comm:sendlist"); + memoryKK->grow_kokkos(k_sendlist,sendlist,maxswap,nprocmaxtot,size,"comm:sendlist"); - for (int i = 0; i < maxswap; i++) - maxsendlist[iswap][iwhich] = size; - } + for (int i = 0; i < maxswap; i++) + for (int j = 0; j < nprocmaxtot; j++) + maxsendlist[i][j] = size; } /* ---------------------------------------------------------------------- @@ -692,24 +697,23 @@ void CommTiledKokkos::grow_swap_send(int i, int n, int /*nold*/) memory->destroy(sendbox_multiold[i]); memory->create(sendbox_multiold[i],n,atom->ntypes+1,6,"comm:sendbox_multiold"); - delete [] maxsendlist[i]; - maxsendlist[i] = new int[n]; - - for (int j = 0; j < n; j++) - maxsendlist[i][j] = BUFMIN; - - if (sendlist && !k_sendlist.d_view.data()) { - for (int ii = 0; ii < maxswap; ii++) { - if (sendlist[ii]) { - for (int jj = 0; jj < nprocmax[ii]; jj++) - memory->destroy(sendlist[ii][jj]); - delete [] sendlist[ii]; - } - } + if (sendlist && !k_sendlist.h_view.data()) { delete [] sendlist; + delete [] maxsendlist; + + sendlist = nullptr; + maxsendlist = nullptr; } else { memoryKK->destroy_kokkos(k_sendlist,sendlist); + memory->destroy(maxsendlist); } - memoryKK->create_kokkos(k_sendlist,sendlist,maxswap,n,BUFMIN,"comm:sendlist"); + nprocmaxtot = MAX(nprocmaxtot,n); + + memoryKK->create_kokkos(k_sendlist,sendlist,maxswap,nprocmaxtot,BUFMIN,"comm:sendlist"); + memory->create(maxsendlist,maxswap,nprocmaxtot,"comm:maxsendlist"); + + for (int i = 0; i < maxswap; i++) + for (int j = 0; j < nprocmaxtot; j++) + maxsendlist[i][j] = BUFMIN; } diff --git a/src/KOKKOS/comm_tiled_kokkos.h b/src/KOKKOS/comm_tiled_kokkos.h index ef226489c8..67036e0a2f 100644 --- a/src/KOKKOS/comm_tiled_kokkos.h +++ b/src/KOKKOS/comm_tiled_kokkos.h @@ -64,18 +64,17 @@ class CommTiledKokkos : public CommTiled { template void reverse_comm_device(); protected: + int nprocmaxtot; DAT::tdual_int_3d k_sendlist; - //DAT::tdual_int_scalar k_total_send; DAT::tdual_xfloat_2d k_buf_send,k_buf_recv; - //DAT::tdual_int_scalar k_count; - void grow_send(int, int) override; - void grow_recv(int, int flag = 0) override; + void grow_send(int, int) override; // reallocate send buffer + void grow_recv(int, int flag = 0) override; // free/allocate recv buffer void grow_send_kokkos(int, int, ExecutionSpace space = Host); void grow_recv_kokkos(int, int, ExecutionSpace space = Host); - void grow_list(int, int, int) override; - void grow_swap_send(int, int, int) override; // grow swap arrays for send and recv + void grow_list(int, int, int) override; // reallocate sendlist for one swap/proc + void grow_swap_send(int, int, int) override; // grow swap arrays for send and recv }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/dihedral_harmonic_kokkos.cpp b/src/KOKKOS/dihedral_harmonic_kokkos.cpp index 05babd69b4..8575cc1807 100644 --- a/src/KOKKOS/dihedral_harmonic_kokkos.cpp +++ b/src/KOKKOS/dihedral_harmonic_kokkos.cpp @@ -75,14 +75,14 @@ void DihedralHarmonicKokkos::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - if(k_eatom.extent(0) < maxeatom) { + if ((int)k_eatom.extent(0) < maxeatom) { memoryKK->destroy_kokkos(k_eatom,eatom); memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"dihedral:eatom"); d_eatom = k_eatom.view(); } else Kokkos::deep_copy(d_eatom,0.0); } if (vflag_atom) { - if(k_vatom.extent(0) < maxvatom) { + if ((int)k_vatom.extent(0) < maxvatom) { memoryKK->destroy_kokkos(k_vatom,vatom); memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"dihedral:vatom"); d_vatom = k_vatom.view(); diff --git a/src/KOKKOS/dihedral_hybrid_kokkos.cpp b/src/KOKKOS/dihedral_hybrid_kokkos.cpp index 88dbeaf13b..60eb2dc0a7 100644 --- a/src/KOKKOS/dihedral_hybrid_kokkos.cpp +++ b/src/KOKKOS/dihedral_hybrid_kokkos.cpp @@ -76,7 +76,7 @@ void DihedralHybridKokkos::compute(int eflag, int vflag) Kokkos::parallel_for(ndihedrallist_orig,LAMMPS_LAMBDA(int i) { const int m = d_map[d_dihedrallist_orig(i,4)]; - if (m >= 0) Kokkos::atomic_increment(&d_ndihedrallist[m]); + if (m >= 0) Kokkos::atomic_inc(&d_ndihedrallist[m]); }); k_ndihedrallist.modify_device(); @@ -87,7 +87,7 @@ void DihedralHybridKokkos::compute(int eflag, int vflag) if (h_ndihedrallist[m] > maxdihedral_all) maxdihedral_all = h_ndihedrallist[m] + EXTRA; - if (k_dihedrallist.d_view.extent(1) < maxdihedral_all) + if ((int)k_dihedrallist.d_view.extent(1) < maxdihedral_all) MemKK::realloc_kokkos(k_dihedrallist, "dihedral_hybrid:dihedrallist", nstyles, maxdihedral_all, 5); auto d_dihedrallist = k_dihedrallist.d_view; diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index dd92afe9cc..b3149ba84d 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -690,7 +690,7 @@ int FixCMAPKokkos::pack_exchange_kokkos( copymode = 1; - Kokkos::parallel_scan(nsend, KOKKOS_LAMBDA(const int &mysend, int &offset, const bool &final) { + Kokkos::parallel_scan(Kokkos::RangePolicy(0,nsend), KOKKOS_LAMBDA(const int &mysend, int &offset, const bool &final) { const int i = d_exchange_sendlist(mysend); @@ -782,7 +782,7 @@ void FixCMAPKokkos::unpack_exchange_kokkos( copymode = 1; - Kokkos::parallel_for(nrecv, KOKKOS_LAMBDA(const int &i) { + Kokkos::parallel_for(Kokkos::RangePolicy(0,nrecv), KOKKOS_LAMBDA(const int &i) { int index = d_indices(i); if (index > -1) { int m = d_ubuf(d_buf(i)).i; diff --git a/src/KOKKOS/fix_langevin_kokkos.cpp b/src/KOKKOS/fix_langevin_kokkos.cpp index 546f204de6..c149ebda6a 100644 --- a/src/KOKKOS/fix_langevin_kokkos.cpp +++ b/src/KOKKOS/fix_langevin_kokkos.cpp @@ -39,7 +39,12 @@ enum { CONSTANT, EQUAL, ATOM }; template FixLangevinKokkos::FixLangevinKokkos(LAMMPS *lmp, int narg, char **arg) : - FixLangevin(lmp, narg, arg),rand_pool(seed + comm->me) + FixLangevin(lmp, narg, arg), +#ifdef LMP_KOKKOS_DEBUG_RNG + rand_pool(seed + comm->me, lmp) +#else + rand_pool(seed + comm->me) +#endif { kokkosable = 1; fuse_integrate_flag = 1; @@ -48,43 +53,42 @@ FixLangevinKokkos::FixLangevinKokkos(LAMMPS *lmp, int narg, char **a int ntypes = atomKK->ntypes; // allocate per-type arrays for force prefactors + delete[] gfactor1; + delete[] gfactor2; + delete[] ratio; memoryKK->create_kokkos(k_gfactor1,gfactor1,ntypes+1,"langevin:gfactor1"); memoryKK->create_kokkos(k_gfactor2,gfactor2,ntypes+1,"langevin:gfactor2"); memoryKK->create_kokkos(k_ratio,ratio,ntypes+1,"langevin:ratio"); d_gfactor1 = k_gfactor1.template view(); - h_gfactor1 = k_gfactor1.template view(); + h_gfactor1 = k_gfactor1.h_view; d_gfactor2 = k_gfactor2.template view(); - h_gfactor2 = k_gfactor2.template view(); + h_gfactor2 = k_gfactor2.h_view; d_ratio = k_ratio.template view(); - h_ratio = k_ratio.template view(); + h_ratio = k_ratio.h_view; // optional args for (int i = 1; i <= ntypes; i++) ratio[i] = 1.0; - k_ratio.template modify(); + k_ratio.modify_host(); if (gjfflag) { + memory->destroy(franprev); + memory->destroy(lv); grow_arrays(atomKK->nmax); - atom->add_callback(Atom::GROW); + // initialize franprev to zero - for (int i = 0; i < atomKK->nlocal; i++) { - franprev[i][0] = 0.0; - franprev[i][1] = 0.0; - franprev[i][2] = 0.0; - lv[i][0] = 0.0; - lv[i][1] = 0.0; - lv[i][2] = 0.0; - } - k_franprev.template modify(); - k_lv.template modify(); + + Kokkos::deep_copy(d_franprev,0.0); + Kokkos::deep_copy(d_lv,0.0); } + if (zeroflag) { k_fsumall = tdual_double_1d_3n("langevin:fsumall"); - h_fsumall = k_fsumall.template view(); + h_fsumall = k_fsumall.h_view; d_fsumall = k_fsumall.template view(); } execution_space = ExecutionSpaceFromDevice::space; - datamask_read = V_MASK | F_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK; + datamask_read = V_MASK | F_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK; datamask_modify = F_MASK; } @@ -93,13 +97,21 @@ FixLangevinKokkos::FixLangevinKokkos(LAMMPS *lmp, int narg, char **a template FixLangevinKokkos::~FixLangevinKokkos() { + if (copymode) return; + memoryKK->destroy_kokkos(k_gfactor1,gfactor1); memoryKK->destroy_kokkos(k_gfactor2,gfactor2); memoryKK->destroy_kokkos(k_ratio,ratio); memoryKK->destroy_kokkos(k_flangevin,flangevin); - if (gjfflag) memoryKK->destroy_kokkos(k_franprev,franprev); - if (gjfflag) memoryKK->destroy_kokkos(k_lv,lv); + if (gjfflag) { + memoryKK->destroy_kokkos(k_franprev,franprev); + memoryKK->destroy_kokkos(k_lv,lv); + } memoryKK->destroy_kokkos(k_tforce,tforce); + +#ifdef LMP_KOKKOS_DEBUG_RNG + rand_pool.destroy(); +#endif } /* ---------------------------------------------------------------------- */ @@ -118,8 +130,170 @@ void FixLangevinKokkos::init() error->warning(FLERR,"Fix langevin gjf + kokkos is not implemented with random gaussians"); // prefactors are modified in the init - k_gfactor1.template modify(); - k_gfactor2.template modify(); + k_gfactor1.modify_host(); + k_gfactor2.modify_host(); + +#ifdef LMP_KOKKOS_DEBUG_RNG + rand_pool.init(random,seed + comm->me); +#endif +} + +/* ---------------------------------------------------------------------- */ + +template +void FixLangevinKokkos::setup(int vflag) +{ + if (gjfflag) { + double dt = update->dt; + double ftm2v = force->ftm2v; + auto v = atomKK->k_v.view(); + auto f = atomKK->k_f.view(); + auto mask = atomKK->k_mask.view(); + int nlocal = atom->nlocal; + auto rmass = atomKK->k_rmass.view(); + auto mass = atomKK->k_mass.view(); + auto type = atomKK->k_type.view(); + auto groupbit = this->groupbit; + auto gjfa = this->gjfa; + auto gjfsib = this->gjfsib; + + if (atom->rmass) { + atomKK->sync(execution_space,V_MASK|F_MASK|MASK_MASK|RMASS_MASK); + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), KOKKOS_LAMBDA(const int &i) { + if (mask[i] & groupbit) { + const double dtfm = ftm2v * 0.5 * dt / rmass[i]; + v(i,0) -= dtfm * f(i,0); + v(i,1) -= dtfm * f(i,1); + v(i,2) -= dtfm * f(i,2); + } + }); + + if (tbiasflag) { + // account for bias velocity + if (temperature->kokkosable) { + temperature->compute_scalar(); + temperature->remove_bias_all_kk(); + } else { + atomKK->sync(temperature->execution_space,temperature->datamask_read); + temperature->compute_scalar(); + temperature->remove_bias_all(); + atomKK->modified(temperature->execution_space,temperature->datamask_modify); + atomKK->sync(execution_space,temperature->datamask_modify); + } + } + + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), KOKKOS_LAMBDA(const int &i) { + if (mask[i] & groupbit) { + v(i,0) /= gjfa * gjfsib * gjfsib; + v(i,1) /= gjfa * gjfsib * gjfsib; + v(i,2) /= gjfa * gjfsib * gjfsib; + } + }); + + if (tbiasflag) { + if (temperature->kokkosable) temperature->restore_bias_all(); + else { + atomKK->sync(temperature->execution_space,temperature->datamask_read); + temperature->restore_bias_all(); + atomKK->modified(temperature->execution_space,temperature->datamask_modify); + atomKK->sync(execution_space,temperature->datamask_modify); + } + } + + } else { + atomKK->sync(execution_space,V_MASK|F_MASK|MASK_MASK|TYPE_MASK); + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), KOKKOS_LAMBDA(const int &i) { + if (mask[i] & groupbit) { + const double dtfm = ftm2v * 0.5 * dt / mass[type[i]]; + v(i,0) -= dtfm * f(i,0); + v(i,1) -= dtfm * f(i,1); + v(i,2) -= dtfm * f(i,2); + } + }); + + if (tbiasflag) { + // account for bias velocity + if (temperature->kokkosable) { + temperature->compute_scalar(); + temperature->remove_bias_all_kk(); + } else { + atomKK->sync(temperature->execution_space,temperature->datamask_read); + temperature->compute_scalar(); + temperature->remove_bias_all(); + atomKK->modified(temperature->execution_space,temperature->datamask_modify); + atomKK->sync(execution_space,temperature->datamask_modify); + } + } + + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), KOKKOS_LAMBDA(const int &i) { + if (mask[i] & groupbit) { + v(i,0) /= gjfa * gjfsib * gjfsib; + v(i,1) /= gjfa * gjfsib * gjfsib; + v(i,2) /= gjfa * gjfsib * gjfsib; + } + }); + + if (tbiasflag) { + if (temperature->kokkosable) temperature->restore_bias_all(); + else { + atomKK->sync(temperature->execution_space,temperature->datamask_read); + temperature->restore_bias_all(); + atomKK->modified(temperature->execution_space,temperature->datamask_modify); + atomKK->sync(execution_space,temperature->datamask_modify); + } + } + + } + atomKK->modified(execution_space,V_MASK); + } + + post_force(vflag); + + if (gjfflag) { + double dt = update->dt; + double ftm2v = force->ftm2v; + auto f = atomKK->k_f.view(); + auto v = atomKK->k_v.view(); + auto mask = atomKK->k_mask.view(); + int nlocal = atom->nlocal; + auto rmass = atomKK->k_rmass.view(); + auto mass = atomKK->k_mass.view(); + auto type = atomKK->k_type.view(); + auto groupbit = this->groupbit; + + k_lv.template sync(); + auto l_lv = d_lv; + + if (atom->rmass) { + atomKK->sync(execution_space,V_MASK|F_MASK|MASK_MASK|RMASS_MASK); + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), KOKKOS_LAMBDA(const int &i) { + if (mask[i] & groupbit) { + const double dtfm = ftm2v * 0.5 * dt / rmass[i]; + v(i,0) += dtfm * f(i,0); + v(i,1) += dtfm * f(i,1); + v(i,2) += dtfm * f(i,2); + l_lv(i,0) = v(i,0); + l_lv(i,1) = v(i,1); + l_lv(i,2) = v(i,2); + } + }); + } else { + atomKK->sync(execution_space,V_MASK|F_MASK|MASK_MASK|TYPE_MASK); + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), KOKKOS_LAMBDA(const int &i) { + if (mask[i] & groupbit) { + const double dtfm = ftm2v * 0.5 * dt / mass[type[i]]; + v(i,0) += dtfm * f(i,0); + v(i,1) += dtfm * f(i,1); + v(i,2) += dtfm * f(i,2); + l_lv(i,0) = v(i,0); + l_lv(i,1) = v(i,1); + l_lv(i,2) = v(i,2); + } + }); + } + atomKK->modified(execution_space,V_MASK); + k_lv.template modify(); + } } /* ---------------------------------------------------------------------- */ @@ -129,10 +303,10 @@ void FixLangevinKokkos::grow_arrays(int nmax) { memoryKK->grow_kokkos(k_franprev,franprev,nmax,3,"langevin:franprev"); d_franprev = k_franprev.template view(); - h_franprev = k_franprev.template view(); + h_franprev = k_franprev.h_view; memoryKK->grow_kokkos(k_lv,lv,nmax,3,"langevin:lv"); d_lv = k_lv.template view(); - h_lv = k_lv.template view(); + h_lv = k_lv.h_view; } /* ---------------------------------------------------------------------- */ @@ -141,7 +315,6 @@ template void FixLangevinKokkos::initial_integrate(int /*vflag*/) { atomKK->sync(execution_space,datamask_read); - atomKK->modified(execution_space,datamask_modify); v = atomKK->k_v.view(); f = atomKK->k_f.view(); @@ -150,6 +323,8 @@ void FixLangevinKokkos::initial_integrate(int /*vflag*/) FixLangevinKokkosInitialIntegrateFunctor functor(this); Kokkos::parallel_for(nlocal,functor); + + atomKK->modified(execution_space,datamask_modify); } template @@ -184,6 +359,7 @@ void FixLangevinKokkos::post_force(int /*vflag*/) rmass = atomKK->k_rmass.view(); f = atomKK->k_f.template view(); v = atomKK->k_v.template view(); + mass = atomKK->k_mass.template view(); type = atomKK->k_type.template view(); mask = atomKK->k_mask.template view(); @@ -197,7 +373,8 @@ void FixLangevinKokkos::post_force(int /*vflag*/) dt = update->dt; mvv2e = force->mvv2e; ftm2v = force->ftm2v; - fran_prop_const = sqrt(24.0*boltz/t_period/dt/mvv2e); + fran_prop_const = sqrt(2.0*boltz/t_period/dt/mvv2e); + fran_prop_const_gjf = sqrt(24.0*boltz/t_period/dt/mvv2e); compute_target(); // modifies tforce vector, hence sync here k_tforce.template sync(); @@ -220,7 +397,7 @@ void FixLangevinKokkos::post_force(int /*vflag*/) maxatom1 = atomKK->nmax; memoryKK->create_kokkos(k_flangevin,flangevin,maxatom1,3,"langevin:flangevin"); d_flangevin = k_flangevin.template view(); - h_flangevin = k_flangevin.template view(); + h_flangevin = k_flangevin.h_view; } } @@ -550,7 +727,7 @@ void FixLangevinKokkos::post_force(int /*vflag*/) h_fsumall(0) = fsumall[0]/count; h_fsumall(1) = fsumall[1]/count; h_fsumall(2) = fsumall[2]/count; - k_fsumall.template modify(); + k_fsumall.modify_host(); k_fsumall.template sync(); // set total force zero in parallel on the device FixLangevinKokkosZeroForceFunctor zero_functor(this); @@ -581,20 +758,30 @@ FSUM FixLangevinKokkos::post_force_item(int i) const if (mask[i] & groupbit) { rand_type rand_gen = rand_pool.get_state(); + if (Tp_TSTYLEATOM) tsqrt_t = sqrt(d_tforce[i]); if (Tp_RMASS) { gamma1 = -rmass[i] / t_period / ftm2v; - gamma2 = sqrt(rmass[i]) * fran_prop_const / ftm2v; - gamma1 *= 1.0/d_ratio[type[i]]; + if (Tp_GJF) + gamma2 = sqrt(rmass[i]) * fran_prop_const_gjf / ftm2v; + else + gamma2 = sqrt(rmass[i]) * fran_prop_const / ftm2v; + gamma1 *= 1.0/ratio[type[i]]; gamma2 *= 1.0/sqrt(d_ratio[type[i]]) * tsqrt_t; } else { gamma1 = d_gfactor1[type[i]]; gamma2 = d_gfactor2[type[i]] * tsqrt_t; } - fran[0] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5); - fran[1] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5); - fran[2] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5); + if (Tp_GJF) { + fran[0] = gamma2 * rand_gen.normal(); //random->gaussian() + fran[1] = gamma2 * rand_gen.normal(); //random->gaussian() + fran[2] = gamma2 * rand_gen.normal(); //random->gaussian() + } else { + fran[0] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5); + fran[1] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5); + fran[2] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5); + } if (Tp_BIAS) { fdrag[0] = gamma1*v(i,0); @@ -678,7 +865,6 @@ void FixLangevinKokkos::zero_force_item(int i) const f(i,1) -= d_fsumall[1]; f(i,2) -= d_fsumall[2]; } - } /* ---------------------------------------------------------------------- @@ -740,7 +926,7 @@ void FixLangevinKokkos::reset_dt() force->ftm2v; h_gfactor2[i] *= 1.0/sqrt(h_ratio[i]); } - k_gfactor2.template modify(); + k_gfactor2.modify_host(); } } @@ -781,9 +967,15 @@ KOKKOS_INLINE_FUNCTION double FixLangevinKokkos::compute_energy_item(int i) const { double my_energy = 0.0; - if (mask[i] & groupbit) - my_energy = d_flangevin(i,0)*v(i,0) + d_flangevin(i,1)*v(i,1) + - d_flangevin(i,2)*v(i,2); + if (mask[i] & groupbit) { + if (gjfflag) { + my_energy = d_flangevin(i,0)*d_lv(i,0) + d_flangevin(i,1)*d_lv(i,1) + + d_flangevin(i,2)*d_lv(i,2); + } else { + my_energy = d_flangevin(i,0)*v(i,0) + d_flangevin(i,1)*v(i,1) + + d_flangevin(i,2)*v(i,2); + } + } return my_energy; } @@ -796,30 +988,42 @@ void FixLangevinKokkos::end_of_step() { if (!tallyflag && !gjfflag) return; + dt = update->dt; + ftm2v = force->ftm2v; v = atomKK->k_v.template view(); - f = atomKK->k_f.template view(); + rmass = atomKK->k_rmass.template view(); + mass = atomKK->k_mass.template view(); mask = atomKK->k_mask.template view(); - - atomKK->sync(execution_space,V_MASK | MASK_MASK); int nlocal = atomKK->nlocal; energy_onestep = 0.0; + atomKK->sync(execution_space,V_MASK | MASK_MASK); + if (gjfflag) k_lv.template sync(); k_flangevin.template sync(); - FixLangevinKokkosTallyEnergyFunctor tally_functor(this); - Kokkos::parallel_reduce(nlocal,tally_functor,energy_onestep); + + if (tallyflag) { + FixLangevinKokkosTallyEnergyFunctor tally_functor(this); + Kokkos::parallel_reduce(nlocal,tally_functor,energy_onestep); + } if (gjfflag) { if (rmass.data()) { + atomKK->sync(execution_space,RMASS_MASK); FixLangevinKokkosEndOfStepFunctor functor(this); Kokkos::parallel_for(nlocal,functor); } else { + atomKK->sync(execution_space,TYPE_MASK); + type = atomKK->k_type.template view(); mass = atomKK->k_mass.view(); FixLangevinKokkosEndOfStepFunctor functor(this); Kokkos::parallel_for(nlocal,functor); } } + atomKK->modified(execution_space,V_MASK); + k_lv.template modify(); + energy += energy_onestep*update->dt; } @@ -828,7 +1032,7 @@ KOKKOS_INLINE_FUNCTION void FixLangevinKokkos::end_of_step_item(int i) const { double tmp[3]; if (mask[i] & groupbit) { - const double dtfm = force->ftm2v * 0.5 * dt / mass[type[i]]; + const double dtfm = ftm2v * 0.5 * dt / mass[type[i]]; tmp[0] = v(i,0); tmp[1] = v(i,1); tmp[2] = v(i,2); @@ -841,10 +1045,10 @@ void FixLangevinKokkos::end_of_step_item(int i) const { dtfm * 0.5 * (gjfsib * d_flangevin(i,0) - d_franprev(i,0)) + (gjfsib * gjfa * 0.5 + dt * 0.25 / t_period / gjfsib) * d_lv(i,0); v(i,1) = 0.5 * gjfsib * gjfsib * (v(i,1) + dtfm * f(i,1) / gjfa) + - dtfm * 0.5 * (gjfsib * d_flangevin(i,0) - d_franprev(i,1)) + + dtfm * 0.5 * (gjfsib * d_flangevin(i,1) - d_franprev(i,1)) + (gjfsib * gjfa * 0.5 + dt * 0.25 / t_period / gjfsib) * d_lv(i,1); v(i,2) = 0.5 * gjfsib * gjfsib * (v(i,2) + dtfm * f(i,2) / gjfa) + - dtfm * 0.5 * (gjfsib * d_flangevin(i,0) - d_franprev(i,2)) + + dtfm * 0.5 * (gjfsib * d_flangevin(i,2) - d_franprev(i,2)) + (gjfsib * gjfa * 0.5 + dt * 0.25 / t_period / gjfsib) * d_lv(i,2); } d_lv(i,0) = tmp[0]; @@ -859,7 +1063,7 @@ void FixLangevinKokkos::end_of_step_rmass_item(int i) const { double tmp[3]; if (mask[i] & groupbit) { - const double dtfm = force->ftm2v * 0.5 * dt / rmass[i]; + const double dtfm = ftm2v * 0.5 * dt / rmass[i]; tmp[0] = v(i,0); tmp[1] = v(i,1); tmp[2] = v(i,2); @@ -891,6 +1095,9 @@ void FixLangevinKokkos::end_of_step_rmass_item(int i) const template void FixLangevinKokkos::copy_arrays(int i, int j, int /*delflag*/) { + k_franprev.sync_host(); + k_lv.sync_host(); + h_franprev(j,0) = h_franprev(i,0); h_franprev(j,1) = h_franprev(i,1); h_franprev(j,2) = h_franprev(i,2); @@ -898,8 +1105,8 @@ void FixLangevinKokkos::copy_arrays(int i, int j, int /*delflag*/) h_lv(j,1) = h_lv(i,1); h_lv(j,2) = h_lv(i,2); - k_franprev.template modify(); - k_lv.template modify(); + k_franprev.modify_host(); + k_lv.modify_host(); } @@ -924,24 +1131,6 @@ void FixLangevinKokkos::sort_kokkos(Kokkos::BinSort -void FixLangevinKokkos::cleanup_copy() -{ - random = nullptr; - tstr = nullptr; - gfactor1 = nullptr; - gfactor2 = nullptr; - ratio = nullptr; - id_temp = nullptr; - flangevin = nullptr; - tforce = nullptr; - gjfflag = 0; - franprev = nullptr; - lv = nullptr; - id = style = nullptr; - vatom = nullptr; -} - namespace LAMMPS_NS { template class FixLangevinKokkos; #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/fix_langevin_kokkos.h b/src/KOKKOS/fix_langevin_kokkos.h index fc25a0a748..c674060bf2 100644 --- a/src/KOKKOS/fix_langevin_kokkos.h +++ b/src/KOKKOS/fix_langevin_kokkos.h @@ -27,6 +27,7 @@ FixStyle(langevin/kk/host,FixLangevinKokkos); #include "kokkos_type.h" #include "kokkos_base.h" #include "Kokkos_Random.hpp" +#include "rand_pool_wrap_kokkos.h" namespace LAMMPS_NS { @@ -66,8 +67,8 @@ namespace LAMMPS_NS { FixLangevinKokkos(class LAMMPS *, int, char **); ~FixLangevinKokkos() override; - void cleanup_copy(); void init() override; + void setup(int) override; void initial_integrate(int) override; void fused_integrate(int) override; void post_force(int) override; @@ -135,13 +136,21 @@ namespace LAMMPS_NS { typename tdual_double_1d_3n::t_dev d_fsumall; typename tdual_double_1d_3n::t_host h_fsumall; - double boltz,dt,mvv2e,ftm2v,fran_prop_const; + double boltz,dt,mvv2e,ftm2v,fran_prop_const,fran_prop_const_gjf; void compute_target(); +#ifndef LMP_KOKKOS_DEBUG_RNG Kokkos::Random_XorShift64_Pool rand_pool; typedef typename Kokkos::Random_XorShift64_Pool::generator_type rand_type; + //Kokkos::Random_XorShift1024_Pool rand_pool; + //typedef typename Kokkos::Random_XorShift1024_Pool::generator_type rand_type; +#else + RandPoolWrap rand_pool; + typedef RandWrap rand_type; +#endif + }; template @@ -150,7 +159,7 @@ namespace LAMMPS_NS { FixLangevinKokkos c; FixLangevinKokkosInitialIntegrateFunctor(FixLangevinKokkos* c_ptr): - c(*c_ptr) {c.cleanup_copy();}; + c(*c_ptr) {c.set_copymode(1);}; KOKKOS_INLINE_FUNCTION void operator()(const int i) const { @@ -168,7 +177,7 @@ namespace LAMMPS_NS { FixLangevinKokkosPostForceFunctor(FixLangevinKokkos* c_ptr): c(*c_ptr) {} - ~FixLangevinKokkosPostForceFunctor() {c.cleanup_copy();} + ~FixLangevinKokkosPostForceFunctor() {c.set_copymode(1);} KOKKOS_INLINE_FUNCTION void operator()(const int i) const { @@ -204,7 +213,7 @@ namespace LAMMPS_NS { FixLangevinKokkos c; FixLangevinKokkosZeroForceFunctor(FixLangevinKokkos* c_ptr): - c(*c_ptr) {c.cleanup_copy();} + c(*c_ptr) {c.set_copymode(1);} KOKKOS_INLINE_FUNCTION void operator()(const int i) const { @@ -218,7 +227,7 @@ namespace LAMMPS_NS { FixLangevinKokkos c; typedef double value_type; FixLangevinKokkosTallyEnergyFunctor(FixLangevinKokkos* c_ptr): - c(*c_ptr) {c.cleanup_copy();} + c(*c_ptr) {c.set_copymode(1);} KOKKOS_INLINE_FUNCTION void operator()(const int i, value_type &energy) const { @@ -241,7 +250,7 @@ namespace LAMMPS_NS { FixLangevinKokkos c; FixLangevinKokkosEndOfStepFunctor(FixLangevinKokkos* c_ptr): - c(*c_ptr) {c.cleanup_copy();} + c(*c_ptr) {c.set_copymode(1);} KOKKOS_INLINE_FUNCTION void operator()(const int i) const { diff --git a/src/KOKKOS/fix_momentum_kokkos.cpp b/src/KOKKOS/fix_momentum_kokkos.cpp index b41a3530cb..a363e2b1e7 100644 --- a/src/KOKKOS/fix_momentum_kokkos.cpp +++ b/src/KOKKOS/fix_momentum_kokkos.cpp @@ -36,7 +36,7 @@ FixMomentumKokkos::FixMomentumKokkos(LAMMPS *lmp, int narg, char **a { kokkosable = 1; atomKK = (AtomKokkos *) atom; - groupKK = (GroupKokkos *)group; + groupKK = (GroupKokkos *)group; execution_space = ExecutionSpaceFromDevice::space; datamask_read = EMPTY_MASK; datamask_modify = EMPTY_MASK; @@ -94,7 +94,7 @@ void FixMomentumKokkos::end_of_step() double ekin_old,ekin_new; ekin_old = ekin_new = 0.0; - if (dynamic) masstotal = groupKK->mass(igroup); + if (dynamic) masstotal = groupKK->mass_kk(igroup); // do nothing if group is empty, i.e. mass is zero; @@ -109,7 +109,7 @@ void FixMomentumKokkos::end_of_step() auto groupbit2 = groupbit; if (linear) { double vcm[3]; - groupKK->vcm(igroup,masstotal,vcm); + groupKK->vcm_kk(igroup,masstotal,vcm); // adjust velocities by vcm to zero linear momentum // only adjust a component if flag is set @@ -131,9 +131,9 @@ void FixMomentumKokkos::end_of_step() if (angular) { double xcm[3],angmom[3],omega[3],inertia[3][3]; - groupKK->xcm(igroup,masstotal,xcm); - groupKK->angmom(igroup,xcm,angmom); - groupKK->inertia(igroup,xcm,inertia); + groupKK->xcm_kk(igroup,masstotal,xcm); + groupKK->angmom_kk(igroup,xcm,angmom); + groupKK->inertia_kk(igroup,xcm,inertia); group->omega(angmom,inertia,omega); // adjust velocities to zero omega diff --git a/src/KOKKOS/fix_momentum_kokkos.h b/src/KOKKOS/fix_momentum_kokkos.h index 0ab91c423d..5ea474a069 100644 --- a/src/KOKKOS/fix_momentum_kokkos.h +++ b/src/KOKKOS/fix_momentum_kokkos.h @@ -38,7 +38,7 @@ class FixMomentumKokkos : public FixMomentum { FixMomentumKokkos(class LAMMPS *, int, char **); void end_of_step() override; private: - GroupKokkos *groupKK; + GroupKokkos *groupKK; }; } diff --git a/src/KOKKOS/fix_nve_limit_kokkos.cpp b/src/KOKKOS/fix_nve_limit_kokkos.cpp index de77427e49..9cc8fb22b5 100644 --- a/src/KOKKOS/fix_nve_limit_kokkos.cpp +++ b/src/KOKKOS/fix_nve_limit_kokkos.cpp @@ -66,7 +66,7 @@ void FixNVELimitKokkos::initial_integrate(int /*vflag*/) auto d_type = atomKK->k_type.template view(); atomKK->sync(execution_space, X_MASK|V_MASK|F_MASK|MASK_MASK|RMASS_MASK ); - Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) { + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal), KOKKOS_LAMBDA(const int i, int &l_ncount) { if (d_mask[i] & l_groupbit) { const double dtfm = l_dtf / d_rmass[i]; d_v(i,0) += dtfm * d_f(i,0); @@ -95,7 +95,7 @@ void FixNVELimitKokkos::initial_integrate(int /*vflag*/) auto l_groupbit = groupbit; atomKK->sync(execution_space, X_MASK|V_MASK|F_MASK|MASK_MASK|TYPE_MASK ); - Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) { + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal), KOKKOS_LAMBDA(const int i, int &l_ncount) { if (d_mask[i] & l_groupbit) { const double dtfm = l_dtf / d_mass[d_type[i]]; d_v(i,0) += dtfm * d_f(i,0); @@ -144,7 +144,7 @@ void FixNVELimitKokkos::final_integrate() auto d_rmass = atomKK->k_rmass.template view(); atomKK->sync(execution_space, V_MASK|F_MASK|MASK_MASK|RMASS_MASK ); - Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) { + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal), KOKKOS_LAMBDA(const int i, int &l_ncount) { if (d_mask[i] & l_groupbit) { const double dtfm = l_dtf / d_rmass[i]; d_v(i,0) += dtfm * d_f(i,0); @@ -168,7 +168,7 @@ void FixNVELimitKokkos::final_integrate() auto d_type = atomKK->k_type.template view(); atomKK->sync(execution_space, V_MASK|F_MASK|MASK_MASK|TYPE_MASK ); - Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) { + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal), KOKKOS_LAMBDA(const int i, int &l_ncount) { if (d_mask[i] & l_groupbit) { const double dtfm = l_dtf / d_mass[d_type[i]]; d_v(i,0) += dtfm * d_f(i,0); diff --git a/src/KOKKOS/fix_recenter_kokkos.cpp b/src/KOKKOS/fix_recenter_kokkos.cpp index 607f5ce8d9..3f67e0f65d 100644 --- a/src/KOKKOS/fix_recenter_kokkos.cpp +++ b/src/KOKKOS/fix_recenter_kokkos.cpp @@ -38,7 +38,7 @@ FixRecenterKokkos::FixRecenterKokkos(LAMMPS *lmp, int narg, char **a { kokkosable = 1; atomKK = (AtomKokkos *)atom; - groupKK = (GroupKokkos *)group; + groupKK = (GroupKokkos *)group; execution_space = ExecutionSpaceFromDevice::space; datamask_read = X_MASK | MASK_MASK; @@ -87,9 +87,10 @@ void FixRecenterKokkos::initial_integrate(int /*vflag*/) // current COM - if (group->dynamic[igroup]) masstotal = groupKK->mass(igroup); + + if (group->dynamic[igroup]) masstotal = groupKK->mass_kk(igroup); double xcm[3]; - groupKK->xcm(igroup,masstotal,xcm); + groupKK->xcm_kk(igroup,masstotal,xcm); // shift coords by difference between actual COM and requested COM diff --git a/src/KOKKOS/fix_recenter_kokkos.h b/src/KOKKOS/fix_recenter_kokkos.h index 36e154e05c..46b4d3df7e 100644 --- a/src/KOKKOS/fix_recenter_kokkos.h +++ b/src/KOKKOS/fix_recenter_kokkos.h @@ -36,7 +36,7 @@ class FixRecenterKokkos : public FixRecenter { FixRecenterKokkos(class LAMMPS *, int, char **); void initial_integrate(int) override; private: - GroupKokkos *groupKK; + GroupKokkos *groupKK; }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index 0d1c250b3d..f785eb10e4 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -1859,7 +1859,7 @@ void FixRxKokkos::computeLocalTemperature() // loop over neighbors of my atoms #if 0 - Kokkos::parallel_for ( inum, + Kokkos::parallel_for ( Kokkos::RangePolicy(0,inum), LAMMPS_LAMBDA(const int ii) { // Create an atomic view of sumWeights and dpdThetaLocal. Only needed @@ -1939,7 +1939,7 @@ void FixRxKokkos::computeLocalTemperature() // self-interaction for local temperature #if 0 - Kokkos::parallel_for ( nlocal, + Kokkos::parallel_for ( Kokkos::RangePolicy(0,nlocal), LAMMPS_LAMBDA(const int i) { double wij = 0.0; diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index a64adbcc38..04a3a45f68 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -283,22 +283,22 @@ void FixShardlowKokkos::ssa_update_dpd( const X_FLOAT delz = ztmp - x(j, 2); const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; #ifdef DEBUG_SSA_PAIR_CT - if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(0, 0))); - else Kokkos::atomic_increment(&(d_counters(0, 1))); - Kokkos::atomic_increment(&(d_counters(0, 2))); + if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_inc(&(d_counters(0, 0))); + else Kokkos::atomic_inc(&(d_counters(0, 1))); + Kokkos::atomic_inc(&(d_counters(0, 2))); int rsqi = rsq / 8; if (rsqi < 0) rsqi = 0; else if (rsqi > 31) rsqi = 31; - Kokkos::atomic_increment(&(d_hist(rsqi))); + Kokkos::atomic_inc(&(d_hist(rsqi))); #endif // NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test if ((rsq < (STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype))) && (rsq >= EPSILON_SQUARED)) { #ifdef DEBUG_SSA_PAIR_CT - if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(1, 0))); - else Kokkos::atomic_increment(&(d_counters(1, 1))); - Kokkos::atomic_increment(&(d_counters(1, 2))); + if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_inc(&(d_counters(1, 0))); + else Kokkos::atomic_inc(&(d_counters(1, 1))); + Kokkos::atomic_inc(&(d_counters(1, 2))); #endif double r = sqrt(rsq); double rinv = 1.0/r; @@ -428,22 +428,22 @@ void FixShardlowKokkos::ssa_update_dpde( const X_FLOAT delz = ztmp - x(j, 2); const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; #ifdef DEBUG_SSA_PAIR_CT - if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(0, 0))); - else Kokkos::atomic_increment(&(d_counters(0, 1))); - Kokkos::atomic_increment(&(d_counters(0, 2))); + if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_inc(&(d_counters(0, 0))); + else Kokkos::atomic_inc(&(d_counters(0, 1))); + Kokkos::atomic_inc(&(d_counters(0, 2))); int rsqi = rsq / 8; if (rsqi < 0) rsqi = 0; else if (rsqi > 31) rsqi = 31; - Kokkos::atomic_increment(&(d_hist(rsqi))); + Kokkos::atomic_inc(&(d_hist(rsqi))); #endif // NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test if ((rsq < (STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype))) && (rsq >= EPSILON_SQUARED)) { #ifdef DEBUG_SSA_PAIR_CT - if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(1, 0))); - else Kokkos::atomic_increment(&(d_counters(1, 1))); - Kokkos::atomic_increment(&(d_counters(1, 2))); + if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_inc(&(d_counters(1, 0))); + else Kokkos::atomic_inc(&(d_counters(1, 1))); + Kokkos::atomic_inc(&(d_counters(1, 2))); #endif double r = sqrt(rsq); diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp index 1b6d45ead7..59b9a49ee8 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.cpp +++ b/src/KOKKOS/fix_spring_self_kokkos.cpp @@ -123,7 +123,7 @@ void FixSpringSelfKokkos::post_force(int /*vflag*/) auto l_yflag = yflag; auto l_zflag = zflag; - Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double& espring_kk) { + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal), LAMMPS_LAMBDA(const int& i, double& espring_kk) { if (l_mask[i] & l_groupbit) { Few x_i; x_i[0] = l_x(i,0); diff --git a/src/KOKKOS/group_kokkos.cpp b/src/KOKKOS/group_kokkos.cpp deleted file mode 100644 index b2de2e6a64..0000000000 --- a/src/KOKKOS/group_kokkos.cpp +++ /dev/null @@ -1,363 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://www.lammps.org/, Sandia National Laboratories - LAMMPS development team: developers@lammps.org - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Mitch Murphy (alphataubio at gmail) -------------------------------------------------------------------------- */ - -#include "group_kokkos.h" - -#include "atom_kokkos.h" -#include "atom_masks.h" -#include "domain_kokkos.h" -#include "kokkos_few.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -template -GroupKokkos::GroupKokkos(LAMMPS *lmp) : Group(lmp) -{ - atomKK = (AtomKokkos *)atom; - execution_space = ExecutionSpaceFromDevice::space; -} - -// ---------------------------------------------------------------------- -// computations on a group of atoms -// ---------------------------------------------------------------------- - -/* ---------------------------------------------------------------------- - compute the total mass of group of atoms - use either per-type mass or per-atom rmass -------------------------------------------------------------------------- */ - -template -double GroupKokkos::mass(int igroup) -{ - int groupbit = bitmask[igroup]; - auto d_mask = atomKK->k_mask.template view(); - double one = 0.0; - - if (atomKK->rmass) { - - auto d_rmass = atomKK->k_rmass.template view(); - atomKK->sync(execution_space,MASK_MASK|RMASS_MASK); - - Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_one) { - if (d_mask(i) & groupbit) l_one += d_rmass(i); - }, one); - - } else { - - auto d_mass = atomKK->k_mass.template view(); - auto d_type = atomKK->k_type.template view(); - atomKK->sync(execution_space,MASK_MASK|TYPE_MASK); - atomKK->k_mass.template sync(); - - Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_one) { - if (d_mask(i) & groupbit) l_one += d_mass(d_type(i)); - }, one); - - } - - double all; - MPI_Allreduce(&one, &all, 1, MPI_DOUBLE, MPI_SUM, world); - return all; -} - -/* ---------------------------------------------------------------------- - compute the center-of-mass coords of group of atoms - masstotal = total mass - return center-of-mass coords in cm[] - must unwrap atoms to compute center-of-mass correctly -------------------------------------------------------------------------- */ - -template -void GroupKokkos::xcm(int igroup, double masstotal, double *xcm) -{ - int groupbit = bitmask[igroup]; - auto d_x = atomKK->k_x.template view(); - auto d_mask = atomKK->k_mask.template view(); - auto d_image = atomKK->k_image.template view(); - auto l_prd = Few(domain->prd); - auto l_h = Few(domain->h); - auto l_triclinic = domain->triclinic; - double cmone[3] = {0.0, 0.0, 0.0}; - - if (atomKK->rmass) { - - auto d_rmass = atomKK->k_rmass.template view(); - atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); - - Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) { - if (d_mask(i) & groupbit) { - double massone = d_rmass(i); - Few x_i; - x_i[0] = d_x(i,0); - x_i[1] = d_x(i,1); - x_i[2] = d_x(i,2); - auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); - l_cmx += unwrapKK[0] * massone; - l_cmy += unwrapKK[1] * massone; - l_cmz += unwrapKK[2] * massone; - } - }, cmone[0], cmone[1], cmone[2]); - - } else { - - auto d_mass = atomKK->k_mass.template view(); - auto d_type = atomKK->k_type.template view(); - atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); - atomKK->k_mass.template sync(); - - Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) { - if (d_mask(i) & groupbit) { - double massone = d_mass(d_type(i)); - Few x_i; - x_i[0] = d_x(i,0); - x_i[1] = d_x(i,1); - x_i[2] = d_x(i,2); - auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); - l_cmx += unwrapKK[0] * massone; - l_cmy += unwrapKK[1] * massone; - l_cmz += unwrapKK[2] * massone; - } - }, cmone[0], cmone[1], cmone[2]); - - } - - MPI_Allreduce(cmone, xcm, 3, MPI_DOUBLE, MPI_SUM, world); - if (masstotal > 0.0) { - xcm[0] /= masstotal; - xcm[1] /= masstotal; - xcm[2] /= masstotal; - } -} - -/* ---------------------------------------------------------------------- - compute the center-of-mass velocity of group of atoms - masstotal = total mass - return center-of-mass velocity in vcm[] -------------------------------------------------------------------------- */ - -template -void GroupKokkos::vcm(int igroup, double masstotal, double *vcm) -{ - int groupbit = bitmask[igroup]; - auto d_v = atomKK->k_v.template view(); - auto d_mask = atomKK->k_mask.template view(); - auto d_image = atomKK->k_image.template view(); - double p[3] = {0.0, 0.0, 0.0}; - - if (atomKK->rmass) { - - auto d_rmass = atomKK->k_rmass.template view(); - atomKK->sync(execution_space,V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); - - Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { - if (d_mask(i) & groupbit) { - double massone = d_rmass(i); - l_px += d_v(i,0) * massone; - l_py += d_v(i,1) * massone; - l_pz += d_v(i,2) * massone; - } - }, p[0], p[1], p[2]); - - } else { - - auto d_mass = atomKK->k_mass.template view(); - auto d_type = atomKK->k_type.template view(); - atomKK->sync(execution_space,V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); - atomKK->k_mass.template sync(); - - Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { - if (d_mask(i) & groupbit) { - double massone = d_mass(d_type(i)); - l_px += d_v(i,0) * massone; - l_py += d_v(i,1) * massone; - l_pz += d_v(i,2) * massone; - } - }, p[0], p[1], p[2]); - - } - - MPI_Allreduce(p, vcm, 3, MPI_DOUBLE, MPI_SUM, world); - if (masstotal > 0.0) { - vcm[0] /= masstotal; - vcm[1] /= masstotal; - vcm[2] /= masstotal; - } -} - -/* ---------------------------------------------------------------------- - compute the angular momentum L (lmom) of group - around center-of-mass cm - must unwrap atoms to compute L correctly -------------------------------------------------------------------------- */ - -template -void GroupKokkos::angmom(int igroup, double *xcm, double *lmom) -{ - int groupbit = bitmask[igroup]; - auto d_x = atomKK->k_x.template view(); - auto d_v = atomKK->k_v.template view(); - auto d_mask = atomKK->k_mask.template view(); - auto d_image = atomKK->k_image.template view(); - auto l_prd = Few(domain->prd); - auto l_h = Few(domain->h); - auto l_triclinic = domain->triclinic; - auto l_xcm0 = xcm[0]; - auto l_xcm1 = xcm[1]; - auto l_xcm2 = xcm[2]; - double p[3] = {0.0, 0.0, 0.0}; - - if (atomKK->rmass) { - - auto d_rmass = atomKK->k_rmass.template view(); - atomKK->sync(execution_space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); - - Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { - if (d_mask(i) & groupbit) { - double massone = d_rmass(i); - Few x_i; - x_i[0] = d_x(i,0); - x_i[1] = d_x(i,1); - x_i[2] = d_x(i,2); - auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); - double dx = unwrapKK[0] - l_xcm0; - double dy = unwrapKK[1] - l_xcm1; - double dz = unwrapKK[2] - l_xcm2; - l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1)); - l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2)); - l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0)); - } - }, p[0], p[1], p[2]); - - } else { - - auto d_mass = atomKK->k_mass.template view(); - auto d_type = atomKK->k_type.template view(); - atomKK->sync(execution_space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); - atomKK->k_mass.template sync(); - - Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { - if (d_mask(i) & groupbit) { - double massone = d_mass(d_type(i)); - Few x_i; - x_i[0] = d_x(i,0); - x_i[1] = d_x(i,1); - x_i[2] = d_x(i,2); - auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); - double dx = unwrapKK[0] - l_xcm0; - double dy = unwrapKK[1] - l_xcm1; - double dz = unwrapKK[2] - l_xcm2; - l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1)); - l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2)); - l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0)); - } - }, p[0], p[1], p[2]); - - } - MPI_Allreduce(p, lmom, 3, MPI_DOUBLE, MPI_SUM, world); -} - -/* ---------------------------------------------------------------------- - compute moment of inertia tensor around center-of-mass xcm of group - must unwrap atoms to compute itensor correctly -------------------------------------------------------------------------- */ - -template -void GroupKokkos::inertia(int igroup, double *xcm, double itensor[3][3]) -{ - int groupbit = bitmask[igroup]; - auto d_x = atomKK->k_x.template view(); - auto d_mask = atomKK->k_mask.template view(); - auto d_image = atomKK->k_image.template view(); - auto l_prd = Few(domain->prd); - auto l_h = Few(domain->h); - auto l_triclinic = domain->triclinic; - auto l_xcm0 = xcm[0]; - auto l_xcm1 = xcm[1]; - auto l_xcm2 = xcm[2]; - - double ione[3][3]; - for (int i = 0; i < 3; i++) - for (int j = 0; j < 3; j++) ione[i][j] = 0.0; - - if (atomKK->rmass) { - - auto d_rmass = atomKK->k_rmass.template view(); - atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); - - Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) { - if (d_mask(i) & groupbit) { - double massone = d_rmass(i); - Few x_i; - x_i[0] = d_x(i,0); - x_i[1] = d_x(i,1); - x_i[2] = d_x(i,2); - auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); - double dx = unwrapKK[0] - l_xcm0; - double dy = unwrapKK[1] - l_xcm1; - double dz = unwrapKK[2] - l_xcm2; - l_i00 += massone * (dy * dy + dz * dz); - l_i11 += massone * (dx * dx + dz * dz); - l_i22 += massone * (dx * dx + dy * dy); - l_i01 -= massone * dx * dy; - l_i12 -= massone * dy * dz; - l_i02 -= massone * dx * dz; - } - }, ione[0][0], ione[1][1], ione[2][2], ione[0][1], ione[1][2], ione[0][2]); - - } else { - - auto d_mass = atomKK->k_mass.template view(); - auto d_type = atomKK->k_type.template view(); - atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); - atomKK->k_mass.template sync(); - - Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) { - if (d_mask(i) & groupbit) { - double massone = d_mass(d_type(i)); - Few x_i; - x_i[0] = d_x(i,0); - x_i[1] = d_x(i,1); - x_i[2] = d_x(i,2); - auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); - double dx = unwrapKK[0] - l_xcm0; - double dy = unwrapKK[1] - l_xcm1; - double dz = unwrapKK[2] - l_xcm2; - l_i00 += massone * (dy * dy + dz * dz); - l_i11 += massone * (dx * dx + dz * dz); - l_i22 += massone * (dx * dx + dy * dy); - l_i01 -= massone * dx * dy; - l_i12 -= massone * dy * dz; - l_i02 -= massone * dx * dz; - } - }, ione[0][0], ione[1][1], ione[2][2], ione[0][1], ione[1][2], ione[0][2]); - - } - - ione[1][0] = ione[0][1]; - ione[2][1] = ione[1][2]; - ione[2][0] = ione[0][2]; - MPI_Allreduce(&ione[0][0], &itensor[0][0], 9, MPI_DOUBLE, MPI_SUM, world); -} - -namespace LAMMPS_NS { -template class GroupKokkos; -#ifdef LMP_KOKKOS_GPU -template class GroupKokkos; -#endif -} diff --git a/src/KOKKOS/group_kokkos.h b/src/KOKKOS/group_kokkos.h index f23023b17c..75c0601357 100644 --- a/src/KOKKOS/group_kokkos.h +++ b/src/KOKKOS/group_kokkos.h @@ -15,22 +15,352 @@ #define LMP_GROUP_KOKKOS_H #include "group.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "domain_kokkos.h" +#include "kokkos_few.h" #include "kokkos_type.h" + namespace LAMMPS_NS { -template class GroupKokkos : public Group { public: - GroupKokkos(class LAMMPS *); - double mass(int); // total mass of atoms in group - void xcm(int, double, double *); // center-of-mass coords of group - void vcm(int, double, double *); // center-of-mass velocity of group - void angmom(int, double *, double *); // angular momentum of group - void inertia(int, double *, double[3][3]); // inertia tensor + GroupKokkos(LAMMPS *lmp) : Group(lmp) { atomKK = (AtomKokkos *)atom; } + +// ---------------------------------------------------------------------- +// computations on a group of atoms +// ---------------------------------------------------------------------- + +/* ---------------------------------------------------------------------- + compute the total mass of group of atoms + use either per-type mass or per-atom rmass +------------------------------------------------------------------------- */ + +template +double mass_kk(int igroup) +{ + auto execution_space = ExecutionSpaceFromDevice::space; + + int groupbit = bitmask[igroup]; + auto d_mask = atomKK->k_mask.template view(); + double one = 0.0; + + if (atomKK->rmass) { + + auto d_rmass = atomKK->k_rmass.template view(); + atomKK->sync(execution_space,MASK_MASK|RMASS_MASK); + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_one) { + if (d_mask(i) & groupbit) l_one += d_rmass(i); + }, one); + + } else { + + auto d_mass = atomKK->k_mass.template view(); + auto d_type = atomKK->k_type.template view(); + atomKK->sync(execution_space,MASK_MASK|TYPE_MASK); + atomKK->k_mass.template sync(); + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_one) { + if (d_mask(i) & groupbit) l_one += d_mass(d_type(i)); + }, one); + + } + + double all; + MPI_Allreduce(&one, &all, 1, MPI_DOUBLE, MPI_SUM, world); + return all; +} + +/* ---------------------------------------------------------------------- + compute the center-of-mass coords of group of atoms + masstotal = total mass + return center-of-mass coords in cm[] + must unwrap atoms to compute center-of-mass correctly +------------------------------------------------------------------------- */ + +template +void xcm_kk(int igroup, double masstotal, double *xcm) +{ + auto execution_space = ExecutionSpaceFromDevice::space; + + int groupbit = bitmask[igroup]; + auto d_x = atomKK->k_x.template view(); + auto d_mask = atomKK->k_mask.template view(); + auto d_image = atomKK->k_image.template view(); + auto l_prd = Few(domain->prd); + auto l_h = Few(domain->h); + auto l_triclinic = domain->triclinic; + double cmone[3] = {0.0, 0.0, 0.0}; + + if (atomKK->rmass) { + + auto d_rmass = atomKK->k_rmass.template view(); + atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) { + if (d_mask(i) & groupbit) { + double massone = d_rmass(i); + Few x_i; + x_i[0] = d_x(i,0); + x_i[1] = d_x(i,1); + x_i[2] = d_x(i,2); + auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); + l_cmx += unwrapKK[0] * massone; + l_cmy += unwrapKK[1] * massone; + l_cmz += unwrapKK[2] * massone; + } + }, cmone[0], cmone[1], cmone[2]); + + } else { + + auto d_mass = atomKK->k_mass.template view(); + auto d_type = atomKK->k_type.template view(); + atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); + atomKK->k_mass.template sync(); + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) { + if (d_mask(i) & groupbit) { + double massone = d_mass(d_type(i)); + Few x_i; + x_i[0] = d_x(i,0); + x_i[1] = d_x(i,1); + x_i[2] = d_x(i,2); + auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); + l_cmx += unwrapKK[0] * massone; + l_cmy += unwrapKK[1] * massone; + l_cmz += unwrapKK[2] * massone; + } + }, cmone[0], cmone[1], cmone[2]); + + } + + MPI_Allreduce(cmone, xcm, 3, MPI_DOUBLE, MPI_SUM, world); + if (masstotal > 0.0) { + xcm[0] /= masstotal; + xcm[1] /= masstotal; + xcm[2] /= masstotal; + } +} + +/* ---------------------------------------------------------------------- + compute the center-of-mass velocity of group of atoms + masstotal = total mass + return center-of-mass velocity in vcm[] +------------------------------------------------------------------------- */ + +template +void vcm_kk(int igroup, double masstotal, double *vcm) +{ + auto execution_space = ExecutionSpaceFromDevice::space; + + int groupbit = bitmask[igroup]; + auto d_v = atomKK->k_v.template view(); + auto d_mask = atomKK->k_mask.template view(); + auto d_image = atomKK->k_image.template view(); + double p[3] = {0.0, 0.0, 0.0}; + + if (atomKK->rmass) { + + auto d_rmass = atomKK->k_rmass.template view(); + atomKK->sync(execution_space,V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { + if (d_mask(i) & groupbit) { + double massone = d_rmass(i); + l_px += d_v(i,0) * massone; + l_py += d_v(i,1) * massone; + l_pz += d_v(i,2) * massone; + } + }, p[0], p[1], p[2]); + + } else { + + auto d_mass = atomKK->k_mass.template view(); + auto d_type = atomKK->k_type.template view(); + atomKK->sync(execution_space,V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); + atomKK->k_mass.template sync(); + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { + if (d_mask(i) & groupbit) { + double massone = d_mass(d_type(i)); + l_px += d_v(i,0) * massone; + l_py += d_v(i,1) * massone; + l_pz += d_v(i,2) * massone; + } + }, p[0], p[1], p[2]); + + } + + MPI_Allreduce(p, vcm, 3, MPI_DOUBLE, MPI_SUM, world); + if (masstotal > 0.0) { + vcm[0] /= masstotal; + vcm[1] /= masstotal; + vcm[2] /= masstotal; + } +} + +/* ---------------------------------------------------------------------- + compute the angular momentum L (lmom) of group + around center-of-mass cm + must unwrap atoms to compute L correctly +------------------------------------------------------------------------- */ + +template +void angmom_kk(int igroup, double *xcm, double *lmom) +{ + auto execution_space = ExecutionSpaceFromDevice::space; + + int groupbit = bitmask[igroup]; + auto d_x = atomKK->k_x.template view(); + auto d_v = atomKK->k_v.template view(); + auto d_mask = atomKK->k_mask.template view(); + auto d_image = atomKK->k_image.template view(); + auto l_prd = Few(domain->prd); + auto l_h = Few(domain->h); + auto l_triclinic = domain->triclinic; + auto l_xcm0 = xcm[0]; + auto l_xcm1 = xcm[1]; + auto l_xcm2 = xcm[2]; + double p[3] = {0.0, 0.0, 0.0}; + + if (atomKK->rmass) { + + auto d_rmass = atomKK->k_rmass.template view(); + atomKK->sync(execution_space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { + if (d_mask(i) & groupbit) { + double massone = d_rmass(i); + Few x_i; + x_i[0] = d_x(i,0); + x_i[1] = d_x(i,1); + x_i[2] = d_x(i,2); + auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); + double dx = unwrapKK[0] - l_xcm0; + double dy = unwrapKK[1] - l_xcm1; + double dz = unwrapKK[2] - l_xcm2; + l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1)); + l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2)); + l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0)); + } + }, p[0], p[1], p[2]); + + } else { + + auto d_mass = atomKK->k_mass.template view(); + auto d_type = atomKK->k_type.template view(); + atomKK->sync(execution_space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); + atomKK->k_mass.template sync(); + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { + if (d_mask(i) & groupbit) { + double massone = d_mass(d_type(i)); + Few x_i; + x_i[0] = d_x(i,0); + x_i[1] = d_x(i,1); + x_i[2] = d_x(i,2); + auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); + double dx = unwrapKK[0] - l_xcm0; + double dy = unwrapKK[1] - l_xcm1; + double dz = unwrapKK[2] - l_xcm2; + l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1)); + l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2)); + l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0)); + } + }, p[0], p[1], p[2]); + + } + MPI_Allreduce(p, lmom, 3, MPI_DOUBLE, MPI_SUM, world); +} + +/* ---------------------------------------------------------------------- + compute moment of inertia tensor around center-of-mass xcm of group + must unwrap atoms to compute itensor correctly +------------------------------------------------------------------------- */ + +template +void inertia_kk(int igroup, double *xcm, double itensor[3][3]) +{ + auto execution_space = ExecutionSpaceFromDevice::space; + + int groupbit = bitmask[igroup]; + auto d_x = atomKK->k_x.template view(); + auto d_mask = atomKK->k_mask.template view(); + auto d_image = atomKK->k_image.template view(); + auto l_prd = Few(domain->prd); + auto l_h = Few(domain->h); + auto l_triclinic = domain->triclinic; + auto l_xcm0 = xcm[0]; + auto l_xcm1 = xcm[1]; + auto l_xcm2 = xcm[2]; + + double ione[3][3]; + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) ione[i][j] = 0.0; + + if (atomKK->rmass) { + + auto d_rmass = atomKK->k_rmass.template view(); + atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) { + if (d_mask(i) & groupbit) { + double massone = d_rmass(i); + Few x_i; + x_i[0] = d_x(i,0); + x_i[1] = d_x(i,1); + x_i[2] = d_x(i,2); + auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); + double dx = unwrapKK[0] - l_xcm0; + double dy = unwrapKK[1] - l_xcm1; + double dz = unwrapKK[2] - l_xcm2; + l_i00 += massone * (dy * dy + dz * dz); + l_i11 += massone * (dx * dx + dz * dz); + l_i22 += massone * (dx * dx + dy * dy); + l_i01 -= massone * dx * dy; + l_i12 -= massone * dy * dz; + l_i02 -= massone * dx * dz; + } + }, ione[0][0], ione[1][1], ione[2][2], ione[0][1], ione[1][2], ione[0][2]); + + } else { + + auto d_mass = atomKK->k_mass.template view(); + auto d_type = atomKK->k_type.template view(); + atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); + atomKK->k_mass.template sync(); + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) { + if (d_mask(i) & groupbit) { + double massone = d_mass(d_type(i)); + Few x_i; + x_i[0] = d_x(i,0); + x_i[1] = d_x(i,1); + x_i[2] = d_x(i,2); + auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); + double dx = unwrapKK[0] - l_xcm0; + double dy = unwrapKK[1] - l_xcm1; + double dz = unwrapKK[2] - l_xcm2; + l_i00 += massone * (dy * dy + dz * dz); + l_i11 += massone * (dx * dx + dz * dz); + l_i22 += massone * (dx * dx + dy * dy); + l_i01 -= massone * dx * dy; + l_i12 -= massone * dy * dz; + l_i02 -= massone * dx * dz; + } + }, ione[0][0], ione[1][1], ione[2][2], ione[0][1], ione[1][2], ione[0][2]); + + } + + ione[1][0] = ione[0][1]; + ione[2][1] = ione[1][2]; + ione[2][0] = ione[0][2]; + MPI_Allreduce(&ione[0][0], &itensor[0][0], 9, MPI_DOUBLE, MPI_SUM, world); +} - private: - ExecutionSpace execution_space; }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/improper_harmonic_kokkos.cpp b/src/KOKKOS/improper_harmonic_kokkos.cpp index eafa7a08ec..89ca31b9ca 100644 --- a/src/KOKKOS/improper_harmonic_kokkos.cpp +++ b/src/KOKKOS/improper_harmonic_kokkos.cpp @@ -74,14 +74,14 @@ void ImproperHarmonicKokkos::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - if(k_eatom.extent(0) < maxeatom) { + if ((int)k_eatom.extent(0) < maxeatom) { memoryKK->destroy_kokkos(k_eatom,eatom); memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"improper:eatom"); d_eatom = k_eatom.template view(); } else Kokkos::deep_copy(d_eatom,0.0); } if (vflag_atom) { - if(k_vatom.extent(0) < maxvatom) { + if ((int)k_vatom.extent(0) < maxvatom) { memoryKK->destroy_kokkos(k_vatom,vatom); memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"improper:vatom"); d_vatom = k_vatom.template view(); diff --git a/src/KOKKOS/improper_hybrid_kokkos.cpp b/src/KOKKOS/improper_hybrid_kokkos.cpp index bfa55978cc..885405187b 100644 --- a/src/KOKKOS/improper_hybrid_kokkos.cpp +++ b/src/KOKKOS/improper_hybrid_kokkos.cpp @@ -77,7 +77,7 @@ void ImproperHybridKokkos::compute(int eflag, int vflag) Kokkos::parallel_for(nimproperlist_orig,LAMMPS_LAMBDA(int i) { const int m = d_map[d_improperlist_orig(i,4)]; - if (m >= 0) Kokkos::atomic_increment(&d_nimproperlist[m]); + if (m >= 0) Kokkos::atomic_inc(&d_nimproperlist[m]); }); k_nimproperlist.modify_device(); @@ -88,7 +88,7 @@ void ImproperHybridKokkos::compute(int eflag, int vflag) if (h_nimproperlist[m] > maximproper_all) maximproper_all = h_nimproperlist[m] + EXTRA; - if (k_improperlist.d_view.extent(1) < maximproper_all) + if ((int)k_improperlist.d_view.extent(1) < maximproper_all) MemKK::realloc_kokkos(k_improperlist, "improper_hybrid:improperlist", nstyles, maximproper_all, 5); auto d_improperlist = k_improperlist.d_view; diff --git a/src/KOKKOS/memory_kokkos.h b/src/KOKKOS/memory_kokkos.h index 026c8afcb4..c194cc173f 100644 --- a/src/KOKKOS/memory_kokkos.h +++ b/src/KOKKOS/memory_kokkos.h @@ -221,15 +221,19 @@ TYPE create_kokkos(TYPE &data, typename TYPE::value_type ***&array, int n1, int n2, int n3, const char *name) { data = TYPE(std::string(name),n1,n2,n3); - bigint nbytes = ((bigint) sizeof(typename TYPE::value_type **)) * n1; + bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1 * n2; + typename TYPE::value_type **plane = (typename TYPE::value_type **) smalloc(nbytes,name); + nbytes = ((bigint) sizeof(typename TYPE::value_type **)) * n1; array = (typename TYPE::value_type ***) smalloc(nbytes,name); + bigint m; for (int i = 0; i < n1; i++) { if (n2 == 0) { array[i] = nullptr; } else { - nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n2; - array[i] = (typename TYPE::value_type **) smalloc(nbytes,name); + m = ((bigint) i) * n2; + array[i] = &plane[m]; + for (int j = 0; j < n2; j++) { if (n3 == 0) array[i][j] = nullptr; @@ -248,15 +252,19 @@ template { data = TYPE(std::string(name),n1,n2); h_data = Kokkos::create_mirror_view(data); - bigint nbytes = ((bigint) sizeof(typename TYPE::value_type **)) * n1; + bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1 * n2; + typename TYPE::value_type **plane = (typename TYPE::value_type **) smalloc(nbytes,name); + nbytes = ((bigint) sizeof(typename TYPE::value_type **)) * n1; array = (typename TYPE::value_type ***) smalloc(nbytes,name); + bigint m; for (int i = 0; i < n1; i++) { if (n2 == 0) { array[i] = nullptr; } else { - nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n2; - array[i] = (typename TYPE::value_type **) smalloc(nbytes,name); + m = ((bigint) i) * n2; + array[i] = &plane[m]; + for (int j = 0; j < n2; j++) { if (n3 == 0) array[i][j] = nullptr; @@ -288,15 +296,19 @@ TYPE grow_kokkos(TYPE &data, typename TYPE::value_type ***&array, { if (array == nullptr) return create_kokkos(data,array,n1,n2,n3,name); data.resize(n1,n2,n3); - bigint nbytes = ((bigint) sizeof(typename TYPE::value_type **)) * n1; - array = (typename TYPE::value_type ***) smalloc(nbytes,name); + bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1 * n2; + typename TYPE::value_type **plane = (typename TYPE::value_type **) srealloc(array[0],nbytes,name); + nbytes = ((bigint) sizeof(typename TYPE::value_type **)) * n1; + array = (typename TYPE::value_type ***) srealloc(array,nbytes,name); + bigint m; for (int i = 0; i < n1; i++) { if (n2 == 0) { array[i] = nullptr; } else { - nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n2; - array[i] = (typename TYPE::value_type **) smalloc(nbytes,name); + m = ((bigint) i) * n2; + array[i] = &plane[m]; + for (int j = 0; j < n2; j++) { if (n3 == 0) array[i][j] = nullptr; @@ -316,10 +328,9 @@ template void destroy_kokkos(TYPE data, typename TYPE::value_type*** &array) { if (array == nullptr) return; - int n1 = data.extent(0); - for (int i = 0; i < n1; ++i) - sfree(array[i]); data = TYPE(); + + sfree(array[0]); sfree(array); array = nullptr; } @@ -411,7 +422,7 @@ template TYPE create_kokkos(TYPE &data, int n1, int n2, int n3, int n4, int n5 , int n6 ,const char *name) { data = TYPE(); - data = TYPE(std::string(name) ,n1,n2,n3,n4,n5,n6); + data = TYPE(std::string(name),n1,n2,n3,n4,n5,n6); return data; } @@ -420,4 +431,3 @@ TYPE create_kokkos(TYPE &data, int n1, int n2, int n3, int n4, int n5 , int n6 , } #endif - diff --git a/src/KOKKOS/mliap_data_kokkos.cpp b/src/KOKKOS/mliap_data_kokkos.cpp index fd5a852114..fd2859f802 100644 --- a/src/KOKKOS/mliap_data_kokkos.cpp +++ b/src/KOKKOS/mliap_data_kokkos.cpp @@ -145,13 +145,13 @@ void MLIAPDataKokkos::generate_neighdata(class NeighList *list_in, i auto type = atomKK->k_type.view(); auto map=k_pairmliap->k_map.template view(); - Kokkos::parallel_scan(natomneigh, KOKKOS_LAMBDA (int ii, int &update, const bool final) { + Kokkos::parallel_scan(Kokkos::RangePolicy(0,natomneigh), KOKKOS_LAMBDA (int ii, int &update, const bool final) { if (final) d_ij(ii) = update; update += d_numneighs(ii); }); - Kokkos::parallel_for(natomneigh, KOKKOS_LAMBDA (int ii) { + Kokkos::parallel_for(Kokkos::RangePolicy(0,natomneigh), KOKKOS_LAMBDA (int ii) { int ij = d_ij(ii); const int i = d_ilist[ii]; const double xtmp = x(i, 0); @@ -183,7 +183,7 @@ void MLIAPDataKokkos::generate_neighdata(class NeighList *list_in, i d_ielems[ii] = ielem; }); - Kokkos::parallel_for(nmax, KOKKOS_LAMBDA (int i) { + Kokkos::parallel_for(Kokkos::RangePolicy(0,nmax), KOKKOS_LAMBDA (int i) { const int itype = type(i); d_elems(i) = map(itype); }); @@ -225,7 +225,7 @@ void MLIAPDataKokkos::grow_neigharrays() { auto d_cutsq=k_pairmliap->k_cutsq.template view(); auto h_cutsq=k_pairmliap->k_cutsq.template view(); auto d_numneighs = k_numneighs.template view(); - Kokkos::parallel_reduce(natomneigh, KOKKOS_LAMBDA (int ii, int &contrib) { + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,natomneigh), KOKKOS_LAMBDA (int ii, int &contrib) { const int i = d_ilist[ii]; int count=0; const double xtmp = x(i, 0); diff --git a/src/KOKKOS/mliap_descriptor_so3_kokkos.cpp b/src/KOKKOS/mliap_descriptor_so3_kokkos.cpp index 1cf368e952..9f18078ac6 100644 --- a/src/KOKKOS/mliap_descriptor_so3_kokkos.cpp +++ b/src/KOKKOS/mliap_descriptor_so3_kokkos.cpp @@ -75,7 +75,7 @@ void MLIAPDescriptorSO3Kokkos::compute_forces(class MLIAPData *data_ Kokkos::View virial("virial"); data->k_pairmliap->k_vatom.template modify(); data->k_pairmliap->k_vatom.template sync(); - Kokkos::parallel_for(data->nlistatoms, KOKKOS_LAMBDA(int ii) { + Kokkos::parallel_for(Kokkos::RangePolicy(0,data->nlistatoms), KOKKOS_LAMBDA(int ii) { double fij[3]; const int i = d_iatoms(ii); @@ -187,7 +187,7 @@ void MLIAPDescriptorSO3Kokkos::compute_force_gradients(class MLIAPDa auto yoffset = data->yoffset, zoffset = data->zoffset, gamma_nnz = data->gamma_nnz; - Kokkos::parallel_for (data->nlistatoms, KOKKOS_LAMBDA (int ii) { + Kokkos::parallel_for (Kokkos::RangePolicy(0,data->nlistatoms), KOKKOS_LAMBDA (int ii) { const int i = d_iatoms(ii); // ensure rij, inside, wj, and rcutij are of size jnum diff --git a/src/KOKKOS/pair_meam_kokkos.cpp b/src/KOKKOS/pair_meam_kokkos.cpp index 9082c410e0..9852727855 100644 --- a/src/KOKKOS/pair_meam_kokkos.cpp +++ b/src/KOKKOS/pair_meam_kokkos.cpp @@ -147,7 +147,7 @@ void PairMEAMKokkos::compute(int eflag_in, int vflag_in) auto l_numneigh_half = d_numneigh_half; auto l_offset = d_offset; - Kokkos::parallel_scan(inum_half, LAMMPS_LAMBDA(int ii, int &m_fill, bool final) { + Kokkos::parallel_scan(Kokkos::RangePolicy(0,inum_half), LAMMPS_LAMBDA(int ii, int &m_fill, bool final) { int i = l_ilist_half[ii]; m_fill += l_numneigh_half[i]; if (final) diff --git a/src/KOKKOS/pair_mliap_kokkos.cpp b/src/KOKKOS/pair_mliap_kokkos.cpp index 5739a8ea2e..3c5bb7d910 100644 --- a/src/KOKKOS/pair_mliap_kokkos.cpp +++ b/src/KOKKOS/pair_mliap_kokkos.cpp @@ -302,7 +302,7 @@ void PairMLIAPKokkos::e_tally(MLIAPData* data) auto d_iatoms = k_data->k_iatoms.template view(); auto d_eatoms = k_data->k_eatoms.template view(); auto d_eatom = k_eatom.template view(); - Kokkos::parallel_for(data->nlistatoms, KOKKOS_LAMBDA (int ii) { + Kokkos::parallel_for(Kokkos::RangePolicy(0,data->nlistatoms), KOKKOS_LAMBDA (int ii) { d_eatom(d_iatoms(ii)) = d_eatoms(ii); }); k_eatom.modify(); diff --git a/src/KOKKOS/pair_pod_kokkos.cpp b/src/KOKKOS/pair_pod_kokkos.cpp index b0cab5a1df..85b9802034 100644 --- a/src/KOKKOS/pair_pod_kokkos.cpp +++ b/src/KOKKOS/pair_pod_kokkos.cpp @@ -532,7 +532,7 @@ int PairPODKokkos::NeighborCount(t_pod_1i l_numij, double l_rcutsq, auto l_neighbors = d_neighbors; // compute number of pairs for each atom i - Kokkos::parallel_for("NeighborCount", Kokkos::TeamPolicy<>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) { + Kokkos::parallel_for("NeighborCount", typename Kokkos::TeamPolicy(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy::member_type& team) { int i = team.league_rank(); int gi = l_ilist(gi1 + i); double xi0 = l_x(gi, 0); @@ -555,7 +555,7 @@ int PairPODKokkos::NeighborCount(t_pod_1i l_numij, double l_rcutsq, }); // accumalative sum - Kokkos::parallel_scan("InclusivePrefixSum", Ni + 1, KOKKOS_LAMBDA(int i, int& update, const bool final) { + Kokkos::parallel_scan("InclusivePrefixSum", Kokkos::RangePolicy(0,Ni + 1), KOKKOS_LAMBDA(int i, int& update, const bool final) { if (i > 0) { update += l_numij(i); if (final) { @@ -582,7 +582,7 @@ void PairPODKokkos::NeighborList(t_pod_1d l_rij, t_pod_1i l_numij, auto l_map = d_map; auto l_type = type; - Kokkos::parallel_for("NeighborList", Kokkos::TeamPolicy<>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) { + Kokkos::parallel_for("NeighborList", typename Kokkos::TeamPolicy(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy::member_type& team) { int i = team.league_rank(); int gi = l_ilist(gi1 + i); double xi0 = l_x(gi, 0); @@ -622,7 +622,7 @@ void PairPODKokkos::radialbasis(t_pod_1d rbft, t_pod_1d rbftx, t_pod t_pod_1d l_rij, t_pod_1d l_besselparams, double l_rin, double l_rmax, int l_besseldegree, int l_inversedegree, int l_nbesselpars, int Nij) { - Kokkos::parallel_for("ComputeRadialBasis", Nij, KOKKOS_LAMBDA(int n) { + Kokkos::parallel_for("ComputeRadialBasis", Kokkos::RangePolicy(0,Nij), KOKKOS_LAMBDA(int n) { double xij1 = l_rij(0+3*n); double xij2 = l_rij(1+3*n); double xij3 = l_rij(2+3*n); @@ -722,7 +722,7 @@ void PairPODKokkos::radialbasis(t_pod_1d rbft, t_pod_1d rbftx, t_pod template void PairPODKokkos::matrixMultiply(t_pod_1d a, t_pod_1d b, t_pod_1d c, int r1, int c1, int c2) { - Kokkos::parallel_for("MatrixMultiply", r1 * c2, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("MatrixMultiply", Kokkos::RangePolicy(0,r1 * c2), KOKKOS_LAMBDA(int idx) { int j = idx / r1; // Calculate column index int i = idx % r1; // Calculate row index double sum = 0.0; @@ -737,7 +737,7 @@ template void PairPODKokkos::angularbasis(t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, t_pod_1d l_rij, t_pod_1i l_pq3, int l_K3, int N) { - Kokkos::parallel_for("AngularBasis", N, KOKKOS_LAMBDA(int j) { + Kokkos::parallel_for("AngularBasis", Kokkos::RangePolicy(0,N), KOKKOS_LAMBDA(int j) { double x = l_rij(j*3 + 0); double y = l_rij(j*3 + 1); double z = l_rij(j*3 + 2); @@ -817,7 +817,7 @@ void PairPODKokkos::radialangularsum(t_pod_1d l_sumU, t_pod_1d l_rbf { int totalIterations = l_nrbf3 * l_K3 * Ni; if (l_nelements==1) { - Kokkos::parallel_for("RadialAngularSum", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("RadialAngularSum", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int k = idx % l_K3; int temp = idx / l_K3; int m = temp % l_nrbf3; @@ -835,7 +835,7 @@ void PairPODKokkos::radialangularsum(t_pod_1d l_sumU, t_pod_1d l_rbf }); } else { - Kokkos::parallel_for("RadialAngularSum", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("RadialAngularSum", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int k = idx % l_K3; int temp = idx / l_K3; int m = temp % l_nrbf3; @@ -863,7 +863,7 @@ void PairPODKokkos::twobodydesc(t_pod_1d d2, t_pod_1d l_rbf, t_pod_ int l_nrbf2, const int Ni, const int Nij) { int totalIterations = l_nrbf2 * Nij; - Kokkos::parallel_for("twobodydesc", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("twobodydesc", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int n = idx / l_nrbf2; // pair index int m = idx % l_nrbf2; // rbd index int i2 = n + Nij * m; // Index of the radial basis function for atom n and RBF m @@ -876,7 +876,7 @@ void PairPODKokkos::twobody_forces(t_pod_1d fij, t_pod_1d cb2, t_pod t_pod_1d l_rbfz, t_pod_1i l_idxi, t_pod_1i l_tj, int l_nrbf2, const int Ni, const int Nij) { int totalIterations = l_nrbf2 * Nij; - Kokkos::parallel_for("twobody_forces", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("twobody_forces", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int n = idx / l_nrbf2; // pair index int m = idx % l_nrbf2; // rbd index int i2 = n + Nij * m; // Index of the radial basis function for atom n and RBF m @@ -893,7 +893,7 @@ void PairPODKokkos::threebodydesc(t_pod_1d d3, t_pod_1d l_sumU, t_po int l_nelements, int l_nrbf3, int l_nabf3, int l_K3, const int Ni) { int totalIterations = l_nrbf3 * Ni; - Kokkos::parallel_for("ThreeBodyDesc", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("ThreeBodyDesc", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int m = idx % l_nrbf3; int i = idx / l_nrbf3; int nmi = l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3*i; @@ -925,7 +925,7 @@ void PairPODKokkos::threebody_forces(t_pod_1d fij, t_pod_1d cb3, t_p { int totalIterations = l_nrbf3 * Nij; if (l_nelements==1) { - Kokkos::parallel_for("threebody_forces1", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("threebody_forces1", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int j = idx / l_nrbf3; // Calculate j using integer division int m = idx % l_nrbf3; // Calculate m using modulo operation int idxR = j + Nij * m; // Pre-compute the index for rbf @@ -961,7 +961,7 @@ void PairPODKokkos::threebody_forces(t_pod_1d fij, t_pod_1d cb3, t_p } else { int N3 = Ni * l_nabf3 * l_nrbf3; - Kokkos::parallel_for("threebody_forces2", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("threebody_forces2", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int j = idx / l_nrbf3; // Derive the original j value int m = idx % l_nrbf3; // Derive the original m value int i2 = l_tj(j) - 1; @@ -1007,7 +1007,7 @@ void PairPODKokkos::threebody_forcecoeff(t_pod_1d fb3, t_pod_1d cb3, { int totalIterations = l_nrbf3 * Ni; if (l_nelements==1) { - Kokkos::parallel_for("threebody_forcecoeff1", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("threebody_forcecoeff1", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int i = idx / l_nrbf3; // Calculate j using integer division int m = idx % l_nrbf3; // Calculate m using modulo operation for (int p = 0; p < l_nabf3; p++) { @@ -1024,7 +1024,7 @@ void PairPODKokkos::threebody_forcecoeff(t_pod_1d fb3, t_pod_1d cb3, } else { int N3 = Ni * l_nabf3 * l_nrbf3; - Kokkos::parallel_for("threebody_forcecoeff2", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("threebody_forcecoeff2", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int i = idx / l_nrbf3; // Derive the original j value int m = idx % l_nrbf3; // Derive the original m value for (int p = 0; p < l_nabf3; p++) { @@ -1054,7 +1054,7 @@ void PairPODKokkos::fourbodydesc(t_pod_1d d4, t_pod_1d l_sumU, t_po t_pod_1i l_pc4, int l_nelements, int l_nrbf3, int l_nrbf4, int l_nabf4, int l_K3, int l_Q4, int Ni) { int totalIterations = l_nrbf4 * Ni; - Kokkos::parallel_for("fourbodydesc", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("fourbodydesc", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int m = idx % l_nrbf4; int i = idx / l_nrbf4; int idxU = l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3 * i; @@ -1092,7 +1092,7 @@ void PairPODKokkos::fourbody_forces(t_pod_1d fij, t_pod_1d cb4, t_po { int totalIterations = l_nrbf4 * Nij; if (l_nelements==1) { - Kokkos::parallel_for("fourbody_forces1", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("fourbody_forces1", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int j = idx / l_nrbf4; // Derive the original j value int m = idx % l_nrbf4; // Derive the original m value int idxU = l_K3 * m + l_K3*l_nrbf3*l_idxi(j); @@ -1151,7 +1151,7 @@ void PairPODKokkos::fourbody_forces(t_pod_1d fij, t_pod_1d cb4, t_po } else { int N3 = Ni * l_nabf4 * l_nrbf4; - Kokkos::parallel_for("fourbody_forces2", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("fourbody_forces2", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int j = idx / l_nrbf4; // Derive the original j value int m = idx % l_nrbf4; // Derive the original m value int idxM = j + Nij * m; @@ -1241,7 +1241,7 @@ void PairPODKokkos::fourbody_forcecoeff(t_pod_1d fb4, t_pod_1d cb4, { int totalIterations = l_nrbf4 * Ni; if (l_nelements==1) { - Kokkos::parallel_for("fourbody_forcecoeff1", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("fourbody_forcecoeff1", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int i = idx / l_nrbf4; // Derive the original j value int m = idx % l_nrbf4; // Derive the original m value int idxU = l_K3 * m + l_K3*l_nrbf3*i; @@ -1268,7 +1268,7 @@ void PairPODKokkos::fourbody_forcecoeff(t_pod_1d fb4, t_pod_1d cb4, } else { int N3 = Ni * l_nabf4 * l_nrbf4; - Kokkos::parallel_for("fourbody_forcecoeff2", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("fourbody_forcecoeff2", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int i = idx / l_nrbf4; // Derive the original j value int m = idx % l_nrbf4; // Derive the original m value for (int p = 0; p < l_nabf4; p++) { @@ -1311,7 +1311,7 @@ void PairPODKokkos::allbody_forces(t_pod_1d fij, t_pod_1d l_forcecoe t_pod_1i l_idxi, t_pod_1i l_tj, int l_nelements, int l_nrbf3, int l_K3, int Nij) { int totalIterations = l_nrbf3 * Nij; - Kokkos::parallel_for("allbody_forces", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("allbody_forces", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int j = idx / l_nrbf3; // Calculate j using integer division int m = idx % l_nrbf3; // Calculate m using modulo operation int i2 = l_tj(j) - 1; @@ -1346,7 +1346,7 @@ template void PairPODKokkos::crossdesc(t_pod_1d d12, t_pod_1d d1, t_pod_1d d2, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni) { int totalIterations = n12 * Ni; - Kokkos::parallel_for("crossdesc", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("crossdesc", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int n = idx % Ni; int i = idx / Ni; @@ -1359,7 +1359,7 @@ void PairPODKokkos::crossdesc_reduction(t_pod_1d cb1, t_pod_1d cb2, t_pod_1d d2, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni) { int totalIterations = n12 * Ni; - Kokkos::parallel_for("crossdesc_reduction", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("crossdesc_reduction", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int n = idx % Ni; // Ni int m = idx / Ni; // n12 int k1 = ind1(m); // dd1 @@ -1375,7 +1375,7 @@ void PairPODKokkos::crossdesc_reduction(t_pod_1d cb1, t_pod_1d cb2, template void PairPODKokkos::set_array_to_zero(t_pod_1d a, int N) { - Kokkos::parallel_for("initialize_array", N, KOKKOS_LAMBDA(int i) { + Kokkos::parallel_for("initialize_array", Kokkos::RangePolicy(0,N), KOKKOS_LAMBDA(int i) { a(i) = 0.0; }); } @@ -1480,7 +1480,7 @@ void PairPODKokkos::blockatom_base_coefficients(t_pod_1d ei, t_pod_1 int nDes = Mdesc; int nCoeff = nCoeffPerElement; - Kokkos::parallel_for("atomic_energies", Ni, KOKKOS_LAMBDA(int n) { + Kokkos::parallel_for("atomic_energies", Kokkos::RangePolicy(0,Ni), KOKKOS_LAMBDA(int n) { int nc = nCoeff*(tyai[n]-1); ei[n] = cefs[0 + nc]; for (int m=0; m::blockatom_base_coefficients(t_pod_1d ei, t_pod_1 }); int totalIterations = Ni*nDes; - Kokkos::parallel_for("base_coefficients", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("base_coefficients", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int n = idx % Ni; int m = idx / Ni; int nc = nCoeff*(tyai[n]-1); @@ -1516,7 +1516,7 @@ void PairPODKokkos::blockatom_environment_descriptors(t_pod_1d ei, t int nCoeff = nCoeffPerElement; int totalIterations = Ni*nCom; - Kokkos::parallel_for("pca", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("pca", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int i = idx % Ni; int k = idx / Ni; double sum = 0.0; @@ -1528,7 +1528,7 @@ void PairPODKokkos::blockatom_environment_descriptors(t_pod_1d ei, t }); totalIterations = Ni*nCls; - Kokkos::parallel_for("inverse_square_distances", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("inverse_square_distances", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int i = idx % Ni; int j = idx / Ni; int typei = tyai[i]-1; @@ -1541,14 +1541,14 @@ void PairPODKokkos::blockatom_environment_descriptors(t_pod_1d ei, t D[i + Ni*j] = 1.0 / sum; }); - Kokkos::parallel_for("Probabilities", Ni, KOKKOS_LAMBDA(int i) { + Kokkos::parallel_for("Probabilities", Kokkos::RangePolicy(0,Ni), KOKKOS_LAMBDA(int i) { double sum = 0; for (int j = 0; j < nCls; j++) sum += D[i + Ni*j]; sumD[i] = sum; for (int j = 0; j < nCls; j++) P[i + Ni*j] = D[i + Ni*j]/sum; }); - Kokkos::parallel_for("atomic_energies", Ni, KOKKOS_LAMBDA(int n) { + Kokkos::parallel_for("atomic_energies", Kokkos::RangePolicy(0,Ni), KOKKOS_LAMBDA(int n) { int nc = nCoeff*(tyai[n]-1); ei[n] = cefs[0 + nc]; for (int k = 0; k::blockatom_environment_descriptors(t_pod_1d ei, t ei[n] += cefs[1 + m + nDes*k + nc]*B[n + Ni*m]*P[n + Ni*k]; }); - Kokkos::parallel_for("env_coefficients", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("env_coefficients", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int n = idx % Ni; int k = idx / Ni; int nc = nCoeff*(tyai[n]-1); @@ -1567,7 +1567,7 @@ void PairPODKokkos::blockatom_environment_descriptors(t_pod_1d ei, t }); totalIterations = Ni*nDes; - Kokkos::parallel_for("base_coefficients", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("base_coefficients", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int n = idx % Ni; int m = idx / Ni; int nc = nCoeff*(tyai[n]-1); @@ -1577,7 +1577,7 @@ void PairPODKokkos::blockatom_environment_descriptors(t_pod_1d ei, t cb[n + Ni*m] = sum; }); - Kokkos::parallel_for("base_env_coefficients", totalIterations, KOKKOS_LAMBDA(int idx) { + Kokkos::parallel_for("base_env_coefficients", Kokkos::RangePolicy(0,totalIterations), KOKKOS_LAMBDA(int idx) { int i = idx % Ni; int m = idx / Ni; int typei = tyai[i]-1; @@ -1670,7 +1670,7 @@ template void PairPODKokkos::tallyforce(t_pod_1d l_fij, t_pod_1i l_ai, t_pod_1i l_aj, int Nij) { auto l_f = f; - Kokkos::parallel_for("TallyForce", Nij, KOKKOS_LAMBDA(int n) { + Kokkos::parallel_for("TallyForce", Kokkos::RangePolicy(0,Nij), KOKKOS_LAMBDA(int n) { int im = l_ai(n); int jm = l_aj(n); int n3 = 3*n; @@ -1694,7 +1694,7 @@ void PairPODKokkos::tallyenergy(t_pod_1d l_ei, int istart, int Ni) // For global energy tally if (eflag_global) { double local_eng_vdwl = 0.0; - Kokkos::parallel_reduce("GlobalEnergyTally", Ni, KOKKOS_LAMBDA(int k, E_FLOAT& update) { + Kokkos::parallel_reduce("GlobalEnergyTally", Kokkos::RangePolicy(0,Ni), KOKKOS_LAMBDA(int k, E_FLOAT& update) { update += l_ei(k); }, local_eng_vdwl); @@ -1704,7 +1704,7 @@ void PairPODKokkos::tallyenergy(t_pod_1d l_ei, int istart, int Ni) // For per-atom energy tally if (eflag_atom) { - Kokkos::parallel_for("PerAtomEnergyTally", Ni, KOKKOS_LAMBDA(int k) { + Kokkos::parallel_for("PerAtomEnergyTally", Kokkos::RangePolicy(0,Ni), KOKKOS_LAMBDA(int k) { l_eatom(istart + k) += l_ei(k); }); } @@ -1718,7 +1718,7 @@ void PairPODKokkos::tallystress(t_pod_1d l_fij, t_pod_1d l_rij, t_po if (vflag_global) { for (int j=0; j<3; j++) { F_FLOAT sum = 0.0; - Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) { + Kokkos::parallel_reduce("GlobalStressTally", Kokkos::RangePolicy(0,Nij), KOKKOS_LAMBDA(int k, F_FLOAT& update) { int k3 = 3*k; update += l_rij(j + k3) * l_fij(j + k3); }, sum); @@ -1726,21 +1726,21 @@ void PairPODKokkos::tallystress(t_pod_1d l_fij, t_pod_1d l_rij, t_po } F_FLOAT sum = 0.0; - Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) { + Kokkos::parallel_reduce("GlobalStressTally", Kokkos::RangePolicy(0,Nij), KOKKOS_LAMBDA(int k, F_FLOAT& update) { int k3 = 3*k; update += l_rij(k3) * l_fij(1 + k3); }, sum); virial[3] -= sum; sum = 0.0; - Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) { + Kokkos::parallel_reduce("GlobalStressTally", Kokkos::RangePolicy(0,Nij), KOKKOS_LAMBDA(int k, F_FLOAT& update) { int k3 = 3*k; update += l_rij(k3) * l_fij(2 + k3); }, sum); virial[4] -= sum; sum = 0.0; - Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) { + Kokkos::parallel_reduce("GlobalStressTally", Kokkos::RangePolicy(0,Nij), KOKKOS_LAMBDA(int k, F_FLOAT& update) { int k3 = 3*k; update += l_rij(1+k3) * l_fij(2+k3); }, sum); @@ -1748,7 +1748,7 @@ void PairPODKokkos::tallystress(t_pod_1d l_fij, t_pod_1d l_rij, t_po } if (vflag_atom) { - Kokkos::parallel_for("PerAtomStressTally", Nij, KOKKOS_LAMBDA(int k) { + Kokkos::parallel_for("PerAtomStressTally", Kokkos::RangePolicy(0,Nij), KOKKOS_LAMBDA(int k) { int i = l_ai(k); int j = l_aj(k); int k3 = 3*k; diff --git a/src/KOKKOS/pair_reaxff_kokkos.cpp b/src/KOKKOS/pair_reaxff_kokkos.cpp index b0a53a27fd..85bd139bfb 100644 --- a/src/KOKKOS/pair_reaxff_kokkos.cpp +++ b/src/KOKKOS/pair_reaxff_kokkos.cpp @@ -385,13 +385,13 @@ void PairReaxFFKokkos::init_md() swb = api->control->nonb_cut; enobondsflag = api->control->enobondsflag; - if (fabs(swa) > 0.01) - error->warning(FLERR,"Warning: non-zero lower Taper-radius cutoff"); + if ((fabs(swa) > 0.01) && (comm->me == 0)) + error->warning(FLERR, "Non-zero lower Taper-radius cutoff"); - if (swb < 0) - error->one(FLERR,"Negative upper Taper-radius cutoff"); - else if (swb < 5) - error->one(FLERR,"Warning: very low Taper-radius cutoff: {}\n", swb); + if (swb < 0.0) { + error->all(FLERR,"Negative upper Taper-radius cutoff"); + } else if ((swb < 5.0) && (comm->me ==0)) + error->warning(FLERR,"Very low Taper-radius cutoff: {}\n", swb); d1 = swb - swa; d7 = powint(d1,7); diff --git a/src/KOKKOS/rand_pool_wrap_kokkos.cpp b/src/KOKKOS/rand_pool_wrap_kokkos.cpp index efdd932987..eb2caf879f 100644 --- a/src/KOKKOS/rand_pool_wrap_kokkos.cpp +++ b/src/KOKKOS/rand_pool_wrap_kokkos.cpp @@ -39,7 +39,7 @@ RandPoolWrap::~RandPoolWrap() void RandPoolWrap::destroy() { if (random_thr) { - for (int i=1; i < nthreads; ++i) + for (int i = 1; i < nthreads; ++i) delete random_thr[i]; delete[] random_thr; @@ -51,7 +51,7 @@ void RandPoolWrap::init(RanMars* random, int seed) { // deallocate pool of RNGs if (random_thr) { - for (int i=1; i < this->nthreads; ++i) + for (int i = 1; i < nthreads; ++i) delete random_thr[i]; delete[] random_thr; diff --git a/src/KOKKOS/rand_pool_wrap_kokkos.h b/src/KOKKOS/rand_pool_wrap_kokkos.h index f79a6a1caa..60e9776039 100644 --- a/src/KOKKOS/rand_pool_wrap_kokkos.h +++ b/src/KOKKOS/rand_pool_wrap_kokkos.h @@ -59,10 +59,12 @@ class RandPoolWrap : protected Pointers { typedef Kokkos::Experimental::UniqueToken< LMPHostType, Kokkos::Experimental::UniqueTokenScope::Global> unique_token_type; +#ifndef LMP_KOKKOS_GPU unique_token_type unique_token; int tid = (int) unique_token.acquire(); rand_wrap.rng = random_thr[tid]; unique_token.release(tid); +#endif return rand_wrap; } diff --git a/src/KOKKOS/region_sphere_kokkos.h b/src/KOKKOS/region_sphere_kokkos.h index 08951138c3..575ca9c2d3 100644 --- a/src/KOKKOS/region_sphere_kokkos.h +++ b/src/KOKKOS/region_sphere_kokkos.h @@ -63,10 +63,9 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { double xs, ys, zs; double xnear[3], xorig[3]; - if (dynamic) { - xorig[0] = x; xorig[1] = y; xorig[2] = z; + xorig[0] = x; xorig[1] = y; xorig[2] = z; + if (dynamic) inverse_transform(x, y, z); - } xnear[0] = x; xnear[1] = y; xnear[2] = z; diff --git a/src/Purge.list b/src/Purge.list index 7098d39e3a..2b949d694d 100644 --- a/src/Purge.list +++ b/src/Purge.list @@ -53,6 +53,8 @@ lmpinstalledpkgs.h lmpgitversion.h mliap_model_python_couple.cpp mliap_model_python_couple.h +# removed in Dec 2024 +group_kokkos.cpp # renamed in September 2024 group_ndx.cpp group_ndx.h diff --git a/src/REAXFF/reaxff_init_md.cpp b/src/REAXFF/reaxff_init_md.cpp index 2d0459691f..6ede21e4ca 100644 --- a/src/REAXFF/reaxff_init_md.cpp +++ b/src/REAXFF/reaxff_init_md.cpp @@ -80,15 +80,14 @@ namespace ReaxFF { swa = control->nonb_low; swb = control->nonb_cut; - if (fabs(swa) > 0.01 && control->me == 0) + if ((fabs(swa) > 0.01) && (control->me == 0)) error->warning(FLERR, "Non-zero lower Taper-radius cutoff"); - if (swb < 0) { + if (swb < 0.0) { error->all(FLERR,"Negative upper Taper-radius cutoff"); - } - else if (swb < 5 && control->me == 0) - error->warning(FLERR,fmt::format("Warning: very low Taper-radius cutoff: " - "{}\n", swb)); + } else if ((swb < 5.0) && (control->me == 0)) + error->warning(FLERR,fmt::format("Very low Taper-radius cutoff: {}\n", swb)); + d1 = swb - swa; d7 = pow(d1, 7.0); swa2 = SQR(swa); diff --git a/src/accelerator_kokkos.h b/src/accelerator_kokkos.h index dec52b2363..2ab0ea01d5 100644 --- a/src/accelerator_kokkos.h +++ b/src/accelerator_kokkos.h @@ -23,6 +23,7 @@ #include "comm_kokkos.h" // IWYU pragma: export #include "comm_tiled_kokkos.h" // IWYU pragma: export #include "domain_kokkos.h" // IWYU pragma: export +#include "group_kokkos.h" // IWYU pragma: export #include "kokkos.h" // IWYU pragma: export #include "memory_kokkos.h" // IWYU pragma: export #include "modify_kokkos.h" // IWYU pragma: export @@ -39,6 +40,7 @@ #include "comm_brick.h" #include "comm_tiled.h" #include "domain.h" +#include "group.h" #include "memory.h" #include "modify.h" #include "neighbor.h" @@ -86,6 +88,11 @@ class DomainKokkos : public Domain { DomainKokkos(class LAMMPS *lmp) : Domain(lmp) {} }; +class GroupKokkos : public Group { + public: + GroupKokkos(class LAMMPS *lmp) : Group(lmp) {} +}; + class NeighborKokkos : public Neighbor { public: NeighborKokkos(class LAMMPS *lmp) : Neighbor(lmp) {} diff --git a/src/comm_tiled.cpp b/src/comm_tiled.cpp index e8b5d19fa5..bc4674a6b6 100644 --- a/src/comm_tiled.cpp +++ b/src/comm_tiled.cpp @@ -2507,7 +2507,8 @@ void CommTiled::deallocate_swap(int n) memory->destroy(sendbox_multi[i]); memory->destroy(sendbox_multiold[i]); - delete [] maxsendlist[i]; + if (maxsendlist) + delete [] maxsendlist[i]; if (sendlist && sendlist[i]) { for (int j = 0; j < nprocmax[i]; j++) memory->destroy(sendlist[i][j]); diff --git a/src/comm_tiled.h b/src/comm_tiled.h index 64b80d8d18..751a74d1b7 100644 --- a/src/comm_tiled.h +++ b/src/comm_tiled.h @@ -153,7 +153,7 @@ class CommTiled : public Comm { virtual void grow_swap_send(int, int, int); // grow swap arrays for send and recv void grow_swap_send_multi(int, int); // grow multi swap arrays for send and recv void grow_swap_recv(int, int); - void deallocate_swap(int); // deallocate swap arrays + void deallocate_swap(int); // deallocate swap arrays }; } // namespace LAMMPS_NS diff --git a/src/fix.h b/src/fix.h index 7609caf5fe..ebf5224171 100644 --- a/src/fix.h +++ b/src/fix.h @@ -264,6 +264,8 @@ class Fix : protected Pointers { virtual double memory_usage() { return 0.0; } + void set_copymode(int value) { copymode = value; } + protected: int instance_me; // which Fix class instantiation I am diff --git a/src/fix_langevin.cpp b/src/fix_langevin.cpp index 4258e3359f..60a55bbbb4 100644 --- a/src/fix_langevin.cpp +++ b/src/fix_langevin.cpp @@ -191,6 +191,8 @@ FixLangevin::FixLangevin(LAMMPS *lmp, int narg, char **arg) : FixLangevin::~FixLangevin() { + if (copymode) return; + delete random; delete[] tstr; delete[] gfactor1; @@ -509,7 +511,7 @@ void FixLangevin::post_force(int /*vflag*/) else post_force_templated<1,0,0,0,0,0>(); else if (gjfflag) - if (tallyflag || osflag) + if (tallyflag || osflag) if (tbiasflag == BIAS) if (rmass) if (zeroflag) post_force_templated<0,1,1,1,1,1>(); diff --git a/src/lammps.cpp b/src/lammps.cpp index 2cfb33f14c..1abc34a11b 100644 --- a/src/lammps.cpp +++ b/src/lammps.cpp @@ -872,7 +872,9 @@ void LAMMPS::create() else atom->create_avec("atomic",0,nullptr,1); - group = new Group(this); + if (kokkos) group = new GroupKokkos(this); + else group = new Group(this); + force = new Force(this); // must be after group, to create temperature if (kokkos) modify = new ModifyKokkos(this); diff --git a/unittest/force-styles/tests/atomic-pair-lepton_sphere.yaml b/unittest/force-styles/tests/atomic-pair-lepton_sphere.yaml index 222aa8b93c..0e00e418a1 100644 --- a/unittest/force-styles/tests/atomic-pair-lepton_sphere.yaml +++ b/unittest/force-styles/tests/atomic-pair-lepton_sphere.yaml @@ -1,6 +1,7 @@ --- lammps_version: 28 Mar 2023 date_generated: Fri Apr 7 18:04:29 2023 +tags: unstable epsilon: 7.5e-13 skip_tests: single prerequisites: ! | diff --git a/unittest/force-styles/tests/atomic-pair-lj_cut_sphere.yaml b/unittest/force-styles/tests/atomic-pair-lj_cut_sphere.yaml index 3a5122a896..193a65122e 100644 --- a/unittest/force-styles/tests/atomic-pair-lj_cut_sphere.yaml +++ b/unittest/force-styles/tests/atomic-pair-lj_cut_sphere.yaml @@ -1,6 +1,7 @@ --- lammps_version: 28 Mar 2023 date_generated: Thu Mar 30 14:38:22 2023 +tags: unstable epsilon: 7.5e-13 skip_tests: single prerequisites: ! | diff --git a/unittest/force-styles/tests/atomic-pair-lj_expand_sphere.yaml b/unittest/force-styles/tests/atomic-pair-lj_expand_sphere.yaml index 24a17a275c..d929544809 100644 --- a/unittest/force-styles/tests/atomic-pair-lj_expand_sphere.yaml +++ b/unittest/force-styles/tests/atomic-pair-lj_expand_sphere.yaml @@ -1,6 +1,7 @@ --- lammps_version: 28 Mar 2023 date_generated: Fri Apr 7 18:07:13 2023 +tags: unstable epsilon: 7.5e-13 skip_tests: single prerequisites: ! | diff --git a/unittest/force-styles/tests/bond-harmonic_restrain.yaml b/unittest/force-styles/tests/bond-harmonic_restrain.yaml index 07546775ab..485dbfeafc 100644 --- a/unittest/force-styles/tests/bond-harmonic_restrain.yaml +++ b/unittest/force-styles/tests/bond-harmonic_restrain.yaml @@ -1,7 +1,7 @@ --- lammps_version: 8 Feb 2023 date_generated: Tue Mar 7 21:07:27 2023 -epsilon: 2.5e-13 +epsilon: 5.0e-13 skip_tests: extract prerequisites: ! | atom full diff --git a/unittest/force-styles/tests/dihedral-cosine_squared_restricted.yaml b/unittest/force-styles/tests/dihedral-cosine_squared_restricted.yaml index c2c2b8cc6b..0e43bc1741 100644 --- a/unittest/force-styles/tests/dihedral-cosine_squared_restricted.yaml +++ b/unittest/force-styles/tests/dihedral-cosine_squared_restricted.yaml @@ -1,8 +1,7 @@ --- lammps_version: 7 Feb 2024 -tags: date_generated: Sat Apr 13 11:41:16 2024 -epsilon: 5.0e-11 +epsilon: 2.0e-10 skip_tests: prerequisites: ! | atom full diff --git a/unittest/force-styles/tests/fix-timestep-recenter-coords.yaml b/unittest/force-styles/tests/fix-timestep-recenter-coords.yaml index 31c682fc07..9fff99f8b0 100644 --- a/unittest/force-styles/tests/fix-timestep-recenter-coords.yaml +++ b/unittest/force-styles/tests/fix-timestep-recenter-coords.yaml @@ -1,7 +1,7 @@ --- lammps_version: 29 Aug 2024 date_generated: Tue Oct 1 12:45:25 2024 -epsilon: 2e-13 +epsilon: 1.0e-11 skip_tests: prerequisites: ! | atom full diff --git a/unittest/force-styles/tests/fix-timestep-recenter-init.yaml b/unittest/force-styles/tests/fix-timestep-recenter-init.yaml index ca539aa911..1678405074 100644 --- a/unittest/force-styles/tests/fix-timestep-recenter-init.yaml +++ b/unittest/force-styles/tests/fix-timestep-recenter-init.yaml @@ -1,7 +1,7 @@ --- lammps_version: 29 Aug 2024 date_generated: Tue Oct 1 12:45:46 2024 -epsilon: 1e-12 +epsilon: 2.5e-11 skip_tests: prerequisites: ! | atom full diff --git a/unittest/force-styles/tests/fix-timestep-spring_rg.yaml b/unittest/force-styles/tests/fix-timestep-spring_rg.yaml index a6c5844b6c..bfd9310012 100644 --- a/unittest/force-styles/tests/fix-timestep-spring_rg.yaml +++ b/unittest/force-styles/tests/fix-timestep-spring_rg.yaml @@ -1,7 +1,7 @@ --- lammps_version: 17 Feb 2022 date_generated: Thu Mar 17 19:43:17 2022 -epsilon: 2e-14 +epsilon: 5.0e-14 skip_tests: prerequisites: ! | atom full diff --git a/unittest/force-styles/tests/fix-timestep-wall_harmonic_const.yaml b/unittest/force-styles/tests/fix-timestep-wall_harmonic_const.yaml index 57f38b4c37..5806582929 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_harmonic_const.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_harmonic_const.yaml @@ -1,7 +1,7 @@ --- lammps_version: 17 Feb 2022 date_generated: Fri Mar 18 22:18:01 2022 -epsilon: 4e-14 +epsilon: 5.0e-14 skip_tests: prerequisites: ! | atom full diff --git a/unittest/force-styles/tests/fix-timestep-wall_lepton_const.yaml b/unittest/force-styles/tests/fix-timestep-wall_lepton_const.yaml index 947bc6a95a..079383e04c 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_lepton_const.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_lepton_const.yaml @@ -1,7 +1,7 @@ --- lammps_version: 8 Feb 2023 date_generated: Thu Feb 23 00:40:51 2023 -epsilon: 4e-14 +epsilon: 5.0e-14 skip_tests: prerequisites: ! | atom full diff --git a/unittest/force-styles/tests/fix-timestep-wall_lj93_const.yaml b/unittest/force-styles/tests/fix-timestep-wall_lj93_const.yaml index 5431a8e0a8..590ed1f103 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_lj93_const.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_lj93_const.yaml @@ -1,7 +1,7 @@ --- lammps_version: 27 Jun 2024 date_generated: Fri Aug 2 23:56:34 2024 -epsilon: 2e-14 +epsilon: 1.0e-13 skip_tests: prerequisites: ! | atom full diff --git a/unittest/force-styles/tests/fix-timestep-wall_morse_const.yaml b/unittest/force-styles/tests/fix-timestep-wall_morse_const.yaml index 391070609f..08080a6274 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_morse_const.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_morse_const.yaml @@ -1,7 +1,7 @@ --- lammps_version: 8 Feb 2023 date_generated: Thu Feb 23 15:26:55 2023 -epsilon: 4e-14 +epsilon: 1.0e-13 skip_tests: prerequisites: ! | atom full diff --git a/unittest/force-styles/tests/fix-timestep-wall_table_linear.yaml b/unittest/force-styles/tests/fix-timestep-wall_table_linear.yaml index 6291de136a..ee86026216 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_table_linear.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_table_linear.yaml @@ -1,7 +1,7 @@ --- lammps_version: 8 Feb 2023 date_generated: Thu Feb 23 00:56:30 2023 -epsilon: 4e-14 +epsilon: 2.0e-13 skip_tests: prerequisites: ! | atom full diff --git a/unittest/force-styles/tests/fix-timestep-wall_table_spline.yaml b/unittest/force-styles/tests/fix-timestep-wall_table_spline.yaml index 6c6c674342..7f27c59e32 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_table_spline.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_table_spline.yaml @@ -1,7 +1,7 @@ --- lammps_version: 8 Feb 2023 date_generated: Thu Feb 23 00:56:30 2023 -epsilon: 4e-14 +epsilon: 2.0e-13 skip_tests: prerequisites: ! | atom full diff --git a/unittest/force-styles/tests/manybody-pair-pace_product.yaml b/unittest/force-styles/tests/manybody-pair-pace_product.yaml index 6db9f4220a..fe4cde6dac 100644 --- a/unittest/force-styles/tests/manybody-pair-pace_product.yaml +++ b/unittest/force-styles/tests/manybody-pair-pace_product.yaml @@ -1,7 +1,7 @@ --- lammps_version: 17 Feb 2022 date_generated: Fri Mar 18 22:17:48 2022 -epsilon: 7.5e-09 +epsilon: 1.5e-08 skip_tests: prerequisites: ! | pair pace diff --git a/unittest/force-styles/tests/manybody-pair-pace_recursive.yaml b/unittest/force-styles/tests/manybody-pair-pace_recursive.yaml index 61f7ce0ac9..3740718675 100644 --- a/unittest/force-styles/tests/manybody-pair-pace_recursive.yaml +++ b/unittest/force-styles/tests/manybody-pair-pace_recursive.yaml @@ -1,7 +1,7 @@ --- lammps_version: 10 Mar 2021 date_generated: Wed Apr 7 19:30:07 2021 -epsilon: 7.5e-09 +epsilon: 1.5e-08 prerequisites: ! | pair pace pre_commands: ! | diff --git a/unittest/force-styles/tests/mol-pair-lepton.yaml b/unittest/force-styles/tests/mol-pair-lepton.yaml index 33576e81c2..c0e26b34f6 100644 --- a/unittest/force-styles/tests/mol-pair-lepton.yaml +++ b/unittest/force-styles/tests/mol-pair-lepton.yaml @@ -1,7 +1,7 @@ --- lammps_version: 21 Nov 2023 date_generated: Thu Jan 18 11:01:50 2024 -epsilon: 5e-14 +epsilon: 1e-13 skip_tests: intel prerequisites: ! | atom full