From 6f852e7b2cf41d6d0bac8ae8cf15da6c6364f5a0 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 22 Dec 2020 12:55:44 -0700 Subject: [PATCH] Remove 'host' option for Kokkos pair and fix comm --- src/KOKKOS/comm_kokkos.cpp | 10 +--- src/KOKKOS/fix_qeq_reax_kokkos.cpp | 16 ++--- src/KOKKOS/kokkos.cpp | 75 ++++++++++-------------- src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 15 +++-- 4 files changed, 52 insertions(+), 64 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index c25fd1d111..23923419e8 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -365,12 +365,9 @@ void CommKokkos::reverse_comm_device() void CommKokkos::forward_comm_fix(Fix *fix, int size) { - if (!fix->kokkosable || !fix->forward_comm_device || forward_fix_comm_classic) { + if (!(fix->execution_space == Device) || !fix->forward_comm_device || forward_fix_comm_classic) { k_sendlist.sync(); CommBrick::forward_comm_fix(fix); - } else if (forward_fix_comm_on_host) { - k_sendlist.sync(); - forward_comm_fix_device(fix); } else { k_sendlist.sync(); forward_comm_fix_device(fix); @@ -463,12 +460,9 @@ void CommKokkos::reverse_comm_compute(Compute *compute) void CommKokkos::forward_comm_pair(Pair *pair) { - if (!pair->kokkosable || forward_pair_comm_classic) { + if (!(pair->execution_space == Device) || forward_pair_comm_classic) { k_sendlist.sync(); CommBrick::forward_comm_pair(pair); - } else if (forward_pair_comm_on_host) { - k_sendlist.sync(); - forward_comm_pair_device(pair); } else { k_sendlist.sync(); forward_comm_pair_device(pair); diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index 88b5414fc6..f380ab40f5 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -21,7 +21,7 @@ #include "atom.h" #include "atom_kokkos.h" #include "atom_masks.h" -#include "comm.h" +#include "comm_kokkos.h" #include "error.h" #include "force.h" #include "kokkos.h" @@ -49,6 +49,7 @@ 
FixQEqReaxKokkos(LAMMPS *lmp, int narg, char **arg) : kokkosable = 1; forward_comm_device = 1; atomKK = (AtomKokkos *) atom; + commKK = (CommKokkos *) comm; execution_space = ExecutionSpaceFromDevice::space; datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | Q_MASK | TYPE_MASK | TAG_MASK; @@ -81,8 +82,8 @@ FixQEqReaxKokkos::~FixQEqReaxKokkos() template void FixQEqReaxKokkos::init() { - atomKK->k_q.modify(); - atomKK->k_q.sync(); + atomKK->modified(Host,Q_MASK); + atomKK->sync(execution_space,Q_MASK); FixQEqReax::init(); @@ -1012,9 +1013,9 @@ void FixQEqReaxKokkos::calculate_q() pack_flag = 4; //comm->forward_comm_fix( this ); //Dist_vector( atom->q ); - atomKK->k_q.modify(); + atomKK->modified(execution_space,Q_MASK); comm->forward_comm_fix(this); - atomKK->k_q.sync(); + atomKK->sync(execution_space,Q_MASK); } @@ -1349,6 +1350,7 @@ int FixQEqReaxKokkos::pack_forward_comm_fix_kokkos(int n, DAT::tdual int iswap_in, DAT::tdual_xfloat_1d &k_buf, int /*pbc_flag*/, int * /*pbc*/) { + k_sendlist.sync(); d_sendlist = k_sendlist.view(); iswap = iswap_in; d_buf = k_buf.view(); @@ -1434,11 +1436,11 @@ void FixQEqReaxKokkos::unpack_forward_comm(int n, int first, double } else if (pack_flag == 2) { k_s.sync_host(); for (m = 0, i = first; m < n; m++, i++) h_s[i] = buf[m]; - k_d.modify_host(); + k_s.modify_host(); } else if (pack_flag == 3) { k_t.sync_host(); for (m = 0, i = first; m < n; m++, i++) h_t[i] = buf[m]; - k_d.modify_host(); + k_t.modify_host(); } else if (pack_flag == 4) { atomKK->sync(Host,Q_MASK); for (m = 0, i = first; m < n; m++, i++) atom->q[i] = buf[m]; diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 3be19e6190..11116b0c93 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -39,7 +39,7 @@ GPU_AWARE_UNKNOWN #elif defined(KOKKOS_ENABLE_CUDA) -// OpenMPI supports detecting CUDA-aware MPI as of version 2.0.0 +// OpenMPI supports detecting GPU-aware MPI as of version 2.0.0 #if (OPEN_MPI) #if (OMPI_MAJOR_VERSION >= 2) @@ -149,7 
+149,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) if (ngpus > 1 && !set_flag) error->all(FLERR,"Could not determine local MPI rank for multiple " - "GPUs with Kokkos CUDA because MPI library not recognized"); + "GPUs with Kokkos CUDA or HIP because MPI library not recognized"); } else if (strcmp(arg[iarg],"t") == 0 || strcmp(arg[iarg],"threads") == 0) { @@ -210,7 +210,6 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) forward_pair_comm_classic = forward_fix_comm_classic = 0; exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0; - forward_pair_comm_on_host = forward_fix_comm_on_host = 0; } else { if (nthreads > 1) { neighflag = HALFTHREAD; @@ -225,13 +224,12 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) forward_pair_comm_classic = forward_fix_comm_classic = 1; exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0; - forward_pair_comm_on_host = forward_fix_comm_on_host = 0; } #ifdef LMP_KOKKOS_GPU - // check and warn about CUDA-aware MPI availability when using multiple MPI tasks - // change default only if we can safely detect that CUDA-aware MPI is not available + // check and warn about GPU-aware MPI availability when using multiple MPI tasks + // change default only if we can safely detect that GPU-aware MPI is not available int nmpi = 0; MPI_Comm_size(world,&nmpi); @@ -254,14 +252,14 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) if (!gpu_aware_flag) if (me == 0) - error->warning(FLERR,"The Spectrum MPI '-gpu' flag is not set. Disabling CUDA-aware MPI"); + error->warning(FLERR,"The Spectrum MPI '-gpu' flag is not set. 
Disabling GPU-aware MPI"); } #endif if (gpu_aware_flag == 1 && have_gpu_aware == 0) { if (me == 0) - error->warning(FLERR,"Turning off CUDA-aware MPI since it is not detected, " - "use '-pk kokkos cuda/aware on' to override"); + error->warning(FLERR,"Turning off GPU-aware MPI since it is not detected, " + "use '-pk kokkos gpu/aware on' to override"); gpu_aware_flag = 0; } else if (have_gpu_aware == -1) { // maybe we are dealing with MPICH, MVAPICH2 or some derivative? // MVAPICH2 @@ -274,17 +272,17 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) if (!gpu_aware_flag) if (me == 0) - error->warning(FLERR,"MVAPICH2 'MV2_USE_CUDA' environment variable is not set. Disabling CUDA-aware MPI"); + error->warning(FLERR,"MVAPICH2 'MV2_USE_CUDA' environment variable is not set. Disabling GPU-aware MPI"); // pure MPICH or some unsupported MPICH derivative #elif defined(MPICH) && !defined(MVAPICH2_VERSION) if (me == 0) - error->warning(FLERR,"Detected MPICH. Disabling CUDA-aware MPI"); + error->warning(FLERR,"Detected MPICH. 
Disabling GPU-aware MPI"); gpu_aware_flag = 0; #else if (me == 0) - error->warning(FLERR,"Kokkos with CUDA assumes CUDA-aware MPI is available," + error->warning(FLERR,"Kokkos with CUDA or HIP assumes GPU-aware MPI is available," " but cannot determine if this is the case\n try" - " '-pk kokkos cuda/aware off' if getting segmentation faults"); + " '-pk kokkos gpu/aware off' if getting segmentation faults"); #endif } // if (-1 == have_gpu_aware) @@ -352,19 +350,16 @@ void KokkosLMP::accelerator(int narg, char **arg) forward_pair_comm_classic = forward_fix_comm_classic = 1; exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0; - forward_pair_comm_on_host = forward_fix_comm_on_host = 0; } else if (strcmp(arg[iarg+1],"host") == 0) { exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0; forward_pair_comm_classic = forward_fix_comm_classic = 0; exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 1; - forward_pair_comm_on_host = forward_fix_comm_on_host = 1; } else if (strcmp(arg[iarg+1],"device") == 0) { exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0; forward_pair_comm_classic = forward_fix_comm_classic = 0; exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0; - forward_pair_comm_on_host = forward_fix_comm_on_host = 0; } else error->all(FLERR,"Illegal package kokkos command"); iarg += 2; } else if (strcmp(arg[iarg],"comm/exchange") == 0) { @@ -394,25 +389,15 @@ void KokkosLMP::accelerator(int narg, char **arg) } else if (strcmp(arg[iarg],"comm/pair/forward") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); if (strcmp(arg[iarg+1],"no") == 0) forward_pair_comm_classic = 1; - else if (strcmp(arg[iarg+1],"host") == 0) { - forward_pair_comm_classic = 0; - forward_pair_comm_on_host = 1; - } else if (strcmp(arg[iarg+1],"device") == 0) { - forward_pair_comm_classic = 0; - forward_pair_comm_on_host = 0; - } else error->all(FLERR,"Illegal package 
kokkos command"); + else if (strcmp(arg[iarg+1],"device") == 0) forward_pair_comm_classic = 0; + else error->all(FLERR,"Illegal package kokkos command"); forward_pair_comm_changed = 0; iarg += 2; } else if (strcmp(arg[iarg],"comm/fix/forward") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); if (strcmp(arg[iarg+1],"no") == 0) forward_fix_comm_classic = 1; - else if (strcmp(arg[iarg+1],"host") == 0) { - forward_fix_comm_classic = 0; - forward_fix_comm_on_host = 1; - } else if (strcmp(arg[iarg+1],"device") == 0) { - forward_fix_comm_classic = 0; - forward_fix_comm_on_host = 0; - } else error->all(FLERR,"Illegal package kokkos command"); + else if (strcmp(arg[iarg+1],"device") == 0) forward_fix_comm_classic = 0; + else error->all(FLERR,"Illegal package kokkos command"); forward_fix_comm_changed = 0; iarg += 2; } else if (strcmp(arg[iarg],"comm/reverse") == 0) { @@ -427,7 +412,7 @@ void KokkosLMP::accelerator(int narg, char **arg) } else error->all(FLERR,"Illegal package kokkos command"); reverse_comm_changed = 0; iarg += 2; - } else if (strcmp(arg[iarg],"cuda/aware") == 0) { + } else if (strcmp(arg[iarg],"gpu/aware") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); if (strcmp(arg[iarg+1],"off") == 0) gpu_aware_flag = 0; else if (strcmp(arg[iarg+1],"on") == 0) gpu_aware_flag = 1; @@ -448,52 +433,52 @@ void KokkosLMP::accelerator(int narg, char **arg) int nmpi = 0; MPI_Comm_size(world,&nmpi); - // if "cuda/aware off" and "comm device", change to "comm host" + // if "gpu/aware off" and "comm device", change to "comm classic" if (!gpu_aware_flag && nmpi > 1) { if (exchange_comm_classic == 0 && exchange_comm_on_host == 0) { - exchange_comm_on_host = 1; + exchange_comm_classic = 1; exchange_comm_changed = 1; } if (forward_comm_classic == 0 && forward_comm_on_host == 0) { - forward_comm_on_host = 1; + forward_comm_classic = 1; forward_comm_changed = 1; } - if (forward_pair_comm_classic == 0 && 
forward_pair_comm_on_host == 0) { - forward_pair_comm_on_host = 1; + if (forward_pair_comm_classic == 0) { + forward_pair_comm_classic = 1; forward_pair_comm_changed = 1; } - if (forward_fix_comm_classic == 0 && forward_fix_comm_on_host == 0) { - forward_fix_comm_on_host = 1; + if (forward_fix_comm_classic == 0) { + forward_fix_comm_classic = 1; forward_fix_comm_changed = 1; } if (reverse_comm_classic == 0 && reverse_comm_on_host == 0) { - reverse_comm_on_host = 1; + reverse_comm_classic = 1; reverse_comm_changed = 1; } } - // if "cuda/aware on" and comm flags were changed previously, change them back + // if "gpu/aware on" and comm flags were changed previously, change them back if (gpu_aware_flag) { if (exchange_comm_changed) { - exchange_comm_on_host = 0; + exchange_comm_classic = 0; exchange_comm_changed = 0; } if (forward_comm_changed) { - forward_comm_on_host = 0; + forward_comm_classic = 0; forward_comm_changed = 0; } if (forward_pair_comm_changed) { - forward_pair_comm_on_host = 0; + forward_pair_comm_classic = 0; forward_pair_comm_changed = 0; } if (forward_fix_comm_changed) { - forward_fix_comm_on_host = 0; + forward_fix_comm_classic = 0; forward_fix_comm_changed = 0; } if (reverse_comm_changed) { - reverse_comm_on_host = 0; + reverse_comm_classic = 0; reverse_comm_changed = 0; } } diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index 05421186c0..d0c8432af0 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -512,12 +512,17 @@ void PairMultiLucyRXKokkos::computeLocalDensity() atomKK->modified(execution_space,DPDRHO_MASK); - // communicate and sum densities (on the host) + // communicate and sum densities - if (newton_pair) + atomKK->modified(execution_space,DPDRHO_MASK); + + if (newton_pair) { comm->reverse_comm_pair(this); + atomKK->sync(execution_space,DPDRHO_MASK); + } comm->forward_comm_pair(this); + atomKK->sync(execution_space,DPDRHO_MASK); } template @@ 
-686,8 +691,6 @@ void PairMultiLucyRXKokkos::unpack_forward_comm_kokkos(int n, int fi first = first_in; v_buf = buf.view(); Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); - - atomKK->modified(execution_space,DPDRHO_MASK); } template @@ -721,6 +724,8 @@ void PairMultiLucyRXKokkos::unpack_forward_comm(int n, int first, do { int i,m,last; + atomKK->sync(Host,DPDRHO_MASK); + m = 0; last = first + n; for (i = first; i < last; i++) h_rho[i] = buf[m++]; @@ -750,6 +755,8 @@ void PairMultiLucyRXKokkos::unpack_reverse_comm(int n, int *list, do { int i,j,m; + atomKK->sync(Host,DPDRHO_MASK); + m = 0; for (i = 0; i < n; i++) { j = list[i];