Remove 'host' option for Kokkos pair and fix comm

This commit is contained in:
Stan Moore
2020-12-22 12:55:44 -07:00
parent 271cbad787
commit 6f852e7b2c
4 changed files with 52 additions and 64 deletions

View File

@ -39,7 +39,7 @@
GPU_AWARE_UNKNOWN
#elif defined(KOKKOS_ENABLE_CUDA)
// OpenMPI supports detecting CUDA-aware MPI as of version 2.0.0
// OpenMPI supports detecting GPU-aware MPI as of version 2.0.0
#if (OPEN_MPI)
#if (OMPI_MAJOR_VERSION >= 2)
@ -149,7 +149,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
if (ngpus > 1 && !set_flag)
error->all(FLERR,"Could not determine local MPI rank for multiple "
"GPUs with Kokkos CUDA because MPI library not recognized");
"GPUs with Kokkos CUDA or HIP because MPI library not recognized");
} else if (strcmp(arg[iarg],"t") == 0 ||
strcmp(arg[iarg],"threads") == 0) {
@ -210,7 +210,6 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
forward_pair_comm_classic = forward_fix_comm_classic = 0;
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
forward_pair_comm_on_host = forward_fix_comm_on_host = 0;
} else {
if (nthreads > 1) {
neighflag = HALFTHREAD;
@ -225,13 +224,12 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
forward_pair_comm_classic = forward_fix_comm_classic = 1;
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
forward_pair_comm_on_host = forward_fix_comm_on_host = 0;
}
#ifdef LMP_KOKKOS_GPU
// check and warn about CUDA-aware MPI availability when using multiple MPI tasks
// change default only if we can safely detect that CUDA-aware MPI is not available
// check and warn about GPU-aware MPI availability when using multiple MPI tasks
// change default only if we can safely detect that GPU-aware MPI is not available
int nmpi = 0;
MPI_Comm_size(world,&nmpi);
@ -254,14 +252,14 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
if (!gpu_aware_flag)
if (me == 0)
error->warning(FLERR,"The Spectrum MPI '-gpu' flag is not set. Disabling CUDA-aware MPI");
error->warning(FLERR,"The Spectrum MPI '-gpu' flag is not set. Disabling GPU-aware MPI");
}
#endif
if (gpu_aware_flag == 1 && have_gpu_aware == 0) {
if (me == 0)
error->warning(FLERR,"Turning off CUDA-aware MPI since it is not detected, "
"use '-pk kokkos cuda/aware on' to override");
error->warning(FLERR,"Turning off GPU-aware MPI since it is not detected, "
"use '-pk kokkos gpu/aware on' to override");
gpu_aware_flag = 0;
} else if (have_gpu_aware == -1) { // maybe we are dealing with MPICH, MVAPICH2 or some derivative?
// MVAPICH2
@ -274,17 +272,17 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
if (!gpu_aware_flag)
if (me == 0)
error->warning(FLERR,"MVAPICH2 'MV2_USE_CUDA' environment variable is not set. Disabling CUDA-aware MPI");
error->warning(FLERR,"MVAPICH2 'MV2_USE_CUDA' environment variable is not set. Disabling GPU-aware MPI");
// pure MPICH or some unsupported MPICH derivative
#elif defined(MPICH) && !defined(MVAPICH2_VERSION)
if (me == 0)
error->warning(FLERR,"Detected MPICH. Disabling CUDA-aware MPI");
error->warning(FLERR,"Detected MPICH. Disabling GPU-aware MPI");
gpu_aware_flag = 0;
#else
if (me == 0)
error->warning(FLERR,"Kokkos with CUDA assumes CUDA-aware MPI is available,"
error->warning(FLERR,"Kokkos with CUDA or HIP assumes GPU-aware MPI is available,"
" but cannot determine if this is the case\n try"
" '-pk kokkos cuda/aware off' if getting segmentation faults");
" '-pk kokkos gpu/aware off' if getting segmentation faults");
#endif
} // if (-1 == have_gpu_aware)
@ -352,19 +350,16 @@ void KokkosLMP::accelerator(int narg, char **arg)
forward_pair_comm_classic = forward_fix_comm_classic = 1;
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
forward_pair_comm_on_host = forward_fix_comm_on_host = 0;
} else if (strcmp(arg[iarg+1],"host") == 0) {
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
forward_pair_comm_classic = forward_fix_comm_classic = 0;
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 1;
forward_pair_comm_on_host = forward_fix_comm_on_host = 1;
} else if (strcmp(arg[iarg+1],"device") == 0) {
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
forward_pair_comm_classic = forward_fix_comm_classic = 0;
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
forward_pair_comm_on_host = forward_fix_comm_on_host = 0;
} else error->all(FLERR,"Illegal package kokkos command");
iarg += 2;
} else if (strcmp(arg[iarg],"comm/exchange") == 0) {
@ -394,25 +389,15 @@ void KokkosLMP::accelerator(int narg, char **arg)
} else if (strcmp(arg[iarg],"comm/pair/forward") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
if (strcmp(arg[iarg+1],"no") == 0) forward_pair_comm_classic = 1;
else if (strcmp(arg[iarg+1],"host") == 0) {
forward_pair_comm_classic = 0;
forward_pair_comm_on_host = 1;
} else if (strcmp(arg[iarg+1],"device") == 0) {
forward_pair_comm_classic = 0;
forward_pair_comm_on_host = 0;
} else error->all(FLERR,"Illegal package kokkos command");
else if (strcmp(arg[iarg+1],"device") == 0) forward_pair_comm_classic = 0;
else error->all(FLERR,"Illegal package kokkos command");
forward_pair_comm_changed = 0;
iarg += 2;
} else if (strcmp(arg[iarg],"comm/fix/forward") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
if (strcmp(arg[iarg+1],"no") == 0) forward_fix_comm_classic = 1;
else if (strcmp(arg[iarg+1],"host") == 0) {
forward_fix_comm_classic = 0;
forward_fix_comm_on_host = 1;
} else if (strcmp(arg[iarg+1],"device") == 0) {
forward_fix_comm_classic = 0;
forward_fix_comm_on_host = 0;
} else error->all(FLERR,"Illegal package kokkos command");
else if (strcmp(arg[iarg+1],"device") == 0) forward_fix_comm_classic = 0;
else error->all(FLERR,"Illegal package kokkos command");
forward_fix_comm_changed = 0;
iarg += 2;
} else if (strcmp(arg[iarg],"comm/reverse") == 0) {
@ -427,7 +412,7 @@ void KokkosLMP::accelerator(int narg, char **arg)
} else error->all(FLERR,"Illegal package kokkos command");
reverse_comm_changed = 0;
iarg += 2;
} else if (strcmp(arg[iarg],"cuda/aware") == 0) {
} else if (strcmp(arg[iarg],"gpu/aware") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
if (strcmp(arg[iarg+1],"off") == 0) gpu_aware_flag = 0;
else if (strcmp(arg[iarg+1],"on") == 0) gpu_aware_flag = 1;
@ -448,52 +433,52 @@ void KokkosLMP::accelerator(int narg, char **arg)
int nmpi = 0;
MPI_Comm_size(world,&nmpi);
// if "cuda/aware off" and "comm device", change to "comm host"
// if "gpu/aware off" and "comm device", change to "comm classic"
if (!gpu_aware_flag && nmpi > 1) {
if (exchange_comm_classic == 0 && exchange_comm_on_host == 0) {
exchange_comm_on_host = 1;
exchange_comm_classic = 1;
exchange_comm_changed = 1;
}
if (forward_comm_classic == 0 && forward_comm_on_host == 0) {
forward_comm_on_host = 1;
forward_comm_classic = 1;
forward_comm_changed = 1;
}
if (forward_pair_comm_classic == 0 && forward_pair_comm_on_host == 0) {
forward_pair_comm_on_host = 1;
if (forward_pair_comm_classic == 0) {
forward_pair_comm_classic = 1;
forward_pair_comm_changed = 1;
}
if (forward_fix_comm_classic == 0 && forward_fix_comm_on_host == 0) {
forward_fix_comm_on_host = 1;
if (forward_fix_comm_classic == 0) {
forward_fix_comm_classic = 1;
forward_fix_comm_changed = 1;
}
if (reverse_comm_classic == 0 && reverse_comm_on_host == 0) {
reverse_comm_on_host = 1;
reverse_comm_classic = 1;
reverse_comm_changed = 1;
}
}
// if "cuda/aware on" and comm flags were changed previously, change them back
// if "gpu/aware on" and comm flags were changed previously, change them back
if (gpu_aware_flag) {
if (exchange_comm_changed) {
exchange_comm_on_host = 0;
exchange_comm_classic = 0;
exchange_comm_changed = 0;
}
if (forward_comm_changed) {
forward_comm_on_host = 0;
forward_comm_classic = 0;
forward_comm_changed = 0;
}
if (forward_pair_comm_changed) {
forward_pair_comm_on_host = 0;
forward_pair_comm_classic = 0;
forward_pair_comm_changed = 0;
}
if (forward_fix_comm_changed) {
forward_fix_comm_on_host = 0;
forward_fix_comm_classic = 0;
forward_fix_comm_changed = 0;
}
if (reverse_comm_changed) {
reverse_comm_on_host = 0;
reverse_comm_classic = 0;
reverse_comm_changed = 0;
}
}