Remove 'host' option for Kokkos pair and fix comm
This commit is contained in:
@ -39,7 +39,7 @@
|
||||
GPU_AWARE_UNKNOWN
|
||||
#elif defined(KOKKOS_ENABLE_CUDA)
|
||||
|
||||
// OpenMPI supports detecting CUDA-aware MPI as of version 2.0.0
|
||||
// OpenMPI supports detecting GPU-aware MPI as of version 2.0.0
|
||||
|
||||
#if (OPEN_MPI)
|
||||
#if (OMPI_MAJOR_VERSION >= 2)
|
||||
@ -149,7 +149,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
|
||||
if (ngpus > 1 && !set_flag)
|
||||
error->all(FLERR,"Could not determine local MPI rank for multiple "
|
||||
"GPUs with Kokkos CUDA because MPI library not recognized");
|
||||
"GPUs with Kokkos CUDA or HIP because MPI library not recognized");
|
||||
|
||||
} else if (strcmp(arg[iarg],"t") == 0 ||
|
||||
strcmp(arg[iarg],"threads") == 0) {
|
||||
@ -210,7 +210,6 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
forward_pair_comm_classic = forward_fix_comm_classic = 0;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||
forward_pair_comm_on_host = forward_fix_comm_on_host = 0;
|
||||
} else {
|
||||
if (nthreads > 1) {
|
||||
neighflag = HALFTHREAD;
|
||||
@ -225,13 +224,12 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
forward_pair_comm_classic = forward_fix_comm_classic = 1;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||
forward_pair_comm_on_host = forward_fix_comm_on_host = 0;
|
||||
}
|
||||
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
|
||||
// check and warn about CUDA-aware MPI availability when using multiple MPI tasks
|
||||
// change default only if we can safely detect that CUDA-aware MPI is not available
|
||||
// check and warn about GPU-aware MPI availability when using multiple MPI tasks
|
||||
// change default only if we can safely detect that GPU-aware MPI is not available
|
||||
|
||||
int nmpi = 0;
|
||||
MPI_Comm_size(world,&nmpi);
|
||||
@ -254,14 +252,14 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
|
||||
if (!gpu_aware_flag)
|
||||
if (me == 0)
|
||||
error->warning(FLERR,"The Spectrum MPI '-gpu' flag is not set. Disabling CUDA-aware MPI");
|
||||
error->warning(FLERR,"The Spectrum MPI '-gpu' flag is not set. Disabling GPU-aware MPI");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (gpu_aware_flag == 1 && have_gpu_aware == 0) {
|
||||
if (me == 0)
|
||||
error->warning(FLERR,"Turning off CUDA-aware MPI since it is not detected, "
|
||||
"use '-pk kokkos cuda/aware on' to override");
|
||||
error->warning(FLERR,"Turning off GPU-aware MPI since it is not detected, "
|
||||
"use '-pk kokkos gpu/aware on' to override");
|
||||
gpu_aware_flag = 0;
|
||||
} else if (have_gpu_aware == -1) { // maybe we are dealing with MPICH, MVAPICH2 or some derivative?
|
||||
// MVAPICH2
|
||||
@ -274,17 +272,17 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
|
||||
if (!gpu_aware_flag)
|
||||
if (me == 0)
|
||||
error->warning(FLERR,"MVAPICH2 'MV2_USE_CUDA' environment variable is not set. Disabling CUDA-aware MPI");
|
||||
error->warning(FLERR,"MVAPICH2 'MV2_USE_CUDA' environment variable is not set. Disabling GPU-aware MPI");
|
||||
// pure MPICH or some unsupported MPICH derivative
|
||||
#elif defined(MPICH) && !defined(MVAPICH2_VERSION)
|
||||
if (me == 0)
|
||||
error->warning(FLERR,"Detected MPICH. Disabling CUDA-aware MPI");
|
||||
error->warning(FLERR,"Detected MPICH. Disabling GPU-aware MPI");
|
||||
gpu_aware_flag = 0;
|
||||
#else
|
||||
if (me == 0)
|
||||
error->warning(FLERR,"Kokkos with CUDA assumes CUDA-aware MPI is available,"
|
||||
error->warning(FLERR,"Kokkos with CUDA or HIP assumes GPU-aware MPI is available,"
|
||||
" but cannot determine if this is the case\n try"
|
||||
" '-pk kokkos cuda/aware off' if getting segmentation faults");
|
||||
" '-pk kokkos gpu/aware off' if getting segmentation faults");
|
||||
|
||||
#endif
|
||||
} // if (-1 == have_gpu_aware)
|
||||
@ -352,19 +350,16 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
forward_pair_comm_classic = forward_fix_comm_classic = 1;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||
forward_pair_comm_on_host = forward_fix_comm_on_host = 0;
|
||||
} else if (strcmp(arg[iarg+1],"host") == 0) {
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
|
||||
forward_pair_comm_classic = forward_fix_comm_classic = 0;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 1;
|
||||
forward_pair_comm_on_host = forward_fix_comm_on_host = 1;
|
||||
} else if (strcmp(arg[iarg+1],"device") == 0) {
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
|
||||
forward_pair_comm_classic = forward_fix_comm_classic = 0;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||
forward_pair_comm_on_host = forward_fix_comm_on_host = 0;
|
||||
} else error->all(FLERR,"Illegal package kokkos command");
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"comm/exchange") == 0) {
|
||||
@ -394,25 +389,15 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
} else if (strcmp(arg[iarg],"comm/pair/forward") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||
if (strcmp(arg[iarg+1],"no") == 0) forward_pair_comm_classic = 1;
|
||||
else if (strcmp(arg[iarg+1],"host") == 0) {
|
||||
forward_pair_comm_classic = 0;
|
||||
forward_pair_comm_on_host = 1;
|
||||
} else if (strcmp(arg[iarg+1],"device") == 0) {
|
||||
forward_pair_comm_classic = 0;
|
||||
forward_pair_comm_on_host = 0;
|
||||
} else error->all(FLERR,"Illegal package kokkos command");
|
||||
else if (strcmp(arg[iarg+1],"device") == 0) forward_pair_comm_classic = 0;
|
||||
else error->all(FLERR,"Illegal package kokkos command");
|
||||
forward_pair_comm_changed = 0;
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"comm/fix/forward") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||
if (strcmp(arg[iarg+1],"no") == 0) forward_fix_comm_classic = 1;
|
||||
else if (strcmp(arg[iarg+1],"host") == 0) {
|
||||
forward_fix_comm_classic = 0;
|
||||
forward_fix_comm_on_host = 1;
|
||||
} else if (strcmp(arg[iarg+1],"device") == 0) {
|
||||
forward_fix_comm_classic = 0;
|
||||
forward_fix_comm_on_host = 0;
|
||||
} else error->all(FLERR,"Illegal package kokkos command");
|
||||
else if (strcmp(arg[iarg+1],"device") == 0) forward_fix_comm_classic = 0;
|
||||
else error->all(FLERR,"Illegal package kokkos command");
|
||||
forward_fix_comm_changed = 0;
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"comm/reverse") == 0) {
|
||||
@ -427,7 +412,7 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
} else error->all(FLERR,"Illegal package kokkos command");
|
||||
reverse_comm_changed = 0;
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"cuda/aware") == 0) {
|
||||
} else if (strcmp(arg[iarg],"gpu/aware") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||
if (strcmp(arg[iarg+1],"off") == 0) gpu_aware_flag = 0;
|
||||
else if (strcmp(arg[iarg+1],"on") == 0) gpu_aware_flag = 1;
|
||||
@ -448,52 +433,52 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
int nmpi = 0;
|
||||
MPI_Comm_size(world,&nmpi);
|
||||
|
||||
// if "cuda/aware off" and "comm device", change to "comm host"
|
||||
// if "gpu/aware off" and "comm device", change to "comm classic"
|
||||
|
||||
if (!gpu_aware_flag && nmpi > 1) {
|
||||
if (exchange_comm_classic == 0 && exchange_comm_on_host == 0) {
|
||||
exchange_comm_on_host = 1;
|
||||
exchange_comm_classic = 1;
|
||||
exchange_comm_changed = 1;
|
||||
}
|
||||
if (forward_comm_classic == 0 && forward_comm_on_host == 0) {
|
||||
forward_comm_on_host = 1;
|
||||
forward_comm_classic = 1;
|
||||
forward_comm_changed = 1;
|
||||
}
|
||||
if (forward_pair_comm_classic == 0 && forward_pair_comm_on_host == 0) {
|
||||
forward_pair_comm_on_host = 1;
|
||||
if (forward_pair_comm_classic == 0) {
|
||||
forward_pair_comm_classic = 1;
|
||||
forward_pair_comm_changed = 1;
|
||||
}
|
||||
if (forward_fix_comm_classic == 0 && forward_fix_comm_on_host == 0) {
|
||||
forward_fix_comm_on_host = 1;
|
||||
if (forward_fix_comm_classic == 0) {
|
||||
forward_fix_comm_classic = 1;
|
||||
forward_fix_comm_changed = 1;
|
||||
}
|
||||
if (reverse_comm_classic == 0 && reverse_comm_on_host == 0) {
|
||||
reverse_comm_on_host = 1;
|
||||
reverse_comm_classic = 1;
|
||||
reverse_comm_changed = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// if "cuda/aware on" and comm flags were changed previously, change them back
|
||||
// if "gpu/aware on" and comm flags were changed previously, change them back
|
||||
|
||||
if (gpu_aware_flag) {
|
||||
if (exchange_comm_changed) {
|
||||
exchange_comm_on_host = 0;
|
||||
exchange_comm_classic = 0;
|
||||
exchange_comm_changed = 0;
|
||||
}
|
||||
if (forward_comm_changed) {
|
||||
forward_comm_on_host = 0;
|
||||
forward_comm_classic = 0;
|
||||
forward_comm_changed = 0;
|
||||
}
|
||||
if (forward_pair_comm_changed) {
|
||||
forward_pair_comm_on_host = 0;
|
||||
forward_pair_classic = 0;
|
||||
forward_pair_comm_changed = 0;
|
||||
}
|
||||
if (forward_fix_comm_changed) {
|
||||
forward_fix_comm_on_host = 0;
|
||||
forward_fix_comm_classic = 0;
|
||||
forward_fix_comm_changed = 0;
|
||||
}
|
||||
if (reverse_comm_changed) {
|
||||
reverse_comm_on_host = 0;
|
||||
reverse_comm_classic = 0;
|
||||
reverse_comm_changed = 0;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user