diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index d6a67188bb..632a64cd78 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -78,9 +78,9 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) // process any command-line args that invoke Kokkos settings - ngpu = 0; + ngpus = 0; int device = 0; - num_threads = 1; + nthreads = 1; numa = 1; int iarg = 0; @@ -96,7 +96,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) error->all(FLERR,"GPUs are requested but Kokkos has not been compiled for CUDA"); #endif if (iarg+2 > narg) error->all(FLERR,"Invalid Kokkos command-line args"); - ngpu = atoi(arg[iarg+1]); + ngpus = atoi(arg[iarg+1]); int skip_gpu = 9999; if (iarg+2 < narg && isdigit(arg[iarg+2][0])) { @@ -108,23 +108,23 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) char *str; if ((str = getenv("SLURM_LOCALID"))) { int local_rank = atoi(str); - device = local_rank % ngpu; + device = local_rank % ngpus; if (device >= skip_gpu) device++; } if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) { int local_rank = atoi(str); - device = local_rank % ngpu; + device = local_rank % ngpus; if (device >= skip_gpu) device++; } if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK"))) { int local_rank = atoi(str); - device = local_rank % ngpu; + device = local_rank % ngpus; if (device >= skip_gpu) device++; } } else if (strcmp(arg[iarg],"t") == 0 || strcmp(arg[iarg],"threads") == 0) { - num_threads = atoi(arg[iarg+1]); + nthreads = atoi(arg[iarg+1]); iarg += 2; } else if (strcmp(arg[iarg],"n") == 0 || @@ -138,12 +138,12 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) // initialize Kokkos if (me == 0) { - if (screen) fprintf(screen," will use up to %d GPU(s) per node\n",ngpu); - if (logfile) fprintf(logfile," will use up to %d GPU(s) per node\n",ngpu); + if (screen) fprintf(screen," will use up to %d GPU(s) per node\n",ngpus); + if (logfile) fprintf(logfile," will use 
up to %d GPU(s) per node\n",ngpus); } #ifdef KOKKOS_ENABLE_CUDA - if (ngpu <= 0) + if (ngpus <= 0) error->all(FLERR,"Kokkos has been compiled for CUDA but no GPUs are requested"); // check and warn about GPU-direct availability when using multiple MPI tasks @@ -167,14 +167,14 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) #endif #ifndef KOKKOS_ENABLE_SERIAL - if (num_threads == 1) + if (nthreads == 1) error->warning(FLERR,"When using a single thread, the Kokkos Serial backend " "(i.e. Makefile.kokkos_mpi_only) gives better performance " "than the OpenMP backend"); #endif Kokkos::InitArguments args; - args.num_threads = num_threads; + args.num_threads = nthreads; args.num_numa = numa; args.device_id = device; @@ -184,7 +184,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) binsize = 0.0; gpu_direct_flag = 1; - if (ngpu > 0) { + if (ngpus > 0) { neighflag = FULL; neighflag_qeq = FULL; neighflag_qeq_set = 0; @@ -192,7 +192,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0; exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0; } else { - if (num_threads > 1) { + if (nthreads > 1) { neighflag = HALFTHREAD; neighflag_qeq = HALFTHREAD; } else { @@ -236,7 +236,7 @@ void KokkosLMP::accelerator(int narg, char **arg) if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); if (strcmp(arg[iarg+1],"full") == 0) neighflag = FULL; else if (strcmp(arg[iarg+1],"half") == 0) { - if (num_threads > 1 || ngpu > 0) + if (nthreads > 1 || ngpus > 0) neighflag = HALFTHREAD; else neighflag = HALF; @@ -248,7 +248,7 @@ void KokkosLMP::accelerator(int narg, char **arg) if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); if (strcmp(arg[iarg+1],"full") == 0) neighflag_qeq = FULL; else if (strcmp(arg[iarg+1],"half") == 0) { - if (num_threads > 1 || ngpu > 0) + if (nthreads > 1 || ngpus > 0) 
neighflag_qeq = HALFTHREAD; else neighflag_qeq = HALF; diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index 74a10883f6..a0d0163b46 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -32,7 +32,7 @@ class KokkosLMP : protected Pointers { int exchange_comm_on_host; int forward_comm_on_host; int reverse_comm_on_host; - int num_threads,ngpu; + int nthreads,ngpus; int numa; int auto_sync; int gpu_direct_flag; diff --git a/src/KOKKOS/neighbor_kokkos.cpp b/src/KOKKOS/neighbor_kokkos.cpp index 7aaeda4b37..e912956a3f 100644 --- a/src/KOKKOS/neighbor_kokkos.cpp +++ b/src/KOKKOS/neighbor_kokkos.cpp @@ -362,7 +362,7 @@ void NeighborKokkos::modify_mol_intra_grow_kokkos(){ /* ---------------------------------------------------------------------- */ void NeighborKokkos::set_binsize_kokkos() { - if (!binsizeflag && lmp->kokkos->ngpu > 0) { + if (!binsizeflag && lmp->kokkos->ngpus > 0) { binsize_user = cutneighmax; binsizeflag = 1; } diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 0a6372fdf8..2c21d7d6d7 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -310,12 +310,12 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) #else // No atomics - num_threads = lmp->kokkos->num_threads; + nthreads = lmp->kokkos->nthreads; int nmax = f.extent(0); if (nmax > t_f.extent(1)) { - t_f = t_f_array_thread("pair_exp6_rx:t_f",num_threads,nmax); - t_uCG = t_efloat_1d_thread("pair_exp6_rx:t_uCG",num_threads,nmax); - t_uCGnew = t_efloat_1d_thread("pair_exp6_rx:t_UCGnew",num_threads,nmax); + t_f = t_f_array_thread("pair_exp6_rx:t_f",nthreads,nmax); + t_uCG = t_efloat_1d_thread("pair_exp6_rx:t_uCG",nthreads,nmax); + t_uCGnew = t_efloat_1d_thread("pair_exp6_rx:t_UCGnew",nthreads,nmax); } Kokkos::parallel_for(Kokkos::RangePolicy(0,nmax),*this); @@ -1642,7 +1642,7 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxComputeNoAtomics KOKKOS_INLINE_FUNCTION void 
PairExp6rxKokkos::operator()(TagPairExp6rxCollapseDupViews, const int &i) const { - for (int n = 0; n < num_threads; n++) { + for (int n = 0; n < nthreads; n++) { f(i,0) += t_f(n,i,0); f(i,1) += t_f(n,i,1); f(i,2) += t_f(n,i,2); @@ -1654,7 +1654,7 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxCollapseDupViews, con template KOKKOS_INLINE_FUNCTION void PairExp6rxKokkos::operator()(TagPairExp6rxZeroDupViews, const int &i) const { - for (int n = 0; n < num_threads; n++) { + for (int n = 0; n < nthreads; n++) { t_f(n,i,0) = 0.0; t_f(n,i,1) = 0.0; t_f(n,i,2) = 0.0; @@ -2105,7 +2105,7 @@ void PairExp6rxKokkos::getMixingWeights(int id,double &epsilon1,doub void partition_range( const int begin, const int end, int &thread_begin, int &thread_end, const int chunkSize = 1) { int threadId = omp_get_thread_num(); - int nThreads = omp_get_num_threads(); + int nThreads = omp_get_num_threads(); const int len = end - begin; const int nBlocks = (len + (chunkSize - 1)) / chunkSize; diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.h b/src/KOKKOS/pair_exp6_rx_kokkos.h index 5e44048ae2..f3801db631 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.h +++ b/src/KOKKOS/pair_exp6_rx_kokkos.h @@ -145,7 +145,7 @@ class PairExp6rxKokkos : public PairExp6rx { int eflag,vflag; int nlocal,newton_pair,neighflag; double special_lj[4]; - int num_threads,ntypes; + int nthreads,ntypes; typename AT::t_x_array_randomread x; typename AT::t_f_array f; diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index c233ca6264..7c01adc510 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -1656,7 +1656,7 @@ void PPPMKokkos::make_rho() iy = nyhi_out-nylo_out + 1; copymode = 1; - Kokkos::TeamPolicy config(lmp->kokkos->num_threads,1); + Kokkos::TeamPolicy config(lmp->kokkos->nthreads,1); Kokkos::parallel_for(config,*this); copymode = 0; #endif diff --git a/src/KOKKOS/rand_pool_wrap_kokkos.cpp b/src/KOKKOS/rand_pool_wrap_kokkos.cpp index 39b91f1600..51ebcb154e 100644 ---
a/src/KOKKOS/rand_pool_wrap_kokkos.cpp +++ b/src/KOKKOS/rand_pool_wrap_kokkos.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; RandPoolWrap::RandPoolWrap(int, LAMMPS *lmp) : Pointers(lmp) { random_thr = NULL; - nthreads = lmp->kokkos->num_threads; + nthreads = lmp->kokkos->nthreads; } /* ---------------------------------------------------------------------- */ @@ -59,7 +59,7 @@ void RandPoolWrap::init(RanMars* random, int seed) // allocate pool of RNGs // generate a random number generator instance for // all threads != 0. make sure we use unique seeds. - nthreads = lmp->kokkos->num_threads; + nthreads = lmp->kokkos->nthreads; random_thr = new RanMars*[nthreads]; for (int tid = 1; tid < nthreads; ++tid) { random_thr[tid] = new RanMars(lmp, seed + comm->me diff --git a/src/accelerator_kokkos.h b/src/accelerator_kokkos.h index e64bde24be..74bd470572 100644 --- a/src/accelerator_kokkos.h +++ b/src/accelerator_kokkos.h @@ -50,8 +50,8 @@ namespace LAMMPS_NS { class KokkosLMP { public: int kokkos_exists; - int num_threads; - int ngpu; + int nthreads; + int ngpus; int numa; KokkosLMP(class LAMMPS *, int, char **) {kokkos_exists = 0;} diff --git a/src/comm.cpp b/src/comm.cpp index 30fd7c243e..052de93793 100644 --- a/src/comm.cpp +++ b/src/comm.cpp @@ -81,7 +81,7 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp) nthreads = 1; #ifdef _OPENMP if (lmp->kokkos) { - nthreads = lmp->kokkos->num_threads * lmp->kokkos->numa; + nthreads = lmp->kokkos->nthreads * lmp->kokkos->numa; } else if (getenv("OMP_NUM_THREADS") == NULL) { nthreads = 1; if (me == 0) diff --git a/src/finish.cpp b/src/finish.cpp index 9ad8b44927..1baa6d6fda 100644 --- a/src/finish.cpp +++ b/src/finish.cpp @@ -176,9 +176,9 @@ void Finish::end(int flag) const char fmt2[] = "%.1f%% CPU use with %d MPI tasks x %d OpenMP threads\n"; if (screen) fprintf(screen,fmt2,cpu_loop,nprocs, - lmp->kokkos->num_threads); + lmp->kokkos->nthreads); if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs, - lmp->kokkos->num_threads); + 
lmp->kokkos->nthreads); } else { #if defined(_OPENMP) const char fmt2[] = @@ -579,7 +579,7 @@ void Finish::end(int flag) } #endif - if (lmp->kokkos && lmp->kokkos->ngpu > 0) + if (lmp->kokkos && lmp->kokkos->ngpus > 0) if (const char* env_clb = getenv("CUDA_LAUNCH_BLOCKING")) if (!(strcmp(env_clb,"1") == 0)) { error->warning(FLERR,"Timing breakdown may not be accurate "