Rename Kokkos variables
This commit is contained in:
@ -78,9 +78,9 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
|
||||
// process any command-line args that invoke Kokkos settings
|
||||
|
||||
ngpu = 0;
|
||||
ngpus = 0;
|
||||
int device = 0;
|
||||
num_threads = 1;
|
||||
nthreads = 1;
|
||||
numa = 1;
|
||||
|
||||
int iarg = 0;
|
||||
@ -96,7 +96,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
error->all(FLERR,"GPUs are requested but Kokkos has not been compiled for CUDA");
|
||||
#endif
|
||||
if (iarg+2 > narg) error->all(FLERR,"Invalid Kokkos command-line args");
|
||||
ngpu = atoi(arg[iarg+1]);
|
||||
ngpus = atoi(arg[iarg+1]);
|
||||
|
||||
int skip_gpu = 9999;
|
||||
if (iarg+2 < narg && isdigit(arg[iarg+2][0])) {
|
||||
@ -108,23 +108,23 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
char *str;
|
||||
if ((str = getenv("SLURM_LOCALID"))) {
|
||||
int local_rank = atoi(str);
|
||||
device = local_rank % ngpu;
|
||||
device = local_rank % ngpus;
|
||||
if (device >= skip_gpu) device++;
|
||||
}
|
||||
if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) {
|
||||
int local_rank = atoi(str);
|
||||
device = local_rank % ngpu;
|
||||
device = local_rank % ngpus;
|
||||
if (device >= skip_gpu) device++;
|
||||
}
|
||||
if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK"))) {
|
||||
int local_rank = atoi(str);
|
||||
device = local_rank % ngpu;
|
||||
device = local_rank % ngpus;
|
||||
if (device >= skip_gpu) device++;
|
||||
}
|
||||
|
||||
} else if (strcmp(arg[iarg],"t") == 0 ||
|
||||
strcmp(arg[iarg],"threads") == 0) {
|
||||
num_threads = atoi(arg[iarg+1]);
|
||||
nthreads = atoi(arg[iarg+1]);
|
||||
iarg += 2;
|
||||
|
||||
} else if (strcmp(arg[iarg],"n") == 0 ||
|
||||
@ -138,12 +138,12 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
// initialize Kokkos
|
||||
|
||||
if (me == 0) {
|
||||
if (screen) fprintf(screen," will use up to %d GPU(s) per node\n",ngpu);
|
||||
if (logfile) fprintf(logfile," will use up to %d GPU(s) per node\n",ngpu);
|
||||
if (screen) fprintf(screen," will use up to %d GPU(s) per node\n",ngpus);
|
||||
if (logfile) fprintf(logfile," will use up to %d GPU(s) per node\n",ngpus);
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
if (ngpu <= 0)
|
||||
if (ngpus <= 0)
|
||||
error->all(FLERR,"Kokkos has been compiled for CUDA but no GPUs are requested");
|
||||
|
||||
// check and warn about GPU-direct availability when using multiple MPI tasks
|
||||
@ -167,14 +167,14 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
#endif
|
||||
|
||||
#ifndef KOKKOS_ENABLE_SERIAL
|
||||
if (num_threads == 1)
|
||||
if (nthreads == 1)
|
||||
error->warning(FLERR,"When using a single thread, the Kokkos Serial backend "
|
||||
"(i.e. Makefile.kokkos_mpi_only) gives better performance "
|
||||
"than the OpenMP backend");
|
||||
#endif
|
||||
|
||||
Kokkos::InitArguments args;
|
||||
args.num_threads = num_threads;
|
||||
args.num_threads = nthreads;
|
||||
args.num_numa = numa;
|
||||
args.device_id = device;
|
||||
|
||||
@ -184,7 +184,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
|
||||
binsize = 0.0;
|
||||
gpu_direct_flag = 1;
|
||||
if (ngpu > 0) {
|
||||
if (ngpus > 0) {
|
||||
neighflag = FULL;
|
||||
neighflag_qeq = FULL;
|
||||
neighflag_qeq_set = 0;
|
||||
@ -192,7 +192,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||
} else {
|
||||
if (num_threads > 1) {
|
||||
if (nthreads > 1) {
|
||||
neighflag = HALFTHREAD;
|
||||
neighflag_qeq = HALFTHREAD;
|
||||
} else {
|
||||
@ -236,7 +236,7 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||
if (strcmp(arg[iarg+1],"full") == 0) neighflag = FULL;
|
||||
else if (strcmp(arg[iarg+1],"half") == 0) {
|
||||
if (num_threads > 1 || ngpu > 0)
|
||||
if (nthreads > 1 || ngpus > 0)
|
||||
neighflag = HALFTHREAD;
|
||||
else
|
||||
neighflag = HALF;
|
||||
@ -248,7 +248,7 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||
if (strcmp(arg[iarg+1],"full") == 0) neighflag_qeq = FULL;
|
||||
else if (strcmp(arg[iarg+1],"half") == 0) {
|
||||
if (num_threads > 1 || ngpu > 0)
|
||||
if (nthreads > 1 || ngpus > 0)
|
||||
neighflag_qeq = HALFTHREAD;
|
||||
else
|
||||
neighflag_qeq = HALF;
|
||||
|
||||
@ -32,7 +32,7 @@ class KokkosLMP : protected Pointers {
|
||||
int exchange_comm_on_host;
|
||||
int forward_comm_on_host;
|
||||
int reverse_comm_on_host;
|
||||
int num_threads,ngpu;
|
||||
int nthreads,ngpus;
|
||||
int numa;
|
||||
int auto_sync;
|
||||
int gpu_direct_flag;
|
||||
|
||||
@ -362,7 +362,7 @@ void NeighborKokkos::modify_mol_intra_grow_kokkos(){
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
void NeighborKokkos::set_binsize_kokkos() {
|
||||
if (!binsizeflag && lmp->kokkos->ngpu > 0) {
|
||||
if (!binsizeflag && lmp->kokkos->ngpus > 0) {
|
||||
binsize_user = cutneighmax;
|
||||
binsizeflag = 1;
|
||||
}
|
||||
|
||||
@ -310,12 +310,12 @@ void PairExp6rxKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
|
||||
#else // No atomics
|
||||
|
||||
num_threads = lmp->kokkos->num_threads;
|
||||
nthreads = lmp->kokkos->nthreads;
|
||||
int nmax = f.extent(0);
|
||||
if (nmax > t_f.extent(1)) {
|
||||
t_f = t_f_array_thread("pair_exp6_rx:t_f",num_threads,nmax);
|
||||
t_uCG = t_efloat_1d_thread("pair_exp6_rx:t_uCG",num_threads,nmax);
|
||||
t_uCGnew = t_efloat_1d_thread("pair_exp6_rx:t_UCGnew",num_threads,nmax);
|
||||
t_f = t_f_array_thread("pair_exp6_rx:t_f",nthreads,nmax);
|
||||
t_uCG = t_efloat_1d_thread("pair_exp6_rx:t_uCG",nthreads,nmax);
|
||||
t_uCGnew = t_efloat_1d_thread("pair_exp6_rx:t_UCGnew",nthreads,nmax);
|
||||
}
|
||||
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairExp6rxZeroDupViews>(0,nmax),*this);
|
||||
@ -1642,7 +1642,7 @@ void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxComputeNoAtomics<NEIG
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxCollapseDupViews, const int &i) const {
|
||||
for (int n = 0; n < num_threads; n++) {
|
||||
for (int n = 0; n < nthreads; n++) {
|
||||
f(i,0) += t_f(n,i,0);
|
||||
f(i,1) += t_f(n,i,1);
|
||||
f(i,2) += t_f(n,i,2);
|
||||
@ -1654,7 +1654,7 @@ void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxCollapseDupViews, con
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxZeroDupViews, const int &i) const {
|
||||
for (int n = 0; n < num_threads; n++) {
|
||||
for (int n = 0; n < nthreads; n++) {
|
||||
t_f(n,i,0) = 0.0;
|
||||
t_f(n,i,1) = 0.0;
|
||||
t_f(n,i,2) = 0.0;
|
||||
@ -2105,7 +2105,7 @@ void PairExp6rxKokkos<DeviceType>::getMixingWeights(int id,double &epsilon1,doub
|
||||
void partition_range( const int begin, const int end, int &thread_begin, int &thread_end, const int chunkSize = 1)
|
||||
{
|
||||
int threadId = omp_get_thread_num();
|
||||
int nThreads = omp_get_num_threads();
|
||||
int nThreads = omp_get_num_threads();
|
||||
|
||||
const int len = end - begin;
|
||||
const int nBlocks = (len + (chunkSize - 1)) / chunkSize;
|
||||
|
||||
@ -145,7 +145,7 @@ class PairExp6rxKokkos : public PairExp6rx {
|
||||
int eflag,vflag;
|
||||
int nlocal,newton_pair,neighflag;
|
||||
double special_lj[4];
|
||||
int num_threads,ntypes;
|
||||
int nthreads,ntypes;
|
||||
|
||||
typename AT::t_x_array_randomread x;
|
||||
typename AT::t_f_array f;
|
||||
|
||||
@ -1656,7 +1656,7 @@ void PPPMKokkos<DeviceType>::make_rho()
|
||||
iy = nyhi_out-nylo_out + 1;
|
||||
|
||||
copymode = 1;
|
||||
Kokkos::TeamPolicy<DeviceType, TagPPPM_make_rho> config(lmp->kokkos->num_threads,1);
|
||||
Kokkos::TeamPolicy<DeviceType, TagPPPM_make_rho> config(lmp->kokkos->nthreads,1);
|
||||
Kokkos::parallel_for(config,*this);
|
||||
copymode = 0;
|
||||
#endif
|
||||
|
||||
@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
|
||||
RandPoolWrap::RandPoolWrap(int, LAMMPS *lmp) : Pointers(lmp)
|
||||
{
|
||||
random_thr = NULL;
|
||||
nthreads = lmp->kokkos->num_threads;
|
||||
nthreads = lmp->kokkos->nthreads;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -59,7 +59,7 @@ void RandPoolWrap::init(RanMars* random, int seed)
|
||||
// allocate pool of RNGs
|
||||
// generate a random number generator instance for
|
||||
// all threads != 0. make sure we use unique seeds.
|
||||
nthreads = lmp->kokkos->num_threads;
|
||||
nthreads = lmp->kokkos->nthreads;
|
||||
random_thr = new RanMars*[nthreads];
|
||||
for (int tid = 1; tid < nthreads; ++tid) {
|
||||
random_thr[tid] = new RanMars(lmp, seed + comm->me
|
||||
|
||||
@ -50,8 +50,8 @@ namespace LAMMPS_NS {
|
||||
class KokkosLMP {
|
||||
public:
|
||||
int kokkos_exists;
|
||||
int num_threads;
|
||||
int ngpu;
|
||||
int nthreads;
|
||||
int ngpus;
|
||||
int numa;
|
||||
|
||||
KokkosLMP(class LAMMPS *, int, char **) {kokkos_exists = 0;}
|
||||
|
||||
@ -81,7 +81,7 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
|
||||
nthreads = 1;
|
||||
#ifdef _OPENMP
|
||||
if (lmp->kokkos) {
|
||||
nthreads = lmp->kokkos->num_threads * lmp->kokkos->numa;
|
||||
nthreads = lmp->kokkos->nthreads * lmp->kokkos->numa;
|
||||
} else if (getenv("OMP_NUM_THREADS") == NULL) {
|
||||
nthreads = 1;
|
||||
if (me == 0)
|
||||
|
||||
@ -176,9 +176,9 @@ void Finish::end(int flag)
|
||||
const char fmt2[] =
|
||||
"%.1f%% CPU use with %d MPI tasks x %d OpenMP threads\n";
|
||||
if (screen) fprintf(screen,fmt2,cpu_loop,nprocs,
|
||||
lmp->kokkos->num_threads);
|
||||
lmp->kokkos->nthreads);
|
||||
if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs,
|
||||
lmp->kokkos->num_threads);
|
||||
lmp->kokkos->nthreads);
|
||||
} else {
|
||||
#if defined(_OPENMP)
|
||||
const char fmt2[] =
|
||||
@ -579,7 +579,7 @@ void Finish::end(int flag)
|
||||
}
|
||||
#endif
|
||||
|
||||
if (lmp->kokkos && lmp->kokkos->ngpu > 0)
|
||||
if (lmp->kokkos && lmp->kokkos->ngpus > 0)
|
||||
if (const char* env_clb = getenv("CUDA_LAUNCH_BLOCKING"))
|
||||
if (!(strcmp(env_clb,"1") == 0)) {
|
||||
error->warning(FLERR,"Timing breakdown may not be accurate "
|
||||
|
||||
Reference in New Issue
Block a user