From 288629bd139c43ae2bb87b40f868a1e4a7ae1fd0 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Tue, 31 Oct 2023 11:03:37 -0500 Subject: [PATCH 001/116] Added an RMS accuracy estimate for AMOEBA/HIPPO --- src/AMOEBA/amoeba_multipole.cpp | 60 +++++++++++++++++++++++++++++++++ src/AMOEBA/pair_amoeba.cpp | 16 +++++++-- src/AMOEBA/pair_amoeba.h | 4 +++ 3 files changed, 77 insertions(+), 3 deletions(-) diff --git a/src/AMOEBA/amoeba_multipole.cpp b/src/AMOEBA/amoeba_multipole.cpp index a1503a91f3..8eea5083ea 100644 --- a/src/AMOEBA/amoeba_multipole.cpp +++ b/src/AMOEBA/amoeba_multipole.cpp @@ -18,6 +18,7 @@ #include "atom.h" #include "comm.h" #include "domain.h" +#include "force.h" #include "math_const.h" #include "math_special.h" #include "neigh_list.h" @@ -29,6 +30,8 @@ using namespace LAMMPS_NS; using namespace MathConst; using MathSpecial::square; +using MathSpecial::powint; +using MathSpecial::powsinxx; enum{FIELD,ZRSD,TORQUE,UFLD}; // reverse comm enum{VDWL,REPULSE,QFER,DISP,MPOLE,POLAR,USOLV,DISP_LONG,MPOLE_LONG,POLAR_LONG}; @@ -1000,3 +1003,60 @@ void PairAmoeba::damppole(double r, int rorder, double alphai, double alphak, } } } + +/* ---------------------------------------------------------------------- + estimate the accuracy of m_kspace solver based on the monopoles +------------------------------------------------------------------------- */ + +double PairAmoeba::final_accuracy_mpole() +{ + const int nlocal = atom->nlocal; + double qsqsum_local(0.0), qsqsum; + for (int i = 0; i < nlocal; i++) { + qsqsum_local += rpole[i][0]*rpole[i][0]; + } + MPI_Allreduce(&qsqsum_local,&qsqsum,1,MPI_DOUBLE,MPI_SUM,world); + double q2 = qsqsum * force->qqrd2e; + + const double * const prd = domain->prd; + const double xprd = prd[0]; + const double yprd = prd[1]; + const double zprd = prd[2]; + const double slab_volfactor = 1.0; + const double zprd_slab = zprd*slab_volfactor; + bigint natoms = atom->natoms; + + int nx_fft = m_kspace->nx; + int ny_fft = m_kspace->ny; + int 
nz_fft = m_kspace->nz; + double cutoff = mpolecut; + + double lprx = rms(nx_fft,xprd,natoms,aeewald,q2); + double lpry = rms(ny_fft,yprd,natoms,aeewald,q2); + double lprz = rms(nz_fft,zprd_slab,natoms,aeewald,q2); + double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); + double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab); + double spr = 2.0 *q2_over_sqrt * exp(-aeewald*aeewald*cutoff*cutoff); + double tpr = 0; //estimate_table_accuracy(q2_over_sqrt,spr); + double estimated_accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr); + + two_charge_force = force->qqr2e * + (force->qelectron * force->qelectron) / + (force->angstrom * force->angstrom); + + return estimated_accuracy; +} + +/* ---------------------------------------------------------------------- + compute RMS accuracy for a dimension +------------------------------------------------------------------------- */ + +double PairAmoeba::rms(int km, double prd, bigint natoms, double g_ewald, double q2) +{ + if (natoms == 0) natoms = 1; // avoid division by zero + double value = 2.0*q2*g_ewald/prd * + sqrt(1.0/(MY_PI*km*natoms)) * + exp(-MY_PI*MY_PI*km*km/(g_ewald*g_ewald*prd*prd)); + + return value; +} diff --git a/src/AMOEBA/pair_amoeba.cpp b/src/AMOEBA/pair_amoeba.cpp index 72efa76523..577332e1b4 100644 --- a/src/AMOEBA/pair_amoeba.cpp +++ b/src/AMOEBA/pair_amoeba.cpp @@ -347,7 +347,7 @@ void PairAmoeba::compute(int eflag, int vflag) } } - first_flag_compute = 0; +// first_flag_compute = 0; // ------------------------------------------------------------------- // end of one-time initializations @@ -428,6 +428,12 @@ void PairAmoeba::compute(int eflag, int vflag) else cfstyle = SETUP_HIPPO; comm->forward_comm(this); + // output FF settings to screen and logfile + // delay until here because force RMS accuracy is computed based on rpole + + if (first_flag_compute && (comm->me == 0)) print_settings(); + first_flag_compute = 0; + if (amoeba) pbc_xred(); time1 = platform::walltime(); @@ -980,7 
+986,7 @@ void PairAmoeba::init_style() // output FF settings to screen and logfile - if (first_flag && (comm->me == 0)) print_settings(); + //if (first_flag && (comm->me == 0)) print_settings(); // all done with one-time initializations @@ -1098,9 +1104,13 @@ void PairAmoeba::print_settings() if (use_ewald) { choose(MPOLE_LONG); - mesg += fmt::format(" multipole: cut {} aewald {} bsorder {} FFT {} {} {} " + double estimated_accuracy = final_accuracy_mpole(); + mesg += fmt::format(" multipole: cut {} aewald {} bsorder {} FFT {} {} {}; " + "estimated absolute RMS force accuracy = {:.8g}; " + "estimated relative RMS force accuracy = {:.8g}; " "mscale {} {} {} {}\n", sqrt(off2),aewald,bseorder,nefft1,nefft2,nefft3, + estimated_accuracy,estimated_accuracy/two_charge_force, special_mpole[1],special_mpole[2],special_mpole[3],special_mpole[4]); } else { choose(MPOLE); diff --git a/src/AMOEBA/pair_amoeba.h b/src/AMOEBA/pair_amoeba.h index 1f3a4b799a..648fc86126 100644 --- a/src/AMOEBA/pair_amoeba.h +++ b/src/AMOEBA/pair_amoeba.h @@ -419,6 +419,10 @@ class PairAmoeba : public Pair { double ewaldcof(double); int factorable(int); + double final_accuracy_mpole(); + double rms(int km, double prd, bigint natoms, double g_ewald, double q2); + double two_charge_force; + // debug methods FILE *fp_uind; From 9807316e7c831f2d47e7b23a3dbcf588a107c9c3 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Tue, 31 Oct 2023 11:15:24 -0500 Subject: [PATCH 002/116] Cleaned up and added comments --- src/AMOEBA/amoeba_multipole.cpp | 3 +-- src/AMOEBA/pair_amoeba.cpp | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/AMOEBA/amoeba_multipole.cpp b/src/AMOEBA/amoeba_multipole.cpp index 8eea5083ea..fb84e818e9 100644 --- a/src/AMOEBA/amoeba_multipole.cpp +++ b/src/AMOEBA/amoeba_multipole.cpp @@ -30,8 +30,6 @@ using namespace LAMMPS_NS; using namespace MathConst; using MathSpecial::square; -using MathSpecial::powint; -using MathSpecial::powsinxx; enum{FIELD,ZRSD,TORQUE,UFLD}; 
// reverse comm enum{VDWL,REPULSE,QFER,DISP,MPOLE,POLAR,USOLV,DISP_LONG,MPOLE_LONG,POLAR_LONG}; @@ -1006,6 +1004,7 @@ void PairAmoeba::damppole(double r, int rorder, double alphai, double alphak, /* ---------------------------------------------------------------------- estimate the accuracy of m_kspace solver based on the monopoles + based on Ewald ------------------------------------------------------------------------- */ double PairAmoeba::final_accuracy_mpole() diff --git a/src/AMOEBA/pair_amoeba.cpp b/src/AMOEBA/pair_amoeba.cpp index 577332e1b4..4fccd7c2b8 100644 --- a/src/AMOEBA/pair_amoeba.cpp +++ b/src/AMOEBA/pair_amoeba.cpp @@ -429,7 +429,7 @@ void PairAmoeba::compute(int eflag, int vflag) comm->forward_comm(this); // output FF settings to screen and logfile - // delay until here because force RMS accuracy is computed based on rpole + // delay until here because RMS force accuracy is computed based on rpole if (first_flag_compute && (comm->me == 0)) print_settings(); first_flag_compute = 0; @@ -984,8 +984,8 @@ void PairAmoeba::init_style() for (int i = 0; i < nlocal; i++) pval[i] = 0.0; } - // output FF settings to screen and logfile - + // output FF settings to screen and logfile: + // delay until rpole are available for RMS force accuracy estimate //if (first_flag && (comm->me == 0)) print_settings(); // all done with one-time initializations From 64fa32cf1e7ffe0a6e3eed1201412f03045da5d9 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Thu, 9 Nov 2023 10:13:23 -0600 Subject: [PATCH 003/116] Removed commented lines --- src/AMOEBA/amoeba_multipole.cpp | 2 +- src/AMOEBA/pair_amoeba.cpp | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/src/AMOEBA/amoeba_multipole.cpp b/src/AMOEBA/amoeba_multipole.cpp index fb84e818e9..d0ae03401a 100644 --- a/src/AMOEBA/amoeba_multipole.cpp +++ b/src/AMOEBA/amoeba_multipole.cpp @@ -1036,7 +1036,7 @@ double PairAmoeba::final_accuracy_mpole() double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / 
sqrt(3.0); double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab); double spr = 2.0 *q2_over_sqrt * exp(-aeewald*aeewald*cutoff*cutoff); - double tpr = 0; //estimate_table_accuracy(q2_over_sqrt,spr); + double tpr = 0; double estimated_accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr); two_charge_force = force->qqr2e * diff --git a/src/AMOEBA/pair_amoeba.cpp b/src/AMOEBA/pair_amoeba.cpp index 4fccd7c2b8..cad9e2b628 100644 --- a/src/AMOEBA/pair_amoeba.cpp +++ b/src/AMOEBA/pair_amoeba.cpp @@ -347,8 +347,6 @@ void PairAmoeba::compute(int eflag, int vflag) } } -// first_flag_compute = 0; - // ------------------------------------------------------------------- // end of one-time initializations // ------------------------------------------------------------------- @@ -984,10 +982,6 @@ void PairAmoeba::init_style() for (int i = 0; i < nlocal; i++) pval[i] = 0.0; } - // output FF settings to screen and logfile: - // delay until rpole are available for RMS force accuracy estimate - //if (first_flag && (comm->me == 0)) print_settings(); - // all done with one-time initializations first_flag = 0; From e6d31485335cb3f239d3d722d0bddd151bbd4cc3 Mon Sep 17 00:00:00 2001 From: Yifan Li Date: Thu, 9 Nov 2023 23:34:01 -0500 Subject: [PATCH 004/116] add method=pimd support for Langevin thermostat --- src/REPLICA/fix_pimd_langevin.cpp | 120 +++++++++++++++++++----------- 1 file changed, 76 insertions(+), 44 deletions(-) diff --git a/src/REPLICA/fix_pimd_langevin.cpp b/src/REPLICA/fix_pimd_langevin.cpp index cffaf327e4..d4e2b6e526 100644 --- a/src/REPLICA/fix_pimd_langevin.cpp +++ b/src/REPLICA/fix_pimd_langevin.cpp @@ -50,7 +50,7 @@ using namespace FixConst; using MathConst::MY_PI; using MathConst::THIRD; -enum { NMPIMD }; +enum { PIMD, NMPIMD }; enum { PHYSICAL, NORMAL }; enum { BAOAB, OBABO }; enum { ISO, ANISO, TRICLINIC }; @@ -121,6 +121,8 @@ FixPIMDLangevin::FixPIMDLangevin(LAMMPS *lmp, int narg, char **arg) : if (strcmp(arg[i], "method") == 0) { if (strcmp(arg[i + 1], 
"nmpimd") == 0) method = NMPIMD; + elif (strcmp(arg[i + 1], "pimd") == 0) + method = PIMD; else error->universe_all(FLERR, "Unknown method parameter for fix pimd/langevin"); } else if (strcmp(arg[i], "integrator") == 0) { @@ -159,7 +161,7 @@ FixPIMDLangevin::FixPIMDLangevin(LAMMPS *lmp, int narg, char **arg) : error->universe_all(FLERR, "Invalid fmass value for fix pimd/langevin"); } else if (strcmp(arg[i], "sp") == 0) { sp = utils::numeric(FLERR, arg[i + 1], false, lmp); - if (sp < 0.0) error->universe_all(FLERR, "Invalid sp value for fix pimd/nvt"); + if (sp < 0.0) error->universe_all(FLERR, "Invalid sp value for fix pimd/langevin"); } else if (strcmp(arg[i], "fmmode") == 0) { if (strcmp(arg[i + 1], "physical") == 0) fmmode = PHYSICAL; @@ -170,9 +172,11 @@ FixPIMDLangevin::FixPIMDLangevin(LAMMPS *lmp, int narg, char **arg) : "Unknown fictitious mass mode for fix pimd/langevin. Only physical " "mass and normal mode mass are supported!"); } else if (strcmp(arg[i], "scale") == 0) { + if (method == PIMD) + error->universe_all(FLERR, "The scale parameter of the PILE_L thermostat is not supported for method pimd. Delete scale parameter if you do want to use method pimd."); pilescale = utils::numeric(FLERR, arg[i + 1], false, lmp); if (pilescale < 0.0) - error->universe_all(FLERR, "Invalid pile scale value for fix pimd/langevin"); + error->universe_all(FLERR, "Invalid PILE_L scale value for fix pimd/langevin"); } else if (strcmp(arg[i], "temp") == 0) { temp = utils::numeric(FLERR, arg[i + 1], false, lmp); if (temp < 0.0) error->universe_all(FLERR, "Invalid temp value for fix pimd/langevin"); @@ -245,6 +249,12 @@ FixPIMDLangevin::FixPIMDLangevin(LAMMPS *lmp, int narg, char **arg) : error->universe_all( FLERR, fmt::format("Must not use pressure coupling with {} ensemble", Ensembles[ensemble])); + if (method == PIMD && pstat_flag) + error->universe_all(FLERR, "Pressure control has not been supported for method pimd yet. 
Please set method to nmpimd."); + + if (method == PIMD && fmmode == NORMAL) + error->universe_all(FLERR, "Normal mode mass is not supported for method pimd. Please set method to nmpimd."); + /* Initiation */ global_freq = 1; @@ -890,20 +900,22 @@ void FixPIMDLangevin::langevin_init() _omega_k = new double[np]; Lan_c = new double[np]; Lan_s = new double[np]; - if (fmmode == PHYSICAL) { - for (int i = 0; i < np; i++) { - _omega_k[i] = _omega_np * sqrt(lam[i]) / sqrt(fmass); - Lan_c[i] = cos(sqrt(lam[i]) * _omega_np_dt_half); - Lan_s[i] = sin(sqrt(lam[i]) * _omega_np_dt_half); + if (method == NMPIMD) { + if (fmmode == PHYSICAL) { + for (int i = 0; i < np; i++) { + _omega_k[i] = _omega_np * sqrt(lam[i]) / sqrt(fmass); + Lan_c[i] = cos(sqrt(lam[i]) * _omega_np_dt_half); + Lan_s[i] = sin(sqrt(lam[i]) * _omega_np_dt_half); + } + } else if (fmmode == NORMAL) { + for (int i = 0; i < np; i++) { + _omega_k[i] = _omega_np / sqrt(fmass); + Lan_c[i] = cos(_omega_np_dt_half); + Lan_s[i] = sin(_omega_np_dt_half); + } + } else { + error->universe_all(FLERR, "Unknown fmmode setting; only physical and normal are supported!"); } - } else if (fmmode == NORMAL) { - for (int i = 0; i < np; i++) { - _omega_k[i] = _omega_np / sqrt(fmass); - Lan_c[i] = cos(_omega_np_dt_half); - Lan_s[i] = sin(_omega_np_dt_half); - } - } else { - error->universe_all(FLERR, "Unknown fmmode setting; only physical and normal are supported!"); } if (tau > 0) @@ -925,27 +937,35 @@ void FixPIMDLangevin::langevin_init() if (thermostat == PILE_L) { std::string out = "\nInitializing PI Langevin equation thermostat...\n"; out += "Bead ID | omega | tau | c1 | c2\n"; - tau_k = new double[np]; - c1_k = new double[np]; - c2_k = new double[np]; - tau_k[0] = tau; - c1_k[0] = c1; - c2_k[0] = c2; - for (int i = 1; i < np; i++) { - tau_k[i] = 0.5 / pilescale / _omega_k[i]; - if (integrator == OBABO) - c1_k[i] = exp(-0.5 * update->dt / tau_k[i]); - else if (integrator == BAOAB) - c1_k[i] = exp(-1.0 * update->dt / tau_k[i]); - 
else - error->universe_all(FLERR, - "Unknown integrator parameter for fix pimd/langevin. Only obabo and " - "baoab integrators are supported!"); - c2_k[i] = sqrt(1.0 - c1_k[i] * c1_k[i]); - } - for (int i = 0; i < np; i++) { - out += fmt::format(" {:d} {:.8e} {:.8e} {:.8e} {:.8e}\n", i, _omega_k[i], tau_k[i], - c1_k[i], c2_k[i]); + if (method == NMPIMD) { + tau_k = new double[np]; + c1_k = new double[np]; + c2_k = new double[np]; + tau_k[0] = tau; + c1_k[0] = c1; + c2_k[0] = c2; + for (int i = 1; i < np; i++) { + tau_k[i] = 0.5 / pilescale / _omega_k[i]; + if (integrator == OBABO) + c1_k[i] = exp(-0.5 * update->dt / tau_k[i]); + else if (integrator == BAOAB) + c1_k[i] = exp(-1.0 * update->dt / tau_k[i]); + else + error->universe_all(FLERR, + "Unknown integrator parameter for fix pimd/langevin. Only obabo and " + "baoab integrators are supported!"); + c2_k[i] = sqrt(1.0 - c1_k[i] * c1_k[i]); + } + for (int i = 0; i < np; i++) { + out += fmt::format(" {:d} {:.8e} {:.8e} {:.8e} {:.8e}\n", i, _omega_k[i], tau_k[i], + c1_k[i], c2_k[i]); + } + } + else if (method == PIMD) { + for (int i = 0; i < np; i++) { + out += fmt::format(" {:d} {:.8e} {:.8e} {:.8e} {:.8e}\n", i, _omega_np / sqrt(fmass), tau, + c1, c2); + } } if (thermostat == PILE_L) out += "PILE_L thermostat successfully initialized!\n"; out += "\n"; @@ -961,13 +981,25 @@ void FixPIMDLangevin::o_step() int *type = atom->type; double beta_np = 1.0 / force->boltz / Lan_temp * inverse_np * force->mvv2e; if (thermostat == PILE_L) { - for (int i = 0; i < nlocal; i++) { - atom->v[i][0] = c1_k[universe->iworld] * atom->v[i][0] + - c2_k[universe->iworld] * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); - atom->v[i][1] = c1_k[universe->iworld] * atom->v[i][1] + - c2_k[universe->iworld] * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); - atom->v[i][2] = c1_k[universe->iworld] * atom->v[i][2] + - c2_k[universe->iworld] * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); + if (method == NMPIMD) { 
+ for (int i = 0; i < nlocal; i++) { + atom->v[i][0] = c1_k[universe->iworld] * atom->v[i][0] + + c2_k[universe->iworld] * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); + atom->v[i][1] = c1_k[universe->iworld] * atom->v[i][1] + + c2_k[universe->iworld] * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); + atom->v[i][2] = c1_k[universe->iworld] * atom->v[i][2] + + c2_k[universe->iworld] * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); + } + } + else if (method == PIMD) { + for (int i = 0; i < nlocal; i++) { + atom->v[i][0] = c1 * atom->v[i][0] + + c2 * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); + atom->v[i][1] = c1 * atom->v[i][1] + + c2 * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); + atom->v[i][2] = c1 * atom->v[i][2] + + c2 * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); + } } } } From f413d395a52c54b9e0e47e0adcd0c3a297aaba75 Mon Sep 17 00:00:00 2001 From: Yifan Li Date: Fri, 10 Nov 2023 00:20:51 -0500 Subject: [PATCH 005/116] add support for method=PIMD; add q_step function --- src/REPLICA/fix_pimd_langevin.cpp | 74 ++++++++++++++++++++++++++----- src/REPLICA/fix_pimd_langevin.h | 4 ++ 2 files changed, 68 insertions(+), 10 deletions(-) diff --git a/src/REPLICA/fix_pimd_langevin.cpp b/src/REPLICA/fix_pimd_langevin.cpp index d4e2b6e526..0068546f44 100644 --- a/src/REPLICA/fix_pimd_langevin.cpp +++ b/src/REPLICA/fix_pimd_langevin.cpp @@ -538,11 +538,18 @@ void FixPIMDLangevin::initial_integrate(int /*vflag*/) nmpimd_transform(bufsortedall, x, M_x2xp[universe->iworld]); else if (cmode == MULTI_PROC) nmpimd_transform(bufbeads, x, M_x2xp[universe->iworld]); + qc_step(); + a_step(); + qc_step(); + a_step(); + } + else if (method == PIMD) { + q_step(); + q_step(); + } + else { + error->universe_all(FLERR, "Unknown method parameter for fix pimd/langevin. 
Only nmpimd and pimd are supported!"); } - qc_step(); - a_step(); - qc_step(); - a_step(); } else if (integrator == BAOAB) { if (pstat_flag) { compute_totke(); @@ -556,18 +563,32 @@ void FixPIMDLangevin::initial_integrate(int /*vflag*/) nmpimd_transform(bufsortedall, x, M_x2xp[universe->iworld]); else if (cmode == MULTI_PROC) nmpimd_transform(bufbeads, x, M_x2xp[universe->iworld]); + qc_step(); + a_step(); + } + else if (method == PIMD) { + q_step(); + } + else { + error->universe_all(FLERR, "Unknown method parameter for fix pimd/langevin. Only nmpimd and pimd are supported!"); } - qc_step(); - a_step(); if (tstat_flag) { o_step(); if (removecomflag) remove_com_motion(); if (pstat_flag) press_o_step(); } - qc_step(); - a_step(); + if (method == NMPIMD) { + qc_step(); + a_step(); + } + else if (method == PIMD) { + q_step(); + } + else { + error->universe_all(FLERR, "Unknown method parameter for fix pimd/langevin. Only nmpimd and pimd are supported!"); + } } else { - error->universe_all(FLERR, "Unknown integrator parameter for fix pimd/langevin"); + error->universe_all(FLERR, "Unknown integrator parameter for fix pimd/langevin. Only obabo and baoab integrators are supported!"); } collect_xc(); compute_spring_energy(); @@ -614,6 +635,7 @@ void FixPIMDLangevin::final_integrate() void FixPIMDLangevin::post_force(int /*flag*/) { + if (method == NMPIMD) { if (atom->nmax > maxunwrap) reallocate_x_unwrap(); if (atom->nmax > maxxc) reallocate_xc(); int nlocal = atom->nlocal; @@ -638,6 +660,7 @@ void FixPIMDLangevin::post_force(int /*flag*/) compute_vir(); compute_cvir(); compute_t_vir(); + } compute_pote(); if (method == NMPIMD) { inter_replica_comm(f); @@ -646,6 +669,12 @@ void FixPIMDLangevin::post_force(int /*flag*/) else if (cmode == MULTI_PROC) nmpimd_transform(bufbeads, f, M_x2xp[universe->iworld]); } + else if (method == PIMD) { + spring_force(); + } + else { + error->universe_all(FLERR, "Unknown method parameter for fix pimd/langevin. 
Only nmpimd and pimd are supported!"); + } c_pe->addstep(update->ntimestep + 1); c_press->addstep(update->ntimestep + 1); } @@ -660,6 +689,8 @@ void FixPIMDLangevin::end_of_step() if (pstat_flag) compute_totenthalpy(); } +/* ---------------------------------------------------------------------- */ + void FixPIMDLangevin::collect_xc() { int nlocal = atom->nlocal; @@ -693,7 +724,9 @@ void FixPIMDLangevin::collect_xc() void FixPIMDLangevin::b_step() { - + // used for both NMPIMD and PIMD + // For NMPIMD, force only includes the contribution of external potential. + // For PIMD, force includes the contributions of external potential and spring force. int n = atom->nlocal; int *type = atom->type; double **v = atom->v; @@ -711,6 +744,8 @@ void FixPIMDLangevin::b_step() void FixPIMDLangevin::qc_step() { + // used for NMPIMD + // evolve the centroid mode int nlocal = atom->nlocal; double **x = atom->x; double **v = atom->v; @@ -775,6 +810,8 @@ void FixPIMDLangevin::qc_step() void FixPIMDLangevin::a_step() { + // used for NMPIMD + // use analytical solution of harmonic oscillator to evolve the non-centroid modes int n = atom->nlocal; double **x = atom->x; double **v = atom->v; @@ -806,6 +843,14 @@ void FixPIMDLangevin::a_step() /* ---------------------------------------------------------------------- */ +void FixPIMDLangevin::q_step() +{ + // used for PIMD + // evolve all beads +} + +/* ---------------------------------------------------------------------- */ + void FixPIMDLangevin::baro_init() { vw[0] = vw[1] = vw[2] = vw[3] = vw[4] = vw[5] = 0.0; @@ -1086,6 +1131,12 @@ void FixPIMDLangevin::nmpimd_transform(double **src, double **des, double *vecto } } +/* ---------------------------------------------------------------------- */ + +void FixPIMDLangevin::spring_force() +{ +} + /* ---------------------------------------------------------------------- Comm operations ------------------------------------------------------------------------- */ @@ -1111,6 +1162,9 @@ void 
FixPIMDLangevin::comm_init() planrecv[i] = universe->root_proc[irecv]; modeindex[i] = irecv; } + + x_next = (universe->iworld + 1 + universe->nworlds) % (universe->nworlds); + x_last = (universe->iworld - 1 + universe->nworlds) % (universe->nworlds); } /* ---------------------------------------------------------------------- */ diff --git a/src/REPLICA/fix_pimd_langevin.h b/src/REPLICA/fix_pimd_langevin.h index 9730f65376..0f21b908b0 100644 --- a/src/REPLICA/fix_pimd_langevin.h +++ b/src/REPLICA/fix_pimd_langevin.h @@ -77,6 +77,8 @@ class FixPIMDLangevin : public Fix { int me, nprocs, ireplica, nreplica, nprocs_universe; int ntotal, maxlocal; + int x_last, x_next; + int cmode; int sizeplan; int *plansend, *planrecv; @@ -93,6 +95,7 @@ class FixPIMDLangevin : public Fix { void comm_init(); void inter_replica_comm(double **ptr); + void spring_force(); /* normal-mode operations */ @@ -121,6 +124,7 @@ class FixPIMDLangevin : public Fix { a_step(); // integrate for dt/2 according to A part (non-centroid mode, harmonic force between replicas) void qc_step(); // integrate for dt/2 for the centroid mode (x <- x + v * dt/2) void o_step(); // integrate for dt according to O part (O-U process, for thermostating) + void q_step(); // integrate for dt/2 for all the beads (x <- x + v * dt/2) /* Bussi-Zykova-Parrinello barostat */ From 63935194191f8b5d06005647d8c28914cc475b73 Mon Sep 17 00:00:00 2001 From: Yifan Li Date: Fri, 10 Nov 2023 16:07:00 -0500 Subject: [PATCH 006/116] q_step function --- src/REPLICA/fix_pimd_langevin.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/REPLICA/fix_pimd_langevin.cpp b/src/REPLICA/fix_pimd_langevin.cpp index 0068546f44..5a2df1bc75 100644 --- a/src/REPLICA/fix_pimd_langevin.cpp +++ b/src/REPLICA/fix_pimd_langevin.cpp @@ -847,6 +847,17 @@ void FixPIMDLangevin::q_step() { // used for PIMD // evolve all beads + int nlocal = atom->nlocal; + double **x = atom->x; + double **v = atom->v; + + if (!pstat_flag) { + for (int i = 0; i 
< nlocal; i++) { + x[i][0] += dtv * v[i][0]; + x[i][1] += dtv * v[i][1]; + x[i][2] += dtv * v[i][2]; + } + } } /* ---------------------------------------------------------------------- */ From ae3c33266784a34e613135b879acc04e8c247948 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Fri, 10 Nov 2023 16:16:49 -0600 Subject: [PATCH 007/116] Tested a coarser PME mesh with a lower accuracy (2e-4) --- examples/amoeba/amoeba_ubiquitin.key | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/amoeba/amoeba_ubiquitin.key b/examples/amoeba/amoeba_ubiquitin.key index 2870d071d4..3d63525258 100644 --- a/examples/amoeba/amoeba_ubiquitin.key +++ b/examples/amoeba/amoeba_ubiquitin.key @@ -12,7 +12,8 @@ ewald ewald-alpha 0.4 pewald-alpha 0.5 ewald-cutoff 7.0 -#pme-grid 60 45 45 pme-grid 60 48 48 -pme-order 5 polar-eps 0.00001 +#pme-grid 15 12 12 +#polar-eps 0.0002 +pme-order 5 From 98a0f43c9bbeac7f1688b073710d6b8d4e3aea09 Mon Sep 17 00:00:00 2001 From: Yifan Li Date: Sat, 11 Nov 2023 01:04:13 -0500 Subject: [PATCH 008/116] add spring_force function --- src/REPLICA/fix_pimd_langevin.cpp | 82 ++++++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 13 deletions(-) diff --git a/src/REPLICA/fix_pimd_langevin.cpp b/src/REPLICA/fix_pimd_langevin.cpp index 5a2df1bc75..6ef74c2a1a 100644 --- a/src/REPLICA/fix_pimd_langevin.cpp +++ b/src/REPLICA/fix_pimd_langevin.cpp @@ -121,7 +121,7 @@ FixPIMDLangevin::FixPIMDLangevin(LAMMPS *lmp, int narg, char **arg) : if (strcmp(arg[i], "method") == 0) { if (strcmp(arg[i + 1], "nmpimd") == 0) method = NMPIMD; - elif (strcmp(arg[i + 1], "pimd") == 0) + else if (strcmp(arg[i + 1], "pimd") == 0) method = PIMD; else error->universe_all(FLERR, "Unknown method parameter for fix pimd/langevin"); @@ -488,6 +488,13 @@ void FixPIMDLangevin::setup(int vflag) else if (cmode == MULTI_PROC) nmpimd_transform(bufbeads, x, M_x2xp[universe->iworld]); } + else if (method == PIMD) { + inter_replica_comm(x); + spring_force(); + } + else { 
+ error->universe_all(FLERR, "Unknown method parameter for fix pimd/langevin. Only nmpimd and pimd are supported!"); + } collect_xc(); compute_spring_energy(); compute_t_prim(); @@ -591,9 +598,16 @@ void FixPIMDLangevin::initial_integrate(int /*vflag*/) error->universe_all(FLERR, "Unknown integrator parameter for fix pimd/langevin. Only obabo and baoab integrators are supported!"); } collect_xc(); - compute_spring_energy(); - compute_t_prim(); - compute_p_prim(); + if (method == PIMD) { + inter_replica_comm(x); + spring_force(); + compute_spring_energy(); + } + if (method == NMPIMD) { + compute_spring_energy(); + compute_t_prim(); + compute_p_prim(); + } if (method == NMPIMD) { inter_replica_comm(x); @@ -635,14 +649,15 @@ void FixPIMDLangevin::final_integrate() void FixPIMDLangevin::post_force(int /*flag*/) { - if (method == NMPIMD) { - if (atom->nmax > maxunwrap) reallocate_x_unwrap(); - if (atom->nmax > maxxc) reallocate_xc(); int nlocal = atom->nlocal; double **x = atom->x; double **f = atom->f; imageint *image = atom->image; tagint *tag = atom->tag; + + if (method == NMPIMD) { + if (atom->nmax > maxunwrap) reallocate_x_unwrap(); + if (atom->nmax > maxxc) reallocate_xc(); for (int i = 0; i < nlocal; i++) { x_unwrap[i][0] = x[i][0]; x_unwrap[i][1] = x[i][1]; @@ -669,12 +684,7 @@ void FixPIMDLangevin::post_force(int /*flag*/) else if (cmode == MULTI_PROC) nmpimd_transform(bufbeads, f, M_x2xp[universe->iworld]); } - else if (method == PIMD) { - spring_force(); - } - else { - error->universe_all(FLERR, "Unknown method parameter for fix pimd/langevin. 
Only nmpimd and pimd are supported!"); - } + c_pe->addstep(update->ntimestep + 1); c_press->addstep(update->ntimestep + 1); } @@ -1146,6 +1156,41 @@ void FixPIMDLangevin::nmpimd_transform(double **src, double **des, double *vecto void FixPIMDLangevin::spring_force() { + spring_energy = 0.0; + + double **x = atom->x; + double **f = atom->f; + double* _mass = atom->mass; + int* type = atom->type; + int nlocal = atom->nlocal; + tagint* tagtmp = atom->tag; + + int *mask = atom->mask; + + for (int i=0; iuworld); total_spring_energy /= universe->procs_per_world[universe->iworld]; + } + else if (method == PIMD) { + total_spring_energy = se_bead = 0.0; + MPI_Allreduce(&spring_energy, &se_bead, 1, MPI_DOUBLE, MPI_SUM, world); + MPI_Allreduce(&se_bead, &total_spring_energy, 1, MPI_DOUBLE, MPI_SUM, universe->uworld); + total_spring_energy /= universe->procs_per_world[universe->iworld]; + } + else { + error->universe_all(FLERR, "Unknown method parameter for fix pimd/langevin. Only nmpimd and pimd are supported!"); + } } /* ---------------------------------------------------------------------- */ From ba32afc06e293384653f4856069575268ae47e9c Mon Sep 17 00:00:00 2001 From: Yifan Li Date: Sat, 11 Nov 2023 01:45:36 -0500 Subject: [PATCH 009/116] fix spring_energy --- src/REPLICA/fix_pimd_langevin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/REPLICA/fix_pimd_langevin.cpp b/src/REPLICA/fix_pimd_langevin.cpp index 6ef74c2a1a..96cf4bef35 100644 --- a/src/REPLICA/fix_pimd_langevin.cpp +++ b/src/REPLICA/fix_pimd_langevin.cpp @@ -1188,7 +1188,7 @@ void FixPIMDLangevin::spring_force() f[i][1] -= (dy) * ff; f[i][2] -= (dz) * ff; - spring_energy += ff * (delx2*delx2+dely2*dely2+delz2*delz2); + spring_energy += 0.5 * ff * (delx2*delx2+dely2*dely2+delz2*delz2); } } } From 4ef27552c4492b92c3ea889309ccd2ec3e2c8245 Mon Sep 17 00:00:00 2001 From: Yifan Li Date: Sat, 11 Nov 2023 03:10:00 -0500 Subject: [PATCH 010/116] fix spring_force()'s position --- 
src/REPLICA/fix_pimd_langevin.cpp | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/REPLICA/fix_pimd_langevin.cpp b/src/REPLICA/fix_pimd_langevin.cpp index 96cf4bef35..4e94b77a86 100644 --- a/src/REPLICA/fix_pimd_langevin.cpp +++ b/src/REPLICA/fix_pimd_langevin.cpp @@ -598,11 +598,7 @@ void FixPIMDLangevin::initial_integrate(int /*vflag*/) error->universe_all(FLERR, "Unknown integrator parameter for fix pimd/langevin. Only obabo and baoab integrators are supported!"); } collect_xc(); - if (method == PIMD) { - inter_replica_comm(x); - spring_force(); - compute_spring_energy(); - } + if (method == NMPIMD) { compute_spring_energy(); compute_t_prim(); @@ -676,6 +672,18 @@ void FixPIMDLangevin::post_force(int /*flag*/) compute_cvir(); compute_t_vir(); } + + if (method == PIMD) { + if (mapflag) { + for (int i = 0; i < nlocal; i++) { domain->unmap(x[i], image[i]); } + } + inter_replica_comm(x); + spring_force(); + compute_spring_energy(); + if (mapflag) { + for (int i = 0; i < nlocal; i++) { domain->unmap_inv(x[i], image[i]); } + } + } compute_pote(); if (method == NMPIMD) { inter_replica_comm(f); @@ -1165,7 +1173,10 @@ void FixPIMDLangevin::spring_force() int nlocal = atom->nlocal; tagint* tagtmp = atom->tag; + // printf("iworld = %d, x_last = %d, x_next = %d\n", universe->iworld, x_last, x_next); int *mask = atom->mask; + + // int idx_tmp = atom->map(1); for (int i=0; i Date: Sat, 11 Nov 2023 04:10:31 -0500 Subject: [PATCH 011/116] update document for method=pimd --- doc/src/fix_pimd.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/src/fix_pimd.rst b/doc/src/fix_pimd.rst index 5b51b97c52..6abac408ca 100644 --- a/doc/src/fix_pimd.rst +++ b/doc/src/fix_pimd.rst @@ -31,7 +31,7 @@ Syntax .. 
parsed-literal:: *keywords* = *method* or *integrator* or *ensemble* or *fmmode* or *fmass* or *scale* or *temp* or *thermostat* or *tau* or *iso* or *aniso* or *barostat* or *taup* or *fixcom* or *lj* - *method* value = *nmpimd* + *method* value = *nmpimd* (default) or *pimd* *integrator* value = *obabo* or *baoab* *fmmode* value = *physical* or *normal* *fmass* value = scaling factor on mass @@ -137,9 +137,6 @@ normal-mode PIMD. A value of *cmd* is for centroid molecular dynamics the real particle. .. note:: - Fix pimd/langevin only supports *method* value *nmpimd*. This should be enough - for most PIMD applications for quantum thermodynamics purpose. - Motion of the centroid can be effectively uncoupled from the other normal modes by scaling the fictitious masses to achieve a partial adiabatic separation. This is called a Centroid Molecular Dynamics @@ -151,6 +148,10 @@ normal-mode PIMD. A value of *cmd* is for centroid molecular dynamics only the k > 0 modes are thermostatted, not the centroid degrees of freedom. +Fix pimd/langevin supports *method* value *nmpimd* and *pimd*. The default value is *nmpimd*. +If *method* is *nmpimd*, the normal mode representation is used to integrate the equations of motion. The exact solution of harmonic oscillator is used to propagate the free ring polymer part of the Hamiltonian. +If *method* is *pimd*, the Cartesian representation is used to integrate the equations of motion. The harmonic force is added to the total force of the system, and the numerical integrator is used to propagate the Hamiltonian. + The keyword *integrator* specifies the Trotter splitting method used by *fix pimd/langevin*. See :ref:`(Liu) ` for a discussion on the OBABO and BAOAB splitting schemes. Typically either of the two should work fine. 
From 3d14e2e0e23b1b7ee4efb74b71613c129910260e Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 11 Nov 2023 06:05:07 -0500 Subject: [PATCH 012/116] whitespace --- src/REPLICA/fix_pimd_langevin.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/REPLICA/fix_pimd_langevin.cpp b/src/REPLICA/fix_pimd_langevin.cpp index 4e94b77a86..ba02b7a184 100644 --- a/src/REPLICA/fix_pimd_langevin.cpp +++ b/src/REPLICA/fix_pimd_langevin.cpp @@ -1177,25 +1177,25 @@ void FixPIMDLangevin::spring_force() int *mask = atom->mask; // int idx_tmp = atom->map(1); - + for (int i=0; i Date: Sat, 11 Nov 2023 06:09:15 -0500 Subject: [PATCH 013/116] doc tweaks --- doc/src/fix_pimd.rst | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/doc/src/fix_pimd.rst b/doc/src/fix_pimd.rst index 6abac408ca..91c5e58add 100644 --- a/doc/src/fix_pimd.rst +++ b/doc/src/fix_pimd.rst @@ -137,6 +137,7 @@ normal-mode PIMD. A value of *cmd* is for centroid molecular dynamics the real particle. .. note:: + Motion of the centroid can be effectively uncoupled from the other normal modes by scaling the fictitious masses to achieve a partial adiabatic separation. This is called a Centroid Molecular Dynamics @@ -148,9 +149,15 @@ normal-mode PIMD. A value of *cmd* is for centroid molecular dynamics only the k > 0 modes are thermostatted, not the centroid degrees of freedom. -Fix pimd/langevin supports *method* value *nmpimd* and *pimd*. The default value is *nmpimd*. -If *method* is *nmpimd*, the normal mode representation is used to integrate the equations of motion. The exact solution of harmonic oscillator is used to propagate the free ring polymer part of the Hamiltonian. -If *method* is *pimd*, the Cartesian representation is used to integrate the equations of motion. The harmonic force is added to the total force of the system, and the numerical integrator is used to propagate the Hamiltonian. +.. 
versionadded:: TBD + + Mode *pimd* added to fix pimd/langevin. + +Fix pimd/langevin supports the *method* values *nmpimd* and *pimd*. The default value is *nmpimd*. +If *method* is *nmpimd*, the normal mode representation is used to integrate the equations of motion. +The exact solution of the harmonic oscillator is used to propagate the free ring polymer part of the Hamiltonian. +If *method* is *pimd*, the Cartesian representation is used to integrate the equations of motion. +The harmonic force is added to the total force of the system, and the numerical integrator is used to propagate the Hamiltonian. The keyword *integrator* specifies the Trotter splitting method used by *fix pimd/langevin*. See :ref:`(Liu) ` for a discussion on the OBABO and BAOAB splitting schemes. Typically @@ -208,6 +215,7 @@ The keyword *thermostat* reads *style* and *seed* of thermostat for fix style *p be *PILE_L* (path integral Langevin equation local thermostat, as described in :ref:`Ceriotti `), and *seed* should be a positive integer number, which serves as the seed of the pseudo random number generator. .. note:: + The fix style *pimd/langevin* uses the stochastic PILE_L thermostat to control temperature. This thermostat works on the normal modes of the ring polymer. The *tau* parameter controls the centroid mode, and the *scale* parameter controls the non-centroid modes. @@ -270,6 +278,7 @@ related tasks for each of the partitions, e.g. read_restart system_${ibead}.restart2 .. note:: + Fix *pimd/langevin* dumps the Cartesian coordinates, but dumps the velocities and forces in the normal mode representation. If the Cartesian velocities and forces are needed, it is easy to perform the transformation when doing post-processing.
From 9ef1b2d64d7a97af80cb20462026533430781dfc Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 11 Nov 2023 06:20:55 -0500 Subject: [PATCH 014/116] apply clang-format --- src/REPLICA/fix_pimd_langevin.cpp | 195 +++++++++++++++--------------- 1 file changed, 100 insertions(+), 95 deletions(-) diff --git a/src/REPLICA/fix_pimd_langevin.cpp b/src/REPLICA/fix_pimd_langevin.cpp index ba02b7a184..d328420ce9 100644 --- a/src/REPLICA/fix_pimd_langevin.cpp +++ b/src/REPLICA/fix_pimd_langevin.cpp @@ -173,7 +173,10 @@ FixPIMDLangevin::FixPIMDLangevin(LAMMPS *lmp, int narg, char **arg) : "mass and normal mode mass are supported!"); } else if (strcmp(arg[i], "scale") == 0) { if (method == PIMD) - error->universe_all(FLERR, "The scale parameter of the PILE_L thermostat is not supported for method pimd. Delete scale parameter if you do want to use method pimd."); + error->universe_all( + FLERR, + "The scale parameter of the PILE_L thermostat is not supported for method pimd. Delete " + "scale parameter if you do want to use method pimd."); pilescale = utils::numeric(FLERR, arg[i + 1], false, lmp); if (pilescale < 0.0) error->universe_all(FLERR, "Invalid PILE_L scale value for fix pimd/langevin"); @@ -250,10 +253,13 @@ FixPIMDLangevin::FixPIMDLangevin(LAMMPS *lmp, int narg, char **arg) : FLERR, fmt::format("Must not use pressure coupling with {} ensemble", Ensembles[ensemble])); if (method == PIMD && pstat_flag) - error->universe_all(FLERR, "Pressure control has not been supported for method pimd yet. Please set method to nmpimd."); + error->universe_all(FLERR, + "Pressure control has not been supported for method pimd yet. Please set " + "method to nmpimd."); if (method == PIMD && fmmode == NORMAL) - error->universe_all(FLERR, "Normal mode mass is not supported for method pimd. Please set method to nmpimd."); + error->universe_all( + FLERR, "Normal mode mass is not supported for method pimd. 
Please set method to nmpimd."); /* Initiation */ @@ -487,13 +493,13 @@ void FixPIMDLangevin::setup(int vflag) nmpimd_transform(bufsortedall, x, M_x2xp[universe->iworld]); else if (cmode == MULTI_PROC) nmpimd_transform(bufbeads, x, M_x2xp[universe->iworld]); - } - else if (method == PIMD) { + } else if (method == PIMD) { inter_replica_comm(x); spring_force(); - } - else { - error->universe_all(FLERR, "Unknown method parameter for fix pimd/langevin. Only nmpimd and pimd are supported!"); + } else { + error->universe_all( + FLERR, + "Unknown method parameter for fix pimd/langevin. Only nmpimd and pimd are supported!"); } collect_xc(); compute_spring_energy(); @@ -549,13 +555,13 @@ void FixPIMDLangevin::initial_integrate(int /*vflag*/) a_step(); qc_step(); a_step(); - } - else if (method == PIMD) { + } else if (method == PIMD) { q_step(); q_step(); - } - else { - error->universe_all(FLERR, "Unknown method parameter for fix pimd/langevin. Only nmpimd and pimd are supported!"); + } else { + error->universe_all( + FLERR, + "Unknown method parameter for fix pimd/langevin. Only nmpimd and pimd are supported!"); } } else if (integrator == BAOAB) { if (pstat_flag) { @@ -572,12 +578,12 @@ void FixPIMDLangevin::initial_integrate(int /*vflag*/) nmpimd_transform(bufbeads, x, M_x2xp[universe->iworld]); qc_step(); a_step(); - } - else if (method == PIMD) { + } else if (method == PIMD) { q_step(); - } - else { - error->universe_all(FLERR, "Unknown method parameter for fix pimd/langevin. Only nmpimd and pimd are supported!"); + } else { + error->universe_all( + FLERR, + "Unknown method parameter for fix pimd/langevin. Only nmpimd and pimd are supported!"); } if (tstat_flag) { o_step(); @@ -587,15 +593,17 @@ void FixPIMDLangevin::initial_integrate(int /*vflag*/) if (method == NMPIMD) { qc_step(); a_step(); - } - else if (method == PIMD) { + } else if (method == PIMD) { q_step(); - } - else { - error->universe_all(FLERR, "Unknown method parameter for fix pimd/langevin. 
Only nmpimd and pimd are supported!"); + } else { + error->universe_all( + FLERR, + "Unknown method parameter for fix pimd/langevin. Only nmpimd and pimd are supported!"); } } else { - error->universe_all(FLERR, "Unknown integrator parameter for fix pimd/langevin. Only obabo and baoab integrators are supported!"); + error->universe_all(FLERR, + "Unknown integrator parameter for fix pimd/langevin. Only obabo and baoab " + "integrators are supported!"); } collect_xc(); @@ -652,25 +660,25 @@ void FixPIMDLangevin::post_force(int /*flag*/) tagint *tag = atom->tag; if (method == NMPIMD) { - if (atom->nmax > maxunwrap) reallocate_x_unwrap(); - if (atom->nmax > maxxc) reallocate_xc(); - for (int i = 0; i < nlocal; i++) { - x_unwrap[i][0] = x[i][0]; - x_unwrap[i][1] = x[i][1]; - x_unwrap[i][2] = x[i][2]; - } - if (mapflag) { - for (int i = 0; i < nlocal; i++) { domain->unmap(x_unwrap[i], image[i]); } - } - for (int i = 0; i < nlocal; i++) { - xc[i][0] = xcall[3 * (tag[i] - 1) + 0]; - xc[i][1] = xcall[3 * (tag[i] - 1) + 1]; - xc[i][2] = xcall[3 * (tag[i] - 1) + 2]; - } + if (atom->nmax > maxunwrap) reallocate_x_unwrap(); + if (atom->nmax > maxxc) reallocate_xc(); + for (int i = 0; i < nlocal; i++) { + x_unwrap[i][0] = x[i][0]; + x_unwrap[i][1] = x[i][1]; + x_unwrap[i][2] = x[i][2]; + } + if (mapflag) { + for (int i = 0; i < nlocal; i++) { domain->unmap(x_unwrap[i], image[i]); } + } + for (int i = 0; i < nlocal; i++) { + xc[i][0] = xcall[3 * (tag[i] - 1) + 0]; + xc[i][1] = xcall[3 * (tag[i] - 1) + 1]; + xc[i][2] = xcall[3 * (tag[i] - 1) + 2]; + } - compute_vir(); - compute_cvir(); - compute_t_vir(); + compute_vir(); + compute_cvir(); + compute_t_vir(); } if (method == PIMD) { @@ -1032,13 +1040,12 @@ void FixPIMDLangevin::langevin_init() } for (int i = 0; i < np; i++) { out += fmt::format(" {:d} {:.8e} {:.8e} {:.8e} {:.8e}\n", i, _omega_k[i], tau_k[i], - c1_k[i], c2_k[i]); + c1_k[i], c2_k[i]); } - } - else if (method == PIMD) { + } else if (method == PIMD) { for (int i = 0; i 
< np; i++) { - out += fmt::format(" {:d} {:.8e} {:.8e} {:.8e} {:.8e}\n", i, _omega_np / sqrt(fmass), tau, - c1, c2); + out += fmt::format(" {:d} {:.8e} {:.8e} {:.8e} {:.8e}\n", i, _omega_np / sqrt(fmass), + tau, c1, c2); } } if (thermostat == PILE_L) out += "PILE_L thermostat successfully initialized!\n"; @@ -1064,15 +1071,14 @@ void FixPIMDLangevin::o_step() atom->v[i][2] = c1_k[universe->iworld] * atom->v[i][2] + c2_k[universe->iworld] * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); } - } - else if (method == PIMD) { + } else if (method == PIMD) { for (int i = 0; i < nlocal; i++) { - atom->v[i][0] = c1 * atom->v[i][0] + - c2 * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); - atom->v[i][1] = c1 * atom->v[i][1] + - c2 * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); - atom->v[i][2] = c1 * atom->v[i][2] + - c2 * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); + atom->v[i][0] = + c1 * atom->v[i][0] + c2 * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); + atom->v[i][1] = + c1 * atom->v[i][1] + c2 * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); + atom->v[i][2] = + c1 * atom->v[i][2] + c2 * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); } } } @@ -1168,39 +1174,38 @@ void FixPIMDLangevin::spring_force() double **x = atom->x; double **f = atom->f; - double* _mass = atom->mass; - int* type = atom->type; + double *_mass = atom->mass; + int *type = atom->type; int nlocal = atom->nlocal; - tagint* tagtmp = atom->tag; + tagint *tagtmp = atom->tag; // printf("iworld = %d, x_last = %d, x_next = %d\n", universe->iworld, x_last, x_next); int *mask = atom->mask; // int idx_tmp = atom->map(1); - for (int i=0; ix; - double *_mass = atom->mass; - int *type = atom->type; - int nlocal = atom->nlocal; + double **x = atom->x; + double *_mass = atom->mass; + int *type = atom->type; + int nlocal = atom->nlocal; - for (int i = 0; i < nlocal; i++) { - spring_energy += 0.5 * _mass[type[i]] * fbond * 
lam[universe->iworld] * - (x[i][0] * x[i][0] + x[i][1] * x[i][1] + x[i][2] * x[i][2]); - } - MPI_Allreduce(&spring_energy, &se_bead, 1, MPI_DOUBLE, MPI_SUM, world); - MPI_Allreduce(&se_bead, &total_spring_energy, 1, MPI_DOUBLE, MPI_SUM, universe->uworld); - total_spring_energy /= universe->procs_per_world[universe->iworld]; - } - else if (method == PIMD) { + for (int i = 0; i < nlocal; i++) { + spring_energy += 0.5 * _mass[type[i]] * fbond * lam[universe->iworld] * + (x[i][0] * x[i][0] + x[i][1] * x[i][1] + x[i][2] * x[i][2]); + } + MPI_Allreduce(&spring_energy, &se_bead, 1, MPI_DOUBLE, MPI_SUM, world); + MPI_Allreduce(&se_bead, &total_spring_energy, 1, MPI_DOUBLE, MPI_SUM, universe->uworld); + total_spring_energy /= universe->procs_per_world[universe->iworld]; + } else if (method == PIMD) { total_spring_energy = se_bead = 0.0; MPI_Allreduce(&spring_energy, &se_bead, 1, MPI_DOUBLE, MPI_SUM, world); MPI_Allreduce(&se_bead, &total_spring_energy, 1, MPI_DOUBLE, MPI_SUM, universe->uworld); - total_spring_energy /= universe->procs_per_world[universe->iworld]; - } - else { - error->universe_all(FLERR, "Unknown method parameter for fix pimd/langevin. Only nmpimd and pimd are supported!"); + total_spring_energy /= universe->procs_per_world[universe->iworld]; + } else { + error->universe_all( + FLERR, + "Unknown method parameter for fix pimd/langevin. Only nmpimd and pimd are supported!"); } } From be02ef3a536cd3b9b9223a844ad92c874de83955 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 11 Nov 2023 08:58:27 -0500 Subject: [PATCH 015/116] fix broken link --- doc/src/fix_deposit.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/src/fix_deposit.rst b/doc/src/fix_deposit.rst index d7c78c5d15..4c256f524f 100644 --- a/doc/src/fix_deposit.rst +++ b/doc/src/fix_deposit.rst @@ -220,6 +220,8 @@ rotated configuration of the molecule. existing particle. 
LAMMPS will issue a warning if R is smaller than this value, based on the radii of existing and inserted particles. +.. versionadded:: TBD + The *var* and *set* keywords can be used together to provide a criterion for accepting or rejecting the addition of an individual atom, based on its coordinates. The *name* specified for the *var* keyword is the name of an @@ -236,7 +238,7 @@ created atom, one for *y*, and one for *z*. When an atom is created, its is defined. The *var* variable is then evaluated. If the returned value is 0.0, the atom is not created. If it is non-zero, the atom is created. For an example of how to use these keywords, see the -:doc:`create_atoms <create_atoms>`command. +:doc:`create_atoms <create_atoms>` command. The *rate* option moves the insertion volume in the z direction (3d) or y direction (2d). This enables particles to be inserted from a From e6524b59fab72b0521eeeebca88076553313d0ac Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 11 Nov 2023 08:58:58 -0500 Subject: [PATCH 016/116] only check significant chunk of the words --- src/info.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/info.cpp b/src/info.cpp index 948cbbfe15..491cfc728d 100644 --- a/src/info.cpp +++ b/src/info.cpp @@ -167,16 +167,16 @@ void Info::command(int narg, char **arg) if ((out != screen) && (out != logfile)) fclose(out); out = fopen(arg[idx+2],"w"); idx += 3; - } else if (strncmp(arg[idx],"communication",5) == 0) { + } else if (strncmp(arg[idx],"communication",4) == 0) { flags |= COMM; ++idx; - } else if (strncmp(arg[idx],"computes",5) == 0) { + } else if (strncmp(arg[idx],"computes",4) == 0) { flags |= COMPUTES; ++idx; - } else if (strncmp(arg[idx],"dumps",5) == 0) { + } else if (strncmp(arg[idx],"dumps",3) == 0) { flags |= DUMPS; ++idx; - } else if (strncmp(arg[idx],"fixes",5) == 0) { + } else if (strncmp(arg[idx],"fixes",3) == 0) { flags |= FIXES; ++idx; } else if (strncmp(arg[idx],"groups",3) == 0) { From be6fcaa77fe18a81cb8b00a961d68fd614596018
Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 11 Nov 2023 12:07:05 -0500 Subject: [PATCH 017/116] modernize --- src/BOCS/fix_bocs.cpp | 94 +++---- src/BOCS/fix_bocs.h | 3 +- src/DRUDE/fix_tgnh_drude.cpp | 129 ++++------ src/DRUDE/fix_tgnh_drude.h | 3 +- src/EXTRA-FIX/fix_npt_cauchy.cpp | 76 ++---- src/EXTRA-FIX/fix_npt_cauchy.h | 3 +- src/QTB/fix_qbmsst.cpp | 38 +-- src/QTB/fix_qbmsst.h | 3 +- src/SHOCK/fix_msst.cpp | 418 ++++++++++++++----------------- src/SHOCK/fix_msst.h | 5 +- src/fix_press_berendsen.cpp | 27 +- src/fix_press_berendsen.h | 5 +- src/fix_press_langevin.cpp | 26 +- src/fix_press_langevin.h | 5 +- 14 files changed, 328 insertions(+), 507 deletions(-) diff --git a/src/BOCS/fix_bocs.cpp b/src/BOCS/fix_bocs.cpp index d35facdc5a..4918f8d879 100644 --- a/src/BOCS/fix_bocs.cpp +++ b/src/BOCS/fix_bocs.cpp @@ -69,12 +69,9 @@ const int NUM_INPUT_DATA_COLUMNS = 2; // columns in the pressure correction ---------------------------------------------------------------------- */ FixBocs::FixBocs(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg), - rfix(nullptr), id_dilate(nullptr), irregular(nullptr), - id_temp(nullptr), id_press(nullptr), - eta(nullptr), eta_dot(nullptr), eta_dotdot(nullptr), - eta_mass(nullptr), etap(nullptr), etap_dot(nullptr), etap_dotdot(nullptr), - etap_mass(nullptr) + Fix(lmp, narg, arg), id_dilate(nullptr), irregular(nullptr), id_temp(nullptr), + id_press(nullptr), eta(nullptr), eta_dot(nullptr), eta_dotdot(nullptr), eta_mass(nullptr), + etap(nullptr), etap_dot(nullptr), etap_dotdot(nullptr), etap_mass(nullptr) { if (lmp->citeme) lmp->citeme->add(cite_user_bocs_package); @@ -379,9 +376,6 @@ FixBocs::FixBocs(LAMMPS *lmp, int narg, char **arg) : if (deviatoric_flag) size_vector += 1; } - nrigid = 0; - rfix = nullptr; - if (pre_exchange_flag) irregular = new Irregular(lmp); else irregular = nullptr; @@ -424,31 +418,29 @@ FixBocs::~FixBocs() { if (copymode) return; - delete [] id_dilate; - delete [] rfix; - + delete[] 
id_dilate; delete irregular; // delete temperature and pressure if fix created them if (tcomputeflag) modify->delete_compute(id_temp); - delete [] id_temp; + delete[] id_temp; if (tstat_flag) { - delete [] eta; - delete [] eta_dot; - delete [] eta_dotdot; - delete [] eta_mass; + delete[] eta; + delete[] eta_dot; + delete[] eta_dotdot; + delete[] eta_mass; } if (pstat_flag) { if (pcomputeflag) modify->delete_compute(id_press); - delete [] id_press; + delete[] id_press; if (mpchain) { - delete [] etap; - delete [] etap_dot; - delete [] etap_dotdot; - delete [] etap_mass; + delete[] etap; + delete[] etap_dot; + delete[] etap_dotdot; + delete[] etap_mass; } } if (p_match_coeffs) free(p_match_coeffs); @@ -596,20 +588,10 @@ void FixBocs::init() } // detect if any rigid fixes exist so rigid bodies move when box is remapped - // rfix[] = indices to each fix rigid - delete [] rfix; - nrigid = 0; - rfix = nullptr; - - for (int i = 0; i < modify->nfix; i++) - if (modify->fix[i]->rigid_flag) nrigid++; - if (nrigid) { - rfix = new int[nrigid]; - nrigid = 0; - for (int i = 0; i < modify->nfix; i++) - if (modify->fix[i]->rigid_flag) rfix[nrigid++] = i; - } + rfix.clear(); + for (auto &ifix : modify->get_fix_list()) + if (ifix->rigid_flag) rfix.push_back(ifix); } // NJD MRD 2 functions @@ -1204,9 +1186,7 @@ void FixBocs::remap() domain->x2lamda(x[i],x[i]); } - if (nrigid) - for (i = 0; i < nrigid; i++) - modify->fix[rfix[i]]->deform(0); + for (auto &ifix : rfix) ifix->deform(0); // reset global and local box to new size/shape @@ -1351,9 +1331,7 @@ void FixBocs::remap() domain->lamda2x(x[i],x[i]); } - if (nrigid) - for (i = 0; i < nrigid; i++) - modify->fix[rfix[i]]->deform(1); + for (auto &ifix : rfix) ifix->deform(1); } /* ---------------------------------------------------------------------- @@ -1512,7 +1490,7 @@ int FixBocs::modify_param(int narg, char **arg) modify->delete_compute(id_temp); tcomputeflag = 0; } - delete [] id_temp; + delete[] id_temp; id_temp = 
utils::strdup(arg[1]); int icompute = modify->find_compute(arg[1]); @@ -1544,29 +1522,23 @@ int FixBocs::modify_param(int narg, char **arg) modify->delete_compute(id_press); pcomputeflag = 0; } - delete [] id_press; + delete[] id_press; id_press = utils::strdup(arg[1]); - int icompute = modify->find_compute(arg[1]); - if (icompute < 0) error->all(FLERR,"Could not find fix_modify pressure ID"); - pressure = modify->compute[icompute]; + pressure = modify->get_compute_by_id(id_press); + if (!pressure) error->all(FLERR,"Could not find fix_modify pressure ID {}", id_press); + if (!pressure->pressflag) + error->all(FLERR, "Fix_modify pressure ID {} does not compute pressure", id_press); - if (p_match_flag) // NJD MRD - { - if (p_basis_type == BASIS_ANALYTIC) - { - (dynamic_cast(pressure))->send_cg_info(p_basis_type, N_p_match, - p_match_coeffs, N_mol, vavg); + if (p_match_flag) { + auto bocspress = dynamic_cast(pressure); + if (bocspress) { + if (p_basis_type == BASIS_ANALYTIC) { + bocspress->send_cg_info(p_basis_type, N_p_match, p_match_coeffs, N_mol, vavg); + } else if (p_basis_type == BASIS_LINEAR_SPLINE || p_basis_type == BASIS_CUBIC_SPLINE) { + bocspress->send_cg_info(p_basis_type, splines, spline_length); + } } - else if (p_basis_type == BASIS_LINEAR_SPLINE || p_basis_type == BASIS_CUBIC_SPLINE ) - { - (dynamic_cast(pressure))->send_cg_info(p_basis_type, splines, spline_length ); - } - } - - if (pressure->pressflag == 0) - { - error->all(FLERR, "Fix_modify pressure ID does not compute pressure"); } return 2; } diff --git a/src/BOCS/fix_bocs.h b/src/BOCS/fix_bocs.h index fd47fda4d7..71fbc273d8 100644 --- a/src/BOCS/fix_bocs.h +++ b/src/BOCS/fix_bocs.h @@ -75,9 +75,8 @@ class FixBocs : public Fix { double drag, tdrag_factor; // drag factor on particle thermostat double pdrag_factor; // drag factor on barostat int kspace_flag; // 1 if KSpace invoked, 0 if not - int nrigid; // number of rigid fixes int dilate_group_bit; // mask for dilation group - int *rfix; // 
indices of rigid fixes + std::vector rfix; // list of rigid fixes char *id_dilate; // group name to dilate class Irregular *irregular; // for migrating atoms after box flips diff --git a/src/DRUDE/fix_tgnh_drude.cpp b/src/DRUDE/fix_tgnh_drude.cpp index 273f163303..987408fe63 100644 --- a/src/DRUDE/fix_tgnh_drude.cpp +++ b/src/DRUDE/fix_tgnh_drude.cpp @@ -52,14 +52,13 @@ enum{ISO,ANISO,TRICLINIC}; ---------------------------------------------------------------------- */ FixTGNHDrude::FixTGNHDrude(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg), - rfix(nullptr), irregular(nullptr), id_temp(nullptr), id_press(nullptr), - etamol(nullptr), etamol_dot(nullptr), etamol_dotdot(nullptr), etamol_mass(nullptr), - etaint(nullptr), etaint_dot(nullptr), etaint_dotdot(nullptr), etaint_mass(nullptr), - etadrude(nullptr), etadrude_dot(nullptr), etadrude_dotdot(nullptr), etadrude_mass(nullptr), - etap(nullptr), etap_dot(nullptr), etap_dotdot(nullptr), etap_mass(nullptr) + Fix(lmp, narg, arg), irregular(nullptr), id_temp(nullptr), id_press(nullptr), etamol(nullptr), + etamol_dot(nullptr), etamol_dotdot(nullptr), etamol_mass(nullptr), etaint(nullptr), + etaint_dot(nullptr), etaint_dotdot(nullptr), etaint_mass(nullptr), etadrude(nullptr), + etadrude_dot(nullptr), etadrude_dotdot(nullptr), etadrude_mass(nullptr), etap(nullptr), + etap_dot(nullptr), etap_dotdot(nullptr), etap_mass(nullptr) { - if (narg < 4) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + if (narg < 4) error->all(FLERR, "Illegal fix {} command", style); restart_global = 1; dynamic_group_allow = 0; @@ -507,9 +506,6 @@ FixTGNHDrude::FixTGNHDrude(LAMMPS *lmp, int narg, char **arg) : } } - nrigid = 0; - rfix = nullptr; - if (pre_exchange_flag) irregular = new Irregular(lmp); else irregular = nullptr; @@ -519,15 +515,15 @@ FixTGNHDrude::FixTGNHDrude(LAMMPS *lmp, int narg, char **arg) : vol0 = t0 = 0.0; // find fix drude - int ifix; - for (ifix = 0; ifix < modify->nfix; ifix++) - if 
(strcmp(modify->fix[ifix]->style,"drude") == 0) break; - if (ifix == modify->nfix) error->all(FLERR, "fix tgnh/drude requires fix drude"); - fix_drude = dynamic_cast(modify->fix[ifix]); + + auto fdrude = modify->get_fix_by_style("^drude"); + if (fdrude.size() < 1) error->all(FLERR, "Fix {} requires fix drude", style); + fix_drude = dynamic_cast(fdrude[0]); + if (!fix_drude) error->all(FLERR, "Fix {} requires fix drude", style); // make sure ghost atoms have velocity if (!comm->ghost_velocity) - error->all(FLERR,"fix tgnh/drude requires ghost velocities. Use comm_modify vel yes"); + error->all(FLERR,"Fix {} requires ghost velocities. Use comm_modify vel yes", style); } /* ---------------------------------------------------------------------- */ @@ -536,38 +532,36 @@ FixTGNHDrude::~FixTGNHDrude() { if (copymode) return; - delete [] rfix; - delete irregular; // delete temperature and pressure if fix created them if (tcomputeflag) modify->delete_compute(id_temp); - delete [] id_temp; + delete[] id_temp; if (tstat_flag) { - delete [] etaint; - delete [] etaint_dot; - delete [] etaint_dotdot; - delete [] etaint_mass; - delete [] etamol; - delete [] etamol_dot; - delete [] etamol_dotdot; - delete [] etamol_mass; - delete [] etadrude; - delete [] etadrude_dot; - delete [] etadrude_dotdot; - delete [] etadrude_mass; + delete[] etaint; + delete[] etaint_dot; + delete[] etaint_dotdot; + delete[] etaint_mass; + delete[] etamol; + delete[] etamol_dot; + delete[] etamol_dotdot; + delete[] etamol_mass; + delete[] etadrude; + delete[] etadrude_dot; + delete[] etadrude_dotdot; + delete[] etadrude_mass; } if (pstat_flag) { if (pcomputeflag) modify->delete_compute(id_press); - delete [] id_press; + delete[] id_press; if (mpchain) { - delete [] etap; - delete [] etap_dot; - delete [] etap_dotdot; - delete [] etap_mass; + delete[] etap; + delete[] etap_dot; + delete[] etap_dotdot; + delete[] etap_mass; } } } @@ -605,19 +599,15 @@ void FixTGNHDrude::init() // set temperature and 
pressure ptrs - int icompute = modify->find_compute(id_temp); - if (icompute < 0) - error->all(FLERR,"Temperature ID for fix nvt/npt does not exist"); - temperature = modify->compute[icompute]; + temperature = modify->get_compute_by_id(id_temp); + if (!temperature) error->all(FLERR,"Temperature ID for fix {} does not exist", style); if (temperature->tempbias) which = BIAS; else which = NOBIAS; if (pstat_flag) { - icompute = modify->find_compute(id_press); - if (icompute < 0) - error->all(FLERR,"Pressure ID for fix npt/nph does not exist"); - pressure = modify->compute[icompute]; + pressure = modify->get_compute_by_id(id_press); + if (!pressure) error->all(FLERR,"Pressure ID for fix {} does not exist", id_press); } // set timesteps and frequencies @@ -670,20 +660,10 @@ void FixTGNHDrude::init() } // detect if any rigid fixes exist so rigid bodies move when box is remapped - // rfix[] = indices to each fix rigid - delete [] rfix; - nrigid = 0; - rfix = nullptr; - - for (int i = 0; i < modify->nfix; i++) - if (modify->fix[i]->rigid_flag) nrigid++; - if (nrigid) { - rfix = new int[nrigid]; - nrigid = 0; - for (int i = 0; i < modify->nfix; i++) - if (modify->fix[i]->rigid_flag) rfix[nrigid++] = i; - } + rfix.clear(); + for (auto &ifix : modify->get_fix_list()) + if (ifix->rigid_flag) rfix.push_back(ifix); } /* ---------------------------------------------------------------------- @@ -1111,9 +1091,7 @@ void FixTGNHDrude::remap() domain->x2lamda(nlocal); - if (nrigid) - for (i = 0; i < nrigid; i++) - modify->fix[rfix[i]]->deform(0); + for (auto &ifix : rfix) ifix->deform(0); // reset global and local box to new size/shape @@ -1253,9 +1231,7 @@ void FixTGNHDrude::remap() domain->lamda2x(nlocal); - if (nrigid) - for (i = 0; i < nrigid; i++) - modify->fix[rfix[i]]->deform(1); + for (auto &ifix : rfix) ifix->deform(1); } /* ---------------------------------------------------------------------- @@ -1426,27 +1402,23 @@ int FixTGNHDrude::modify_param(int narg, char **arg) 
modify->delete_compute(id_temp); tcomputeflag = 0; } - delete [] id_temp; + delete[] id_temp; id_temp = utils::strdup(arg[1]); - int icompute = modify->find_compute(arg[1]); - if (icompute < 0) - error->all(FLERR,"Could not find fix_modify temperature ID"); - temperature = modify->compute[icompute]; + temperature = modify->get_compute_by_id(id_temp); + if (!temperature) error->all(FLERR,"Could not find fix_modify temperature ID {}", id_temp); if (temperature->tempflag == 0) - error->all(FLERR, - "Fix_modify temperature ID does not compute temperature"); + error->all(FLERR, "Fix_modify temperature ID {} does not compute temperature", id_temp); if (temperature->igroup != 0 && comm->me == 0) error->warning(FLERR,"Temperature for fix modify is not for group all"); // reset id_temp of pressure to new temperature ID if (pstat_flag) { - icompute = modify->find_compute(id_press); - if (icompute < 0) - error->all(FLERR,"Pressure ID for fix modify does not exist"); - modify->compute[icompute]->reset_extra_compute_fix(id_temp); + pressure = modify->get_compute_by_id(id_press); + if (!pressure) error->all(FLERR,"Pressure ID {} for fix modify does not exist", id_press); + pressure->reset_extra_compute_fix(id_temp); } return 2; @@ -1458,15 +1430,14 @@ int FixTGNHDrude::modify_param(int narg, char **arg) modify->delete_compute(id_press); pcomputeflag = 0; } - delete [] id_press; + delete[] id_press; id_press = utils::strdup(arg[1]); - int icompute = modify->find_compute(arg[1]); - if (icompute < 0) error->all(FLERR,"Could not find fix_modify pressure ID"); - pressure = modify->compute[icompute]; + pressure = modify->get_compute_by_id(id_press); + if (!pressure) error->all(FLERR,"Could not find fix_modify pressure ID {}", id_press); if (pressure->pressflag == 0) - error->all(FLERR,"Fix_modify pressure ID does not compute pressure"); + error->all(FLERR,"Fix_modify pressure ID {} does not compute pressure", id_press); return 2; } diff --git a/src/DRUDE/fix_tgnh_drude.h 
b/src/DRUDE/fix_tgnh_drude.h index adfa69671a..b2724809b4 100644 --- a/src/DRUDE/fix_tgnh_drude.h +++ b/src/DRUDE/fix_tgnh_drude.h @@ -63,8 +63,7 @@ class FixTGNHDrude : public Fix { double omega_mass[6]; double p_current[6]; int kspace_flag; // 1 if KSpace invoked, 0 if not - int nrigid; // number of rigid fixes - int *rfix; // indices of rigid fixes + std::vector rfix; // indices of rigid fixes class Irregular *irregular; // for migrating atoms after box flips int nlevels_respa; diff --git a/src/EXTRA-FIX/fix_npt_cauchy.cpp b/src/EXTRA-FIX/fix_npt_cauchy.cpp index feb5a95c6f..f3dfd1af36 100644 --- a/src/EXTRA-FIX/fix_npt_cauchy.cpp +++ b/src/EXTRA-FIX/fix_npt_cauchy.cpp @@ -54,14 +54,12 @@ enum{ISO,ANISO,TRICLINIC}; ---------------------------------------------------------------------- */ FixNPTCauchy::FixNPTCauchy(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg), - rfix(nullptr), id_dilate(nullptr), irregular(nullptr), - id_temp(nullptr), id_press(nullptr), - eta(nullptr), eta_dot(nullptr), eta_dotdot(nullptr), - eta_mass(nullptr), etap(nullptr), etap_dot(nullptr), etap_dotdot(nullptr), - etap_mass(nullptr), id_store(nullptr), init_store(nullptr) + Fix(lmp, narg, arg), id_dilate(nullptr), irregular(nullptr), id_temp(nullptr), + id_press(nullptr), eta(nullptr), eta_dot(nullptr), eta_dotdot(nullptr), eta_mass(nullptr), + etap(nullptr), etap_dot(nullptr), etap_dotdot(nullptr), etap_mass(nullptr), id_store(nullptr), + init_store(nullptr) { - if (narg < 4) error->all(FLERR,"Illegal fix npt/cauchy command"); + if (narg < 4) error->all(FLERR, "Illegal fix npt/cauchy command"); dynamic_group_allow = 1; ecouple_flag = 1; @@ -571,9 +569,6 @@ FixNPTCauchy::FixNPTCauchy(LAMMPS *lmp, int narg, char **arg) : if (deviatoric_flag) size_vector += 1; } - nrigid = 0; - rfix = nullptr; - if (pre_exchange_flag) irregular = new Irregular(lmp); else irregular = nullptr; @@ -619,8 +614,6 @@ FixNPTCauchy::~FixNPTCauchy() if (copymode) return; delete[] id_dilate; - delete[] 
rfix; - delete[] id_store; delete irregular; @@ -690,19 +683,16 @@ void FixNPTCauchy::init() // set temperature and pressure ptrs - int icompute = modify->find_compute(id_temp); - if (icompute < 0) - error->all(FLERR,"Temperature ID for fix npt/cauchy does not exist"); - temperature = modify->compute[icompute]; + temperature = modify->get_compute_by_id(id_temp); + if (!temperature) + error->all(FLERR,"Temperature ID {} for fix npt/cauchy does not exist", id_temp); if (temperature->tempbias) which = BIAS; else which = NOBIAS; if (pstat_flag) { - icompute = modify->find_compute(id_press); - if (icompute < 0) - error->all(FLERR,"Pressure ID for fix npt/cauchy does not exist"); - pressure = modify->compute[icompute]; + pressure = modify->get_compute_by_id(id_press); + if (!pressure) error->all(FLERR,"Pressure ID {} for fix npt/cauchy does not exist", id_press); } // set timesteps and frequencies @@ -759,20 +749,10 @@ void FixNPTCauchy::init() } // detect if any rigid fixes exist so rigid bodies move when box is remapped - // rfix[] = indices to each fix rigid - delete[] rfix; - nrigid = 0; - rfix = nullptr; - - for (int i = 0; i < modify->nfix; i++) - if (modify->fix[i]->rigid_flag) nrigid++; - if (nrigid) { - rfix = new int[nrigid]; - nrigid = 0; - for (int i = 0; i < modify->nfix; i++) - if (modify->fix[i]->rigid_flag) rfix[nrigid++] = i; - } + rfix.clear(); + for (auto &ifix : modify->get_fix_list()) + if (ifix->rigid_flag) rfix.push_back(ifix); } /* ---------------------------------------------------------------------- @@ -1121,9 +1101,7 @@ void FixNPTCauchy::remap() domain->x2lamda(x[i],x[i]); } - if (nrigid) - for (i = 0; i < nrigid; i++) - modify->fix[rfix[i]]->deform(0); + for (auto &ifix : rfix) ifix->deform(0); // reset global and local box to new size/shape @@ -1268,9 +1246,7 @@ void FixNPTCauchy::remap() domain->lamda2x(x[i],x[i]); } - if (nrigid) - for (i = 0; i < nrigid; i++) - modify->fix[rfix[i]]->deform(1); + for (auto &ifix : rfix) ifix->deform(1); } 
/* ---------------------------------------------------------------------- @@ -1432,23 +1408,20 @@ int FixNPTCauchy::modify_param(int narg, char **arg) delete[] id_temp; id_temp = utils::strdup(arg[1]); - int icompute = modify->find_compute(arg[1]); - if (icompute < 0) - error->all(FLERR,"Could not find fix_modify temperature ID"); - temperature = modify->compute[icompute]; + temperature = modify->get_compute_by_id(id_temp); + if (!temperature) error->all(FLERR,"Could not find fix_modify temperature ID {}", id_temp); if (temperature->tempflag == 0) - error->all(FLERR,"Fix_modify temperature ID does not compute temperature"); + error->all(FLERR,"Fix_modify temperature ID {} does not compute temperature", id_temp); if (temperature->igroup != 0 && comm->me == 0) error->warning(FLERR,"Temperature for fix modify is not for group all"); // reset id_temp of pressure to new temperature ID if (pstat_flag) { - icompute = modify->find_compute(id_press); - if (icompute < 0) - error->all(FLERR,"Pressure ID for fix modify does not exist"); - modify->compute[icompute]->reset_extra_compute_fix(id_temp); + pressure = modify->get_compute_by_id(id_press); + if (!pressure) error->all(FLERR,"Pressure ID {} for fix modify does not exist", id_press); + pressure->reset_extra_compute_fix(id_temp); } return 2; @@ -1463,12 +1436,11 @@ int FixNPTCauchy::modify_param(int narg, char **arg) delete[] id_press; id_press = utils::strdup(arg[1]); - int icompute = modify->find_compute(arg[1]); - if (icompute < 0) error->all(FLERR,"Could not find fix_modify pressure ID"); - pressure = modify->compute[icompute]; + pressure = modify->get_compute_by_id(id_press); + if (!pressure) error->all(FLERR,"Could not find fix_modify pressure ID {}", id_press); if (pressure->pressflag == 0) - error->all(FLERR,"Fix_modify pressure ID does not compute pressure"); + error->all(FLERR,"Fix_modify pressure ID {} does not compute pressure", id_press); return 2; } diff --git a/src/EXTRA-FIX/fix_npt_cauchy.h 
b/src/EXTRA-FIX/fix_npt_cauchy.h index e7e6630208..43a944acb4 100644 --- a/src/EXTRA-FIX/fix_npt_cauchy.h +++ b/src/EXTRA-FIX/fix_npt_cauchy.h @@ -73,9 +73,8 @@ class FixNPTCauchy : public Fix { double drag, tdrag_factor; // drag factor on particle thermostat double pdrag_factor; // drag factor on barostat int kspace_flag; // 1 if KSpace invoked, 0 if not - int nrigid; // number of rigid fixes int dilate_group_bit; // mask for dilation group - int *rfix; // indices of rigid fixes + std::vector rfix; // indices of rigid fixes char *id_dilate; // group name to dilate class Irregular *irregular; // for migrating atoms after box flips diff --git a/src/QTB/fix_qbmsst.cpp b/src/QTB/fix_qbmsst.cpp index b5fb5ca77c..2450561363 100644 --- a/src/QTB/fix_qbmsst.cpp +++ b/src/QTB/fix_qbmsst.cpp @@ -228,7 +228,6 @@ FixQBMSST::FixQBMSST(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) pressure = nullptr; pe = nullptr; old_velocity = nullptr; - rfix = nullptr; gfactor = nullptr; random = nullptr; omega_H = nullptr; @@ -263,17 +262,16 @@ FixQBMSST::FixQBMSST(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) FixQBMSST::~FixQBMSST() { - delete [] rfix; - delete [] gfactor; + delete[] gfactor; delete random; // delete temperature and pressure if fix created them if (tflag) modify->delete_compute(id_temp); if (pflag) modify->delete_compute(id_press); if (peflag) modify->delete_compute(id_pe); - delete [] id_temp; - delete [] id_press; - delete [] id_pe; + delete[] id_temp; + delete[] id_press; + delete[] id_pe; memory->destroy(old_velocity); memory->destroy(fran); @@ -385,18 +383,10 @@ void FixQBMSST::init() else kspace_flag = 0; // detect if any fix rigid exist so rigid bodies move when box is dilated - // rfix[] = indices to each fix rigid - nrigid = 0; - for (int i = 0; i < modify->nfix; i++) - if (utils::strmatch(modify->fix[i]->style,"^rigid") || - (strcmp(modify->fix[i]->style,"poems") == 0)) nrigid++; - if (nrigid > 0) { - rfix = new int[nrigid]; - nrigid = 0; - 
for (int i = 0; i < modify->nfix; i++) - if (utils::strmatch(modify->fix[i]->style,"^rigid") || - (strcmp(modify->fix[i]->style,"poems") == 0)) rfix[nrigid++] = i; - } + + rfix.clear(); + for (auto &ifix : modify->get_fix_list()) + if (ifix->rigid_flag) rfix.push_back(ifix); } /* ---------------------------------------------------------------------- @@ -787,9 +777,7 @@ void FixQBMSST::remap(int flag) domain->x2lamda(n); - if (nrigid) - for (i = 0; i < nrigid; i++) - modify->fix[rfix[i]]->deform(0); + for (auto &ifix : rfix) ifix->deform(0); // reset global and local box to new size/shape @@ -810,9 +798,7 @@ void FixQBMSST::remap(int flag) domain->lamda2x(n); - if (nrigid) - for (i = 0; i < nrigid; i++) - modify->fix[rfix[i]]->deform(1); + for (auto &ifix : rfix) ifix->deform(1); for (i = 0; i < n; i++) { v[i][direction] = v[i][direction] * @@ -868,7 +854,7 @@ int FixQBMSST::modify_param(int narg, char **arg) modify->delete_compute(id_temp); tflag = 0; } - delete [] id_temp; + delete[] id_temp; id_temp = utils::strdup(arg[1]); int icompute = modify->find_compute(id_temp); @@ -888,7 +874,7 @@ int FixQBMSST::modify_param(int narg, char **arg) modify->delete_compute(id_press); pflag = 0; } - delete [] id_press; + delete[] id_press; id_press = utils::strdup(arg[1]); int icompute = modify->find_compute(id_press); diff --git a/src/QTB/fix_qbmsst.h b/src/QTB/fix_qbmsst.h index ecfa5abf8e..cccb4e6a17 100644 --- a/src/QTB/fix_qbmsst.h +++ b/src/QTB/fix_qbmsst.h @@ -78,8 +78,7 @@ class FixQBMSST : public Fix { double omega[3]; // Time derivative of the volume. double total_mass; // Mass of the computational cell int kspace_flag; // 1 if KSpace invoked, 0 if not - int nrigid; // number of rigid fixes - int *rfix; // indices of rigid fixes + std::vector rfix; // indices of rigid fixes double p_current[3]; // pressure double velocity_sum; // Sum of the velocities squared. 
double lagrangian_position; // Lagrangian location of computational cell diff --git a/src/SHOCK/fix_msst.cpp b/src/SHOCK/fix_msst.cpp index a4c9db3fd7..55842250ec 100644 --- a/src/SHOCK/fix_msst.cpp +++ b/src/SHOCK/fix_msst.cpp @@ -41,11 +41,10 @@ using namespace FixConst; /* ---------------------------------------------------------------------- */ FixMSST::FixMSST(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg), old_velocity(nullptr), rfix(nullptr), - id_temp(nullptr), id_press(nullptr), id_pe(nullptr), temperature(nullptr), - pressure(nullptr), pe(nullptr) + Fix(lmp, narg, arg), old_velocity(nullptr), id_temp(nullptr), id_press(nullptr), id_pe(nullptr), + temperature(nullptr), pressure(nullptr), pe(nullptr) { - if (narg < 4) error->all(FLERR,"Illegal fix msst command"); + if (narg < 4) error->all(FLERR, "Illegal fix msst command"); restart_global = 1; time_integrate = 1; @@ -80,95 +79,103 @@ FixMSST::FixMSST(LAMMPS *lmp, int narg, char **arg) : dftb = 0; beta = 0.0; - if (strcmp(arg[3],"x") == 0) { + if (strcmp(arg[3], "x") == 0) { direction = 0; box_change |= BOX_CHANGE_X; - } else if (strcmp(arg[3],"y") == 0) { + } else if (strcmp(arg[3], "y") == 0) { direction = 1; box_change |= BOX_CHANGE_Y; - } else if (strcmp(arg[3],"z") == 0) { + } else if (strcmp(arg[3], "z") == 0) { direction = 2; box_change |= BOX_CHANGE_Z; - } else error->all(FLERR,"Illegal fix msst command"); + } else + error->all(FLERR, "Illegal fix msst command"); - velocity = utils::numeric(FLERR,arg[4],false,lmp); - if (velocity < 0) error->all(FLERR,"Illegal fix msst command"); + velocity = utils::numeric(FLERR, arg[4], false, lmp); + if (velocity < 0) error->all(FLERR, "Illegal fix msst command"); // optional args int iarg = 5; while (iarg < narg) { - if (strcmp(arg[iarg],"q") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix msst command"); - qmass = utils::numeric(FLERR,arg[iarg+1],false,lmp); + if (strcmp(arg[iarg], "q") == 0) { + if (iarg + 2 > narg) error->all(FLERR, 
"Illegal fix msst command"); + qmass = utils::numeric(FLERR, arg[iarg + 1], false, lmp); iarg += 2; - } else if (strcmp(arg[iarg],"mu") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix msst command"); - mu = utils::numeric(FLERR,arg[iarg+1],false,lmp); + } else if (strcmp(arg[iarg], "mu") == 0) { + if (iarg + 2 > narg) error->all(FLERR, "Illegal fix msst command"); + mu = utils::numeric(FLERR, arg[iarg + 1], false, lmp); iarg += 2; - } else if (strcmp(arg[iarg],"p0") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix msst command"); - p0 = utils::numeric(FLERR,arg[iarg+1],false,lmp); + } else if (strcmp(arg[iarg], "p0") == 0) { + if (iarg + 2 > narg) error->all(FLERR, "Illegal fix msst command"); + p0 = utils::numeric(FLERR, arg[iarg + 1], false, lmp); p0_set = 1; iarg += 2; - } else if (strcmp(arg[iarg],"v0") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix msst command"); - v0 = utils::numeric(FLERR,arg[iarg+1],false,lmp); + } else if (strcmp(arg[iarg], "v0") == 0) { + if (iarg + 2 > narg) error->all(FLERR, "Illegal fix msst command"); + v0 = utils::numeric(FLERR, arg[iarg + 1], false, lmp); v0_set = 1; iarg += 2; - } else if (strcmp(arg[iarg],"e0") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix msst command"); - e0 = utils::numeric(FLERR,arg[iarg+1],false,lmp); + } else if (strcmp(arg[iarg], "e0") == 0) { + if (iarg + 2 > narg) error->all(FLERR, "Illegal fix msst command"); + e0 = utils::numeric(FLERR, arg[iarg + 1], false, lmp); e0_set = 1; iarg += 2; - } else if (strcmp(arg[iarg],"tscale") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix msst command"); - tscale = utils::numeric(FLERR,arg[iarg+1],false,lmp); + } else if (strcmp(arg[iarg], "tscale") == 0) { + if (iarg + 2 > narg) error->all(FLERR, "Illegal fix msst command"); + tscale = utils::numeric(FLERR, arg[iarg + 1], false, lmp); if (tscale < 0.0 || tscale > 1.0) - error->all(FLERR,"Fix msst tscale must satisfy 0 <= tscale < 1"); + error->all(FLERR, "Fix 
msst tscale must satisfy 0 <= tscale < 1"); iarg += 2; - } else if (strcmp(arg[iarg],"dftb") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix msst command"); - dftb = utils::logical(FLERR,arg[iarg+1],false,lmp); + } else if (strcmp(arg[iarg], "dftb") == 0) { + if (iarg + 2 > narg) error->all(FLERR, "Illegal fix msst command"); + dftb = utils::logical(FLERR, arg[iarg + 1], false, lmp); iarg += 2; - } else if (strcmp(arg[iarg],"beta") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix msst command"); - beta = utils::numeric(FLERR,arg[iarg+1],false,lmp); - if (beta < 0.0 || beta > 1.0) - error->all(FLERR,"Illegal fix msst command"); + } else if (strcmp(arg[iarg], "beta") == 0) { + if (iarg + 2 > narg) error->all(FLERR, "Illegal fix msst command"); + beta = utils::numeric(FLERR, arg[iarg + 1], false, lmp); + if (beta < 0.0 || beta > 1.0) error->all(FLERR, "Illegal fix msst command"); iarg += 2; - } else error->all(FLERR,"Illegal fix msst command"); + } else + error->all(FLERR, "Illegal fix msst command"); } // output MSST info if (comm->me == 0) { std::string mesg = "MSST parameters:\n"; - if (direction == 0) mesg += " Shock in x direction\n"; - else if (direction == 1) mesg += " Shock in y direction\n"; - else if (direction == 2) mesg += " Shock in z direction\n"; + if (direction == 0) + mesg += " Shock in x direction\n"; + else if (direction == 1) + mesg += " Shock in y direction\n"; + else if (direction == 2) + mesg += " Shock in z direction\n"; mesg += fmt::format(" Cell mass-like parameter qmass " - "(units of mass^2/length^4) = {:.8g}\n", qmass); + "(units of mass^2/length^4) = {:.8g}\n", + qmass); mesg += fmt::format(" Shock velocity = {:.8g}\n", velocity); mesg += fmt::format(" Artificial viscosity (units of mass/length/time) = {:.8g}\n", mu); if (p0_set) mesg += fmt::format(" Initial pressure specified to be {:.8g}\n", p0); - else mesg += " Initial pressure calculated on first step\n"; + else + mesg += " Initial pressure calculated on first 
step\n"; if (v0_set) mesg += fmt::format(" Initial volume specified to be {:.8g}\n", v0); - else mesg += " Initial volume calculated on first step\n"; + else + mesg += " Initial volume calculated on first step\n"; if (e0_set) mesg += fmt::format(" Initial energy specified to be {:.8g}\n", e0); - else mesg += " Initial energy calculated on first step\n"; - utils::logmesg(lmp,mesg); + else + mesg += " Initial energy calculated on first step\n"; + utils::logmesg(lmp, mesg); } // check for periodicity in controlled dimensions - if (domain->nonperiodic) error->all(FLERR,"Fix msst requires a periodic box"); + if (domain->nonperiodic) error->all(FLERR, "Fix msst requires a periodic box"); // create a new temperature compute // id = fix-ID + "MSST_temp" @@ -200,8 +207,6 @@ FixMSST::FixMSST(LAMMPS *lmp, int narg, char **arg) : // initialize the time derivative of the volume omega[0] = omega[1] = omega[2] = 0.0; - nrigid = 0; - rfix = nullptr; maxold = -1; old_velocity = nullptr; @@ -211,17 +216,15 @@ FixMSST::FixMSST(LAMMPS *lmp, int narg, char **arg) : FixMSST::~FixMSST() { - delete [] rfix; - // delete temperature and pressure if fix created them if (tflag) modify->delete_compute(id_temp); if (pflag) modify->delete_compute(id_press); if (peflag) modify->delete_compute(id_pe); - delete [] id_temp; - delete [] id_press; - delete [] id_pe; + delete[] id_temp; + delete[] id_press; + delete[] id_pe; memory->destroy(old_velocity); } @@ -240,22 +243,20 @@ int FixMSST::setmask() void FixMSST::init() { - if (atom->mass == nullptr) - error->all(FLERR,"Cannot use fix msst without per-type mass defined"); + if (atom->mass == nullptr) error->all(FLERR, "Cannot use fix msst without per-type mass defined"); // set compute ptrs int itemp = modify->find_compute(id_temp); int ipress = modify->find_compute(id_press); int ipe = modify->find_compute(id_pe); - if (itemp < 0 || ipress < 0|| ipe < 0) - error->all(FLERR,"Could not find fix msst compute ID"); + if (itemp < 0 || ipress < 0 || ipe < 
0) error->all(FLERR, "Could not find fix msst compute ID"); if (modify->compute[itemp]->tempflag == 0) - error->all(FLERR,"Fix msst compute ID does not compute temperature"); + error->all(FLERR, "Fix msst compute ID does not compute temperature"); if (modify->compute[ipress]->pressflag == 0) - error->all(FLERR,"Fix msst compute ID does not compute pressure"); + error->all(FLERR, "Fix msst compute ID does not compute pressure"); if (modify->compute[ipe]->peflag == 0) - error->all(FLERR,"Fix msst compute ID does not compute potential energy"); + error->all(FLERR, "Fix msst compute ID does not compute potential energy"); temperature = modify->compute[itemp]; pressure = modify->compute[ipress]; @@ -271,37 +272,27 @@ void FixMSST::init() double mass = 0.0; for (int i = 0; i < atom->nlocal; i++) mass += atom->mass[atom->type[i]]; - MPI_Allreduce(&mass,&total_mass,1,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(&mass, &total_mass, 1, MPI_DOUBLE, MPI_SUM, world); - if (force->kspace) kspace_flag = 1; - else kspace_flag = 0; + if (force->kspace) + kspace_flag = 1; + else + kspace_flag = 0; // detect if any fix rigid exist so rigid bodies move when box is dilated - // rfix[] = indices to each fix rigid - delete [] rfix; - nrigid = 0; - rfix = nullptr; - - for (int i = 0; i < modify->nfix; i++) - if (utils::strmatch(modify->fix[i]->style,"^rigid") || - utils::strmatch(modify->fix[i]->style,"^poems$")) nrigid++; - if (nrigid) { - rfix = new int[nrigid]; - nrigid = 0; - for (int i = 0; i < modify->nfix; i++) - if (utils::strmatch(modify->fix[i]->style,"^rigid") || - utils::strmatch(modify->fix[i]->style,"^poems$")) rfix[nrigid++] = i; - } + rfix.clear(); + for (auto &ifix : modify->get_fix_list()) + if (ifix->rigid_flag) rfix.push_back(ifix); // find fix external being used to drive LAMMPS from DFTB+ if (dftb) { for (int i = 0; i < modify->nfix; i++) - if (utils::strmatch(modify->fix[i]->style,"^external$")) + if (utils::strmatch(modify->fix[i]->style, "^external$")) fix_external 
= dynamic_cast(modify->fix[i]); if (fix_external == nullptr) - error->all(FLERR,"Fix msst dftb cannot be used w/out fix external"); + error->all(FLERR, "Fix msst dftb cannot be used w/out fix external"); } } @@ -321,29 +312,26 @@ void FixMSST::setup(int /*vflag*/) if (v0_set == 0) { v0 = compute_vol(); v0_set = 1; - if (comm->me == 0) - utils::logmesg(lmp,"Fix MSST v0 = {:.8g}\n", v0); + if (comm->me == 0) utils::logmesg(lmp, "Fix MSST v0 = {:.8g}\n", v0); } if (p0_set == 0) { p0 = p_current[direction]; p0_set = 1; - if (comm->me == 0) - utils::logmesg(lmp,"Fix MSST p0 = {:.8g}\n", p0); + if (comm->me == 0) utils::logmesg(lmp, "Fix MSST p0 = {:.8g}\n", p0); } if (e0_set == 0) { e0 = compute_etotal(); e0_set = 1; - if (comm->me == 0) - utils::logmesg(lmp,"Fix MSST e0 = {:.8g}\n", e0); + if (comm->me == 0) utils::logmesg(lmp, "Fix MSST e0 = {:.8g}\n", e0); } temperature->compute_vector(); double *ke_tensor = temperature->vector; - double ke_temp = ke_tensor[0]+ke_tensor[1]+ke_tensor[2]; + double ke_temp = ke_tensor[0] + ke_tensor[1] + ke_tensor[2]; if (ke_temp > 0.0 && tscale > 0.0) { // transfer energy from atom velocities to cell volume motion @@ -351,30 +339,30 @@ void FixMSST::setup(int /*vflag*/) double **v = atom->v; int *mask = atom->mask; - double sqrt_initial_temperature_scaling = sqrt(1.0-tscale); + double sqrt_initial_temperature_scaling = sqrt(1.0 - tscale); - double fac1 = tscale*total_mass/qmass*ke_temp/force->mvv2e; + double fac1 = tscale * total_mass / qmass * ke_temp / force->mvv2e; - omega[direction]=-1*sqrt(fac1); - double fac2 = omega[direction]/v0; + omega[direction] = -1 * sqrt(fac1); + double fac2 = omega[direction] / v0; - if ( comm->me == 0 && tscale != 1.0) - utils::logmesg(lmp,"Fix MSST initial strain rate of {:.8g} " + if (comm->me == 0 && tscale != 1.0) + utils::logmesg(lmp, + "Fix MSST initial strain rate of {:.8g} " "established by reducing temperature by factor " - "of {:.8g}\n",fac2,tscale); + "of {:.8g}\n", + fac2, tscale); for (int 
i = 0; i < atom->nlocal; i++) { if (mask[i] & groupbit) { - for (int k = 0; k < 3; k++) { - v[i][k]*=sqrt_initial_temperature_scaling; - } + for (int k = 0; k < 3; k++) { v[i][k] *= sqrt_initial_temperature_scaling; } } } } // trigger virial computation on next timestep - pe->addstep(update->ntimestep+1); - pressure->addstep(update->ntimestep+1); + pe->addstep(update->ntimestep + 1); + pressure->addstep(update->ntimestep + 1); } /* ---------------------------------------------------------------------- @@ -383,8 +371,8 @@ void FixMSST::setup(int /*vflag*/) void FixMSST::initial_integrate(int /*vflag*/) { - int i,k; - double p_msst; // MSST driving pressure + int i, k; + double p_msst; // MSST driving pressure double vol; int nlocal = atom->nlocal; @@ -401,7 +389,7 @@ void FixMSST::initial_integrate(int /*vflag*/) if (nlocal > maxold) { memory->destroy(old_velocity); maxold = atom->nmax; - memory->create(old_velocity,maxold,3,"msst:old_velocity"); + memory->create(old_velocity, maxold, 3, "msst:old_velocity"); } // for DFTB, extract TS_dftb from fix external @@ -409,14 +397,14 @@ void FixMSST::initial_integrate(int /*vflag*/) if (dftb) { const double TS_dftb = fix_external->compute_vector(0); - const double TS = force->ftm2v*TS_dftb; + const double TS = force->ftm2v * TS_dftb; // update S_elec terms and compute TS_dot via finite differences S_elec_2 = S_elec_1; S_elec_1 = S_elec; const double Temp = temperature->compute_scalar(); - S_elec = TS/Temp; - TS_dot = Temp*(3.0*S_elec-4.0*S_elec_1+S_elec_2)/(2.0*update->dt); - TS_int += (update->dt*TS_dot); + S_elec = TS / Temp; + TS_dot = Temp * (3.0 * S_elec - 4.0 * S_elec_1 + S_elec_2) / (2.0 * update->dt); + TS_int += (update->dt * TS_dot); if (update->ntimestep == 1) T0S0 = TS; } @@ -434,11 +422,9 @@ void FixMSST::initial_integrate(int /*vflag*/) // propagate the time derivative of // the volume 1/2 step at fixed vol, r, rdot - p_msst = nktv2p * mvv2e * velocity * velocity * total_mass * - ( v0 - vol)/( v0 * v0); - 
double A = total_mass * ( p_current[sd] - p0 - p_msst ) / - (qmass * nktv2p * mvv2e); - double B = total_mass * mu / ( qmass * vol ); + p_msst = nktv2p * mvv2e * velocity * velocity * total_mass * (v0 - vol) / (v0 * v0); + double A = total_mass * (p_current[sd] - p0 - p_msst) / (qmass * nktv2p * mvv2e); + double B = total_mass * mu / (qmass * vol); // prevent blow-up of the volume @@ -447,11 +433,10 @@ void FixMSST::initial_integrate(int /*vflag*/) // use Taylor expansion to avoid singularity at B = 0 if (B * dthalf > 1.0e-06) { - omega[sd] = ( omega[sd] + A * ( exp(B * dthalf) - 1.0 ) / B ) - * exp(-B * dthalf); + omega[sd] = (omega[sd] + A * (exp(B * dthalf) - 1.0) / B) * exp(-B * dthalf); } else { omega[sd] = omega[sd] + (A - B * omega[sd]) * dthalf + - 0.5 * (B * B * omega[sd] - A * B ) * dthalf * dthalf; + 0.5 * (B * B * omega[sd] - A * B) * dthalf * dthalf; } // propagate velocity sum 1/2 step by @@ -464,20 +449,19 @@ void FixMSST::initial_integrate(int /*vflag*/) if (mask[i] & groupbit) { for (k = 0; k < 3; k++) { const double C = f[i][k] * force->ftm2v / mass[type[i]]; - const double TS_term = TS_dot/(mass[type[i]]*velocity_sum); - const double escale_term = force->ftm2v*beta*(e0-e_scale) / - (mass[type[i]]*velocity_sum); - double D = mu * omega[sd] * omega[sd] / - (velocity_sum * mass[type[i]] * vol ); + const double TS_term = TS_dot / (mass[type[i]] * velocity_sum); + const double escale_term = + force->ftm2v * beta * (e0 - e_scale) / (mass[type[i]] * velocity_sum); + double D = mu * omega[sd] * omega[sd] / (velocity_sum * mass[type[i]] * vol); D += escale_term - TS_term; old_velocity[i][k] = v[i][k]; if (k == direction) D -= 2.0 * omega[sd] / vol; if (fabs(dthalf * D) > 1.0e-06) { const double expd = exp(D * dthalf); - v[i][k] = expd * ( C + D * v[i][k] - C / expd ) / D; + v[i][k] = expd * (C + D * v[i][k] - C / expd) / D; } else { - v[i][k] = v[i][k] + ( C + D * v[i][k] ) * dthalf + - 0.5 * (D * D * v[i][k] + C * D ) * dthalf * dthalf; + v[i][k] = 
v[i][k] + (C + D * v[i][k]) * dthalf + + 0.5 * (D * D * v[i][k] + C * D) * dthalf * dthalf; } } } @@ -487,18 +471,15 @@ void FixMSST::initial_integrate(int /*vflag*/) if (mask[i] & groupbit) { for (k = 0; k < 3; k++) { const double C = f[i][k] * force->ftm2v / mass[type[i]]; - double D = mu * omega[sd] * omega[sd] / - (velocity_sum * mass[type[i]] * vol ); + double D = mu * omega[sd] * omega[sd] / (velocity_sum * mass[type[i]] * vol); old_velocity[i][k] = v[i][k]; - if (k == direction) { - D -= 2.0 * omega[sd] / vol; - } + if (k == direction) { D -= 2.0 * omega[sd] / vol; } if (fabs(dthalf * D) > 1.0e-06) { const double expd = exp(D * dthalf); - v[i][k] = expd * ( C + D * v[i][k] - C / expd ) / D; + v[i][k] = expd * (C + D * v[i][k] - C / expd) / D; } else { - v[i][k] = v[i][k] + ( C + D * v[i][k] ) * dthalf + - 0.5 * (D * D * v[i][k] + C * D ) * dthalf * dthalf; + v[i][k] = v[i][k] + (C + D * v[i][k]) * dthalf + + 0.5 * (D * D * v[i][k] + C * D) * dthalf * dthalf; } } } @@ -524,19 +505,18 @@ void FixMSST::initial_integrate(int /*vflag*/) if (mask[i] & groupbit) { for (k = 0; k < 3; k++) { const double C = f[i][k] * force->ftm2v / mass[type[i]]; - const double TS_term = TS_dot/(mass[type[i]]*velocity_sum); - const double escale_term = force->ftm2v*beta*(e0-e_scale) / - (mass[type[i]]*velocity_sum); - double D = mu * omega[sd] * omega[sd] / - (velocity_sum * mass[type[i]] * vol ); + const double TS_term = TS_dot / (mass[type[i]] * velocity_sum); + const double escale_term = + force->ftm2v * beta * (e0 - e_scale) / (mass[type[i]] * velocity_sum); + double D = mu * omega[sd] * omega[sd] / (velocity_sum * mass[type[i]] * vol); D += escale_term - TS_term; if (k == direction) D -= 2.0 * omega[sd] / vol; if (fabs(dthalf * D) > 1.0e-06) { const double expd = exp(D * dthalf); - v[i][k] = expd * ( C + D * v[i][k] - C / expd ) / D; + v[i][k] = expd * (C + D * v[i][k] - C / expd) / D; } else { - v[i][k] = v[i][k] + ( C + D * v[i][k] ) * dthalf + - 0.5 * (D * D * v[i][k] + C * 
D ) * dthalf * dthalf; + v[i][k] = v[i][k] + (C + D * v[i][k]) * dthalf + + 0.5 * (D * D * v[i][k] + C * D) * dthalf * dthalf; } } } @@ -546,17 +526,14 @@ void FixMSST::initial_integrate(int /*vflag*/) if (mask[i] & groupbit) { for (k = 0; k < 3; k++) { const double C = f[i][k] * force->ftm2v / mass[type[i]]; - double D = mu * omega[sd] * omega[sd] / - (velocity_sum * mass[type[i]] * vol ); - if (k == direction) { - D -= 2.0 * omega[sd] / vol; - } + double D = mu * omega[sd] * omega[sd] / (velocity_sum * mass[type[i]] * vol); + if (k == direction) { D -= 2.0 * omega[sd] / vol; } if (fabs(dthalf * D) > 1.0e-06) { const double expd = exp(D * dthalf); - v[i][k] = expd * ( C + D * v[i][k] - C / expd ) / D; + v[i][k] = expd * (C + D * v[i][k] - C / expd) / D; } else { - v[i][k] = v[i][k] + ( C + D * v[i][k] ) * dthalf + - 0.5 * (D * D * v[i][k] + C * D ) * dthalf * dthalf; + v[i][k] = v[i][k] + (C + D * v[i][k]) * dthalf + + 0.5 * (D * D * v[i][k] + C * D) * dthalf * dthalf; } } } @@ -569,7 +546,7 @@ void FixMSST::initial_integrate(int /*vflag*/) // rescale positions and change box size - dilation[sd] = vol1/vol; + dilation[sd] = vol1 / vol; remap(0); // propagate particle positions 1 time step @@ -588,7 +565,7 @@ void FixMSST::initial_integrate(int /*vflag*/) // rescale positions and change box size - dilation[sd] = vol2/vol1; + dilation[sd] = vol2 / vol1; remap(0); if (kspace_flag) force->kspace->setup(); @@ -601,7 +578,7 @@ void FixMSST::initial_integrate(int /*vflag*/) void FixMSST::final_integrate() { int i; - double p_msst; // MSST driving pressure + double p_msst; // MSST driving pressure // v update only for atoms in MSST group @@ -624,14 +601,14 @@ void FixMSST::final_integrate() if (dftb) { const double TS_dftb = fix_external->compute_vector(0); - const double TS = force->ftm2v*TS_dftb; + const double TS = force->ftm2v * TS_dftb; S_elec_2 = S_elec_1; S_elec_1 = S_elec; const double Temp = temperature->compute_scalar(); // update S_elec terms and compute TS_dot 
via finite differences - S_elec = TS/Temp; - TS_dot = Temp*(3.0*S_elec-4.0*S_elec_1+S_elec_2)/(2.0*update->dt); - TS_int += (update->dt*TS_dot); + S_elec = TS / Temp; + TS_dot = Temp * (3.0 * S_elec - 4.0 * S_elec_1 + S_elec_2) / (2.0 * update->dt); + TS_int += (update->dt * TS_dot); if (update->ntimestep == 1) T0S0 = TS; } @@ -642,19 +619,18 @@ void FixMSST::final_integrate() if (mask[i] & groupbit) { for (int k = 0; k < 3; k++) { const double C = f[i][k] * force->ftm2v / mass[type[i]]; - const double TS_term = TS_dot/(mass[type[i]]*velocity_sum); - const double escale_term = force->ftm2v*beta*(e0-e_scale) / - (mass[type[i]]*velocity_sum); - double D = mu * omega[sd] * omega[sd] / - (velocity_sum * mass[type[i]] * vol ); + const double TS_term = TS_dot / (mass[type[i]] * velocity_sum); + const double escale_term = + force->ftm2v * beta * (e0 - e_scale) / (mass[type[i]] * velocity_sum); + double D = mu * omega[sd] * omega[sd] / (velocity_sum * mass[type[i]] * vol); D += escale_term - TS_term; if (k == direction) D -= 2.0 * omega[sd] / vol; if (fabs(dthalf * D) > 1.0e-06) { const double expd = exp(D * dthalf); - v[i][k] = expd * ( C + D * v[i][k] - C / expd ) / D; + v[i][k] = expd * (C + D * v[i][k] - C / expd) / D; } else { - v[i][k] = v[i][k] + ( C + D * v[i][k] ) * dthalf + - 0.5 * (D * D * v[i][k] + C * D ) * dthalf * dthalf; + v[i][k] = v[i][k] + (C + D * v[i][k]) * dthalf + + 0.5 * (D * D * v[i][k] + C * D) * dthalf * dthalf; } } } @@ -664,17 +640,14 @@ void FixMSST::final_integrate() if (mask[i] & groupbit) { for (int k = 0; k < 3; k++) { const double C = f[i][k] * force->ftm2v / mass[type[i]]; - double D = mu * omega[sd] * omega[sd] / - (velocity_sum * mass[type[i]] * vol ); - if (k == direction) { - D -= 2.0 * omega[sd] / vol; - } + double D = mu * omega[sd] * omega[sd] / (velocity_sum * mass[type[i]] * vol); + if (k == direction) { D -= 2.0 * omega[sd] / vol; } if (fabs(dthalf * D) > 1.0e-06) { const double expd = exp(D * dthalf); - v[i][k] = expd * ( C + 
D * v[i][k] - C / expd ) / D; + v[i][k] = expd * (C + D * v[i][k] - C / expd) / D; } else { - v[i][k] = v[i][k] + ( C + D * v[i][k] ) * dthalf + - 0.5 * (D * D * v[i][k] + C * D ) * dthalf * dthalf; + v[i][k] = v[i][k] + (C + D * v[i][k]) * dthalf + + 0.5 * (D * D * v[i][k] + C * D) * dthalf * dthalf; } } } @@ -692,11 +665,9 @@ void FixMSST::final_integrate() // propagate the time derivative of the volume 1/2 step at fixed V, r, rdot - p_msst = nktv2p * mvv2e * velocity * velocity * total_mass * - ( v0 - vol )/( v0 * v0 ); - double A = total_mass * ( p_current[sd] - p0 - p_msst ) / - ( qmass * nktv2p * mvv2e ); - const double B = total_mass * mu / ( qmass * vol ); + p_msst = nktv2p * mvv2e * velocity * velocity * total_mass * (v0 - vol) / (v0 * v0); + double A = total_mass * (p_current[sd] - p0 - p_msst) / (qmass * nktv2p * mvv2e); + const double B = total_mass * mu / (qmass * vol); // prevent blow-up of the volume @@ -705,21 +676,20 @@ void FixMSST::final_integrate() // use taylor expansion to avoid singularity at B == 0. 
if (B * dthalf > 1.0e-06) { - omega[sd] = ( omega[sd] + A * - ( exp(B * dthalf) - 1.0 ) / B ) * exp(-B * dthalf); + omega[sd] = (omega[sd] + A * (exp(B * dthalf) - 1.0) / B) * exp(-B * dthalf); } else { omega[sd] = omega[sd] + (A - B * omega[sd]) * dthalf + - 0.5 * (B * B * omega[sd] - A * B ) * dthalf * dthalf; + 0.5 * (B * B * omega[sd] - A * B) * dthalf * dthalf; } // calculate Lagrangian position of computational cell - lagrangian_position -= velocity*vol/v0*update->dt; + lagrangian_position -= velocity * vol / v0 * update->dt; // trigger energy and virial computation on next timestep - pe->addstep(update->ntimestep+1); - pressure->addstep(update->ntimestep+1); + pe->addstep(update->ntimestep + 1); + pressure->addstep(update->ntimestep + 1); } /* ---------------------------------------------------------------------- */ @@ -741,20 +711,20 @@ void FixMSST::couple() void FixMSST::remap(int flag) { - int i,n; - double oldlo,oldhi,ctr; + int i, n; + double oldlo, oldhi, ctr; double **v = atom->v; - if (flag) n = atom->nlocal + atom->nghost; - else n = atom->nlocal; + if (flag) + n = atom->nlocal + atom->nghost; + else + n = atom->nlocal; // convert pertinent atoms and rigid bodies to lamda coords domain->x2lamda(n); - if (nrigid) - for (i = 0; i < nrigid; i++) - modify->fix[rfix[i]]->deform(0); + for (auto &ifix : rfix) ifix->deform(0); // reset global and local box to new size/shape @@ -763,8 +733,8 @@ void FixMSST::remap(int flag) oldlo = domain->boxlo[i]; oldhi = domain->boxhi[i]; ctr = 0.5 * (oldlo + oldhi); - domain->boxlo[i] = (oldlo-ctr)*dilation[i] + ctr; - domain->boxhi[i] = (oldhi-ctr)*dilation[i] + ctr; + domain->boxlo[i] = (oldlo - ctr) * dilation[i] + ctr; + domain->boxhi[i] = (oldhi - ctr) * dilation[i] + ctr; } } @@ -775,14 +745,9 @@ void FixMSST::remap(int flag) domain->lamda2x(n); - if (nrigid) - for (i = 0; i < nrigid; i++) - modify->fix[rfix[i]]->deform(1); + for (auto &ifix : rfix) ifix->deform(1); - for (i = 0; i < n; i++) { - v[i][direction] = 
v[i][direction] * - dilation[direction]; - } + for (i = 0; i < n; i++) v[i][direction] = v[i][direction] * dilation[direction]; } /* ---------------------------------------------------------------------- @@ -800,8 +765,8 @@ void FixMSST::write_restart(FILE *fp) list[n++] = TS_int; if (comm->me == 0) { int size = n * sizeof(double); - fwrite(&size,sizeof(int),1,fp); - fwrite(&list,sizeof(double),n,fp); + fwrite(&size, sizeof(int), 1, fp); + fwrite(&list, sizeof(double), n, fp); } } @@ -818,7 +783,7 @@ void FixMSST::restart(char *buf) v0 = list[n++]; p0 = list[n++]; TS_int = list[n++]; - tscale = 0.0; // set tscale to zero for restart + tscale = 0.0; // set tscale to zero for restart p0_set = 1; v0_set = 1; e0_set = 1; @@ -828,43 +793,43 @@ void FixMSST::restart(char *buf) int FixMSST::modify_param(int narg, char **arg) { - if (strcmp(arg[0],"temp") == 0) { - if (narg < 2) error->all(FLERR,"Illegal fix_modify command"); + if (strcmp(arg[0], "temp") == 0) { + if (narg < 2) error->all(FLERR, "Illegal fix_modify command"); if (tflag) { modify->delete_compute(id_temp); tflag = 0; } - delete [] id_temp; + delete[] id_temp; id_temp = utils::strdup(arg[1]); int icompute = modify->find_compute(id_temp); - if (icompute < 0) - error->all(FLERR,"Could not find fix_modify temperature ID"); + if (icompute < 0) error->all(FLERR, "Could not find fix_modify temperature ID"); temperature = modify->compute[icompute]; if (temperature->tempflag == 0) - error->all(FLERR,"Fix_modify temperature ID does not " + error->all(FLERR, + "Fix_modify temperature ID does not " "compute temperature"); if (temperature->igroup != 0 && comm->me == 0) - error->warning(FLERR,"Temperature for MSST is not for group all"); + error->warning(FLERR, "Temperature for MSST is not for group all"); return 2; - } else if (strcmp(arg[0],"press") == 0) { - if (narg < 2) error->all(FLERR,"Illegal fix_modify command"); + } else if (strcmp(arg[0], "press") == 0) { + if (narg < 2) error->all(FLERR, "Illegal fix_modify 
command"); if (pflag) { modify->delete_compute(id_press); pflag = 0; } - delete [] id_press; + delete[] id_press; id_press = utils::strdup(arg[1]); int icompute = modify->find_compute(id_press); - if (icompute < 0) error->all(FLERR,"Could not find fix_modify pressure ID"); + if (icompute < 0) error->all(FLERR, "Could not find fix_modify pressure ID"); pressure = modify->compute[icompute]; if (pressure->pressflag == 0) - error->all(FLERR,"Fix_modify pressure ID does not compute pressure"); + error->all(FLERR, "Fix_modify pressure ID does not compute pressure"); return 2; } return 0; @@ -887,10 +852,9 @@ double FixMSST::compute_scalar() i = direction; energy = qmass * omega[i] * omega[i] / (2.0 * total_mass) * mvv2e; - energy -= 0.5 * total_mass * velocity * velocity * - (1.0 - volume/ v0) * - (1.0 - volume/ v0) * mvv2e; - energy -= p0 * ( v0 - volume ) / nktv2p; + energy -= + 0.5 * total_mass * velocity * velocity * (1.0 - volume / v0) * (1.0 - volume / v0) * mvv2e; + energy -= p0 * (v0 - volume) / nktv2p; // subtract off precomputed TS_int integral value // TS_int = 0 for non DFTB calculations @@ -938,8 +902,7 @@ double FixMSST::compute_hugoniot() v = compute_vol(); - dhugo = (0.5 * (p + p0 ) * ( v0 - v)) / - force->nktv2p + e0 - e; + dhugo = (0.5 * (p + p0) * (v0 - v)) / force->nktv2p + e0 - e; dhugo /= temperature->dof * force->boltz; return dhugo; @@ -964,8 +927,7 @@ double FixMSST::compute_rayleigh() v = compute_vol(); drayleigh = p - p0 - - total_mass * velocity * velocity * force->mvv2e * - (1.0 - v / v0 ) * force->nktv2p / v0; + total_mass * velocity * velocity * force->mvv2e * (1.0 - v / v0) * force->nktv2p / v0; return drayleigh; } @@ -978,7 +940,7 @@ double FixMSST::compute_rayleigh() double FixMSST::compute_lagrangian_speed() { double v = compute_vol(); - return velocity*(1.0-v/v0); + return velocity * (1.0 - v / v0); } /* ---------------------------------------------------------------------- @@ -988,7 +950,7 @@ double FixMSST::compute_lagrangian_speed() 
double FixMSST::compute_lagrangian_position() { - return lagrangian_position; + return lagrangian_position; } /* ---------------------------------------------------------------------- */ @@ -997,11 +959,11 @@ double FixMSST::compute_etotal() { if (!pe) return 0.0; - double epot,ekin,etot; + double epot, ekin, etot; epot = pe->compute_scalar(); ekin = temperature->compute_scalar(); ekin *= 0.5 * temperature->dof * force->boltz; - etot = epot+ekin; + etot = epot + ekin; return etot; } @@ -1028,12 +990,10 @@ double FixMSST::compute_vsum() double t = 0.0; for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { - t += (v[i][0]*v[i][0] + v[i][1]*v[i][1] + v[i][2]*v[i][2]) ; - } + if (mask[i] & groupbit) { t += (v[i][0] * v[i][0] + v[i][1] * v[i][1] + v[i][2] * v[i][2]); } } - MPI_Allreduce(&t,&vsum,1,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(&t, &vsum, 1, MPI_DOUBLE, MPI_SUM, world); return vsum; } @@ -1043,6 +1003,6 @@ double FixMSST::compute_vsum() double FixMSST::memory_usage() { - double bytes = 3*atom->nmax * sizeof(double); + double bytes = 3 * atom->nmax * sizeof(double); return bytes; } diff --git a/src/SHOCK/fix_msst.h b/src/SHOCK/fix_msst.h index 8cd3f79a89..c7d4983dc4 100644 --- a/src/SHOCK/fix_msst.h +++ b/src/SHOCK/fix_msst.h @@ -64,9 +64,8 @@ class FixMSST : public Fix { double **old_velocity; // saved velocities - int kspace_flag; // 1 if KSpace invoked, 0 if not - int nrigid; // number of rigid fixes - int *rfix; // indices of rigid fixes + int kspace_flag; // 1 if KSpace invoked, 0 if not + std::vector rfix; // indices of rigid fixes char *id_temp, *id_press; // strings with identifiers of char *id_pe; // created computes diff --git a/src/fix_press_berendsen.cpp b/src/fix_press_berendsen.cpp index 05e523abae..40dcdeeb10 100644 --- a/src/fix_press_berendsen.cpp +++ b/src/fix_press_berendsen.cpp @@ -223,17 +223,12 @@ FixPressBerendsen::FixPressBerendsen(LAMMPS *lmp, int narg, char **arg) : id_press = utils::strdup(std::string(id) + "_press"); 
modify->add_compute(fmt::format("{} all pressure {}", id_press, id_temp)); pflag = 1; - - nrigid = 0; - rfix = nullptr; } /* ---------------------------------------------------------------------- */ FixPressBerendsen::~FixPressBerendsen() { - delete[] rfix; - // delete temperature and pressure if fix created them if (tflag) modify->delete_compute(id_temp); @@ -291,20 +286,10 @@ void FixPressBerendsen::init() kspace_flag = 0; // detect if any rigid fixes exist so rigid bodies move when box is remapped - // rfix[] = indices to each fix rigid - delete[] rfix; - nrigid = 0; - rfix = nullptr; - - for (const auto &ifix : modify->get_fix_list()) - if (ifix->rigid_flag) nrigid++; - if (nrigid > 0) { - rfix = new Fix *[nrigid]; - nrigid = 0; - for (auto &ifix : modify->get_fix_list()) - if (ifix->rigid_flag) rfix[nrigid++] = ifix; - } + rfix.clear(); + for (auto &ifix : modify->get_fix_list()) + if (ifix->rigid_flag) rfix.push_back(ifix); } /* ---------------------------------------------------------------------- @@ -409,8 +394,7 @@ void FixPressBerendsen::remap() if (mask[i] & groupbit) domain->x2lamda(x[i], x[i]); } - if (nrigid) - for (i = 0; i < nrigid; i++) rfix[i]->deform(0); + for (auto &ifix : rfix) ifix->deform(0); // reset global and local box to new size/shape @@ -436,8 +420,7 @@ void FixPressBerendsen::remap() if (mask[i] & groupbit) domain->lamda2x(x[i], x[i]); } - if (nrigid) - for (i = 0; i < nrigid; i++) rfix[i]->deform(1); + for (auto &ifix : rfix) ifix->deform(1); } /* ---------------------------------------------------------------------- */ diff --git a/src/fix_press_berendsen.h b/src/fix_press_berendsen.h index 9e83533746..ddbe31e7ed 100644 --- a/src/fix_press_berendsen.h +++ b/src/fix_press_berendsen.h @@ -44,9 +44,8 @@ class FixPressBerendsen : public Fix { double p_period[3], p_target[3]; double p_current[3], dilation[3]; double factor[3]; - int kspace_flag; // 1 if KSpace invoked, 0 if not - int nrigid; // number of rigid fixes - class Fix **rfix; // 
indices of rigid fixes + int kspace_flag; // 1 if KSpace invoked, 0 if not + std::vector rfix; // indices of rigid fixes char *id_temp, *id_press; class Compute *temperature, *pressure; diff --git a/src/fix_press_langevin.cpp b/src/fix_press_langevin.cpp index 2f6e765cd5..752f826dfe 100644 --- a/src/fix_press_langevin.cpp +++ b/src/fix_press_langevin.cpp @@ -376,9 +376,6 @@ FixPressLangevin::FixPressLangevin(LAMMPS *lmp, int narg, char **arg) : (1.0 + p_alpha[i] * update->dt / 2.0 / p_mass[i]); gjfb[i] = 1. / (1.0 + p_alpha[i] * update->dt / 2.0 / p_mass[i]); } - - nrigid = 0; - rfix = nullptr; } /* ---------------------------------------------------------------------- */ @@ -386,7 +383,6 @@ FixPressLangevin::FixPressLangevin(LAMMPS *lmp, int narg, char **arg) : FixPressLangevin::~FixPressLangevin() { delete random; - delete[] rfix; delete irregular; // delete temperature and pressure if fix created them @@ -437,20 +433,10 @@ void FixPressLangevin::init() kspace_flag = 0; // detect if any rigid fixes exist so rigid bodies move when box is remapped - // rfix[] = indices to each fix rigid - delete[] rfix; - nrigid = 0; - rfix = nullptr; - - for (const auto &ifix : modify->get_fix_list()) - if (ifix->rigid_flag) nrigid++; - if (nrigid > 0) { - rfix = new Fix *[nrigid]; - nrigid = 0; - for (auto &ifix : modify->get_fix_list()) - if (ifix->rigid_flag) rfix[nrigid++] = ifix; - } + rfix.clear(); + for (auto &ifix : modify->get_fix_list()) + if (ifix->rigid_flag) rfix.push_back(ifix); // Nullifies piston derivatives and forces so that it is not integrated at // the start of a second run. 
@@ -680,8 +666,7 @@ void FixPressLangevin::remap() if (mask[i] & groupbit) domain->x2lamda(x[i], x[i]); } - if (nrigid) - for (i = 0; i < nrigid; i++) rfix[i]->deform(0); + for (auto &ifix : rfix) ifix->deform(0); // reset global and local box to new size/shape @@ -719,8 +704,7 @@ void FixPressLangevin::remap() if (mask[i] & groupbit) domain->lamda2x(x[i], x[i]); } - if (nrigid) - for (i = 0; i < nrigid; i++) rfix[i]->deform(1); + for (auto &ifix : rfix) ifix->deform(1); } /* ---------------------------------------------------------------------- diff --git a/src/fix_press_langevin.h b/src/fix_press_langevin.h index 868993b1f4..498f9443a7 100644 --- a/src/fix_press_langevin.h +++ b/src/fix_press_langevin.h @@ -51,9 +51,8 @@ class FixPressLangevin : public Fix { double p_deriv[6], dilation[6]; double f_piston[6], f_old_piston[6]; double gjfa[6], gjfb[6], fran[6]; - int kspace_flag; // 1 if KSpace invoked, 0 if not - int nrigid; // number of rigid fixes - class Fix **rfix; // list of rigid fixes + int kspace_flag; // 1 if KSpace invoked, 0 if not + std::vector rfix; // indices of rigid fixes char *id_temp, *id_press; class Compute *temperature, *pressure; From 3390a11312e81f79592d44351b781473937c3e27 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 12 Nov 2023 10:04:04 -0500 Subject: [PATCH 018/116] improve error detection and messages --- src/variable.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/variable.cpp b/src/variable.cpp index 264dcf6258..9c0307341a 100644 --- a/src/variable.cpp +++ b/src/variable.cpp @@ -3397,18 +3397,18 @@ tagint Variable::int_between_brackets(char *&ptr, int varallow) char *id = start+2; int ivar = find(id); if (ivar < 0) - error->all(FLERR,"Invalid variable name in variable formula"); + error->all(FLERR,"Invalid variable name {} in variable formula", id); char *var = retrieve(id); if (var == nullptr) - error->all(FLERR,"Invalid variable evaluation in variable formula"); + error->all(FLERR,"Invalid 
variable evaluation for variable {} in variable formula", id); index = static_cast (atof(var)); } else index = ATOTAGINT(start); *ptr = ']'; - if (index == 0) + if (index <= 0) error->all(FLERR,"Index between variable brackets must be positive"); return index; } From 443f0666b706602fb97d0544a22effa0fa1e1c95 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 12 Nov 2023 16:03:25 -0500 Subject: [PATCH 019/116] refactor some more temperature/pressure/pe compute lookups --- src/EXTRA-FIX/fix_pafi.cpp | 29 +++++++++---------- src/EXTRA-FIX/fix_temp_csld.cpp | 32 +++++++++------------ src/EXTRA-FIX/fix_temp_csvr.cpp | 29 ++++++++----------- src/PLUMED/fix_plumed.cpp | 16 +++++------ src/QTB/fix_qbmsst.cpp | 48 ++++++++++++++----------------- src/QTB/fix_qtb.cpp | 12 ++++---- src/SHOCK/fix_msst.cpp | 51 ++++++++++++++------------------- src/SHOCK/fix_nphug.cpp | 9 +++--- src/UEF/fix_nh_uef.cpp | 20 ++++++------- 9 files changed, 105 insertions(+), 141 deletions(-) diff --git a/src/EXTRA-FIX/fix_pafi.cpp b/src/EXTRA-FIX/fix_pafi.cpp index 596bfdf667..05ee6b90da 100644 --- a/src/EXTRA-FIX/fix_pafi.cpp +++ b/src/EXTRA-FIX/fix_pafi.cpp @@ -77,14 +77,14 @@ FixPAFI::FixPAFI(LAMMPS *lmp, int narg, char **arg) : computename = utils::strdup(&arg[3][0]); - icompute = modify->find_compute(computename); - if (icompute == -1) - error->all(FLERR,"Compute ID for fix pafi does not exist"); - PathCompute = modify->compute[icompute]; + PathCompute = modify->get_compute_by_id(computename); + if (!PathCompute) + error->all(FLERR,"Compute ID {} for fix pafi does not exist", computename); + if (PathCompute->peratom_flag==0) - error->all(FLERR,"Compute for fix pafi does not calculate a local array"); + error->all(FLERR,"Compute {} for fix pafi does not calculate a local array", computename); if (PathCompute->size_peratom_cols < 9) - error->all(FLERR,"Compute for fix pafi must have 9 fields per atom"); + error->all(FLERR,"Compute {} for fix pafi must have 9 fields per atom", computename); 
if (comm->me==0) utils::logmesg(lmp,"fix pafi compute name,style: {},{}\n",computename,PathCompute->style); @@ -167,15 +167,14 @@ void FixPAFI::init() dtv = update->dt; dtf = 0.5 * update->dt * force->ftm2v; - icompute = modify->find_compute(computename); - if (icompute == -1) - error->all(FLERR,"Compute ID for fix pafi does not exist"); - PathCompute = modify->compute[icompute]; - if (PathCompute->peratom_flag==0) - error->all(FLERR,"Compute for fix pafi does not calculate a local array"); - if (PathCompute->size_peratom_cols < 9) - error->all(FLERR,"Compute for fix pafi must have 9 fields per atom"); + PathCompute = modify->get_compute_by_id(computename); + if (!PathCompute) + error->all(FLERR,"Compute ID {} for fix pafi does not exist", computename); + if (PathCompute->peratom_flag==0) + error->all(FLERR,"Compute {} for fix pafi does not calculate a local array", computename); + if (PathCompute->size_peratom_cols < 9) + error->all(FLERR,"Compute {} for fix pafi must have 9 fields per atom", computename); if (utils::strmatch(update->integrate_style,"^respa")) { step_respa = (dynamic_cast(update->integrate))->step; // nve @@ -183,7 +182,6 @@ void FixPAFI::init() if (respa_level >= 0) ilevel_respa = MIN(respa_level,nlevels_respa-1); else ilevel_respa = nlevels_respa-1; } - } void FixPAFI::setup(int vflag) @@ -206,7 +204,6 @@ void FixPAFI::min_setup(int vflag) min_post_force(vflag); } - void FixPAFI::post_force(int /*vflag*/) { double **x = atom->x; diff --git a/src/EXTRA-FIX/fix_temp_csld.cpp b/src/EXTRA-FIX/fix_temp_csld.cpp index cbcd2ca7df..13ead5b393 100644 --- a/src/EXTRA-FIX/fix_temp_csld.cpp +++ b/src/EXTRA-FIX/fix_temp_csld.cpp @@ -40,6 +40,7 @@ using namespace FixConst; enum{NOBIAS,BIAS}; enum{CONSTANT,EQUAL}; +static constexpr int PRNGSIZE = 98+2+3; /* ---------------------------------------------------------------------- */ FixTempCSLD::FixTempCSLD(LAMMPS *lmp, int narg, char **arg) : @@ -95,12 +96,12 @@ FixTempCSLD::FixTempCSLD(LAMMPS *lmp, int narg, 
char **arg) : FixTempCSLD::~FixTempCSLD() { - delete [] tstr; + delete[] tstr; // delete temperature if fix created it if (tflag) modify->delete_compute(id_temp); - delete [] id_temp; + delete[] id_temp; delete random; memory->destroy(vhold); @@ -137,15 +138,14 @@ void FixTempCSLD::init() if (tstr) { tvar = input->variable->find(tstr); if (tvar < 0) - error->all(FLERR,"Variable name for fix temp/csld does not exist"); + error->all(FLERR,"Variable name {} for fix temp/csld does not exist", tstr); if (input->variable->equalstyle(tvar)) tstyle = EQUAL; - else error->all(FLERR,"Variable for fix temp/csld is invalid style"); + else error->all(FLERR,"Variable {} for fix temp/csld is invalid style", tstr); } - int icompute = modify->find_compute(id_temp); - if (icompute < 0) - error->all(FLERR,"Temperature ID for fix temp/csld does not exist"); - temperature = modify->compute[icompute]; + temperature = modify->get_compute_by_id(id_temp); + if (!temperature) + error->all(FLERR,"Temperature ID {} for fix temp/csld does not exist", id_temp); if (modify->check_rigid_group_overlap(groupbit)) error->warning(FLERR,"Cannot thermostat atoms in rigid bodies"); @@ -158,7 +158,6 @@ void FixTempCSLD::init() void FixTempCSLD::end_of_step() { - // set current t_target // if variable temp, evaluate variable, wrap with clear/add @@ -171,8 +170,7 @@ void FixTempCSLD::end_of_step() modify->clearstep_compute(); t_target = input->variable->compute_equal(tvar); if (t_target < 0.0) - error->one(FLERR, - "Fix temp/csld variable returned negative temperature"); + error->one(FLERR, "Fix temp/csld variable returned negative temperature"); modify->addstep_compute(update->ntimestep + nevery); } @@ -259,17 +257,14 @@ int FixTempCSLD::modify_param(int narg, char **arg) modify->delete_compute(id_temp); tflag = 0; } - delete [] id_temp; + delete[] id_temp; id_temp = utils::strdup(arg[1]); - int icompute = modify->find_compute(id_temp); - if (icompute < 0) - error->all(FLERR,"Could not find fix_modify 
temperature ID"); - temperature = modify->compute[icompute]; + temperature = modify->get_compute_by_id(id_temp); + if (!temperature) error->all(FLERR,"Could not find fix_modify temperature ID {}", id_temp); if (temperature->tempflag == 0) - error->all(FLERR, - "Fix_modify temperature ID does not compute temperature"); + error->all(FLERR, "Fix_modify temperature ID {} does not compute temperature", id_temp); if (temperature->igroup != igroup && comm->me == 0) error->warning(FLERR,"Group for fix_modify temp != fix group"); return 2; @@ -297,7 +292,6 @@ double FixTempCSLD::compute_scalar() void FixTempCSLD::write_restart(FILE *fp) { - const int PRNGSIZE = 98+2+3; int nsize = PRNGSIZE*comm->nprocs+2; // pRNG state per proc + nprocs + energy double *list = nullptr; if (comm->me == 0) { diff --git a/src/EXTRA-FIX/fix_temp_csvr.cpp b/src/EXTRA-FIX/fix_temp_csvr.cpp index b908d1a4ed..6b46629a99 100644 --- a/src/EXTRA-FIX/fix_temp_csvr.cpp +++ b/src/EXTRA-FIX/fix_temp_csvr.cpp @@ -95,12 +95,12 @@ FixTempCSVR::FixTempCSVR(LAMMPS *lmp, int narg, char **arg) : FixTempCSVR::~FixTempCSVR() { - delete [] tstr; + delete[] tstr; // delete temperature if fix created it if (tflag) modify->delete_compute(id_temp); - delete [] id_temp; + delete[] id_temp; delete random; nmax = -1; @@ -124,15 +124,14 @@ void FixTempCSVR::init() if (tstr) { tvar = input->variable->find(tstr); if (tvar < 0) - error->all(FLERR,"Variable name for fix temp/csvr does not exist"); + error->all(FLERR,"Variable name {} for fix temp/csvr does not exist", tstr); if (input->variable->equalstyle(tvar)) tstyle = EQUAL; - else error->all(FLERR,"Variable for fix temp/csvr is invalid style"); + else error->all(FLERR,"Variable {} for fix temp/csvr is invalid style", tstr); } - int icompute = modify->find_compute(id_temp); - if (icompute < 0) - error->all(FLERR,"Temperature ID for fix temp/csvr does not exist"); - temperature = modify->compute[icompute]; + temperature = modify->get_compute_by_id(id_temp); + if 
(!temperature) + error->all(FLERR,"Temperature ID {} for fix temp/csvr does not exist", id_temp); if (temperature->tempbias) which = BIAS; else which = NOBIAS; @@ -154,8 +153,7 @@ void FixTempCSVR::end_of_step() modify->clearstep_compute(); t_target = input->variable->compute_equal(tvar); if (t_target < 0.0) - error->one(FLERR, - "Fix temp/csvr variable returned negative temperature"); + error->one(FLERR, "Fix temp/csvr variable returned negative temperature"); modify->addstep_compute(update->ntimestep + nevery); } @@ -215,17 +213,14 @@ int FixTempCSVR::modify_param(int narg, char **arg) modify->delete_compute(id_temp); tflag = 0; } - delete [] id_temp; + delete[] id_temp; id_temp = utils::strdup(arg[1]); - int icompute = modify->find_compute(id_temp); - if (icompute < 0) - error->all(FLERR,"Could not find fix_modify temperature ID"); - temperature = modify->compute[icompute]; + temperature = modify->get_compute_by_id(id_temp); + if (!temperature) error->all(FLERR,"Could not find fix_modify temperature ID {}", id_temp); if (temperature->tempflag == 0) - error->all(FLERR, - "Fix_modify temperature ID does not compute temperature"); + error->all(FLERR, "Fix_modify temperature ID {} does not compute temperature", id_temp); if (temperature->igroup != igroup && comm->me == 0) error->warning(FLERR,"Group for fix_modify temp != fix group"); return 2; diff --git a/src/PLUMED/fix_plumed.cpp b/src/PLUMED/fix_plumed.cpp index f06d5474f6..ad7f4f3995 100644 --- a/src/PLUMED/fix_plumed.cpp +++ b/src/PLUMED/fix_plumed.cpp @@ -522,14 +522,13 @@ int FixPlumed::modify_param(int narg, char **arg) delete[] id_pe; id_pe = utils::strdup(arg[1]); - int icompute = modify->find_compute(arg[1]); - if (icompute < 0) error->all(FLERR,"Could not find fix_modify potential energy ID"); - c_pe = modify->compute[icompute]; + c_pe = modify->get_compute_by_id(id_pe); + if (!c_pe) error->all(FLERR,"Could not find fix_modify potential energy ID {}", id_pe); if (c_pe->peflag == 0) - 
error->all(FLERR,"Fix_modify plmd_pe ID does not compute potential energy"); + error->all(FLERR,"Fix_modify compute pe ID {} does not compute potential energy", id_pe); if (c_pe->igroup != 0 && comm->me == 0) - error->warning(FLERR,"Potential for fix PLUMED is not for group all"); + error->warning(FLERR,"Potential energy compute {} for fix PLUMED is not for group all", id_pe); return 2; @@ -539,12 +538,11 @@ int FixPlumed::modify_param(int narg, char **arg) delete[] id_press; id_press = utils::strdup(arg[1]); - int icompute = modify->find_compute(arg[1]); - if (icompute < 0) error->all(FLERR,"Could not find fix_modify pressure ID"); - c_press = modify->compute[icompute]; + c_press = modify->get_compute_by_id(id_press); + if (!c_press) error->all(FLERR,"Could not find fix_modify compute pressure ID {}", id_press); if (c_press->pressflag == 0) - error->all(FLERR,"Fix_modify pressure ID does not compute pressure"); + error->all(FLERR,"Fix_modify compute pressure ID {} does not compute pressure", id_press); if (c_press->igroup != 0 && comm->me == 0) error->warning(FLERR,"Virial for fix PLUMED is not for group all"); diff --git a/src/QTB/fix_qbmsst.cpp b/src/QTB/fix_qbmsst.cpp index 2450561363..4cd2d05760 100644 --- a/src/QTB/fix_qbmsst.cpp +++ b/src/QTB/fix_qbmsst.cpp @@ -313,20 +313,20 @@ void FixQBMSST::init() error->all(FLERR,"Cannot use fix qbmsst without per-type mass defined"); // set compute ptrs - int itemp = modify->find_compute(id_temp); - int ipress = modify->find_compute(id_press); - int ipe = modify->find_compute(id_pe); - if (itemp < 0 || ipress < 0|| ipe < 0) - error->all(FLERR,"Could not find fix qbmsst compute ID"); - if (modify->compute[itemp]->tempflag == 0) - error->all(FLERR,"Fix qbmsst compute ID does not compute temperature"); - if (modify->compute[ipress]->pressflag == 0) - error->all(FLERR,"Fix qbmsst compute ID does not compute pressure"); - if (modify->compute[ipe]->peflag == 0) - error->all(FLERR,"Fix qbmsst compute ID does not compute 
potential energy"); - temperature = modify->compute[itemp]; - pressure = modify->compute[ipress]; - pe = modify->compute[ipe]; + + temperature = modify->get_compute_by_id(id_temp); + if (!temperature) + error->all(FLERR, "Could not find fix qbmsst temperature compute ID {}", id_temp); + if (temperature->tempflag == 0) + error->all(FLERR, "Fix qbmsst compute ID {} does not compute temperature", id_temp); + pressure = modify->get_compute_by_id(id_press); + if (!pressure) error->all(FLERR, "Could not find fix qbmsst pressure compute ID {}", id_press); + if (pressure->pressflag == 0) + error->all(FLERR, "Fix qbmsst compute ID {} does not compute pressure", id_press); + pe = modify->get_compute_by_id(id_pe); + if (!pe) error->all(FLERR, "Could not find fix qbmsst pe compute ID {}", id_pe); + if (pe->peflag == 0) + error->all(FLERR, "Fix qbmsst compute ID {} does not compute potential energy", id_pe); // initiate the counter l and \mu counter_l=0; @@ -856,16 +856,13 @@ int FixQBMSST::modify_param(int narg, char **arg) } delete[] id_temp; id_temp = utils::strdup(arg[1]); - - int icompute = modify->find_compute(id_temp); - if (icompute < 0) error->all(FLERR,"Could not find fix_modify temperature ID"); - temperature = modify->compute[icompute]; - + temperature = modify->get_compute_by_id(id_temp); + if (!temperature) + error->all(FLERR, "Could not find fix_modify temperature ID {}", id_temp); if (temperature->tempflag == 0) - error->all(FLERR,"Fix_modify temperature ID does not compute temperature"); + error->all(FLERR,"Fix_modify temperature ID {} does not compute temperature", id_temp); if (temperature->igroup != 0 && comm->me == 0) - error->warning(FLERR,"Temperature for QBMSST is not for group all"); - + error->warning(FLERR, "Temperature for fix qbmsst is not for group all"); return 2; } else if (strcmp(arg[0],"press") == 0) { @@ -877,12 +874,11 @@ int FixQBMSST::modify_param(int narg, char **arg) delete[] id_press; id_press = utils::strdup(arg[1]); - int icompute = 
modify->find_compute(id_press); - if (icompute < 0) error->all(FLERR,"Could not find fix_modify pressure ID"); - pressure = modify->compute[icompute]; + pressure = modify->get_compute_by_id(id_press); + if (!pressure) error->all(FLERR,"Could not find fix_modify compute pressure ID {}", id_press); if (pressure->pressflag == 0) - error->all(FLERR,"Fix_modify pressure ID does not compute pressure"); + error->all(FLERR,"Fix_modify compute pressure ID {} does not compute pressure", id_press); return 2; } return 0; diff --git a/src/QTB/fix_qtb.cpp b/src/QTB/fix_qtb.cpp index 2f7ce4130a..8f73a04927 100644 --- a/src/QTB/fix_qtb.cpp +++ b/src/QTB/fix_qtb.cpp @@ -345,15 +345,13 @@ int FixQTB::modify_param(int narg, char **arg) { if (strcmp(arg[0],"temp") == 0) { if (narg < 2) error->all(FLERR,"Illegal fix_modify command"); - delete [] id_temp; + delete[] id_temp; id_temp = utils::strdup(arg[1]); - - int icompute = modify->find_compute(id_temp); - if (icompute < 0) error->all(FLERR,"Could not find fix_modify temperature ID"); - temperature = modify->compute[icompute]; - + temperature = modify->get_compute_by_id(id_temp); + if (!temperature) + error->all(FLERR, "Could not find fix_modify temperature ID {}", id_temp); if (temperature->tempflag == 0) - error->all(FLERR,"Fix_modify temperature ID does not compute temperature"); + error->all(FLERR,"Fix_modify temperature ID {} does not compute temperature", id_temp); if (temperature->igroup != igroup && comm->me == 0) error->warning(FLERR,"Group for fix_modify temp != fix group"); return 2; diff --git a/src/SHOCK/fix_msst.cpp b/src/SHOCK/fix_msst.cpp index 55842250ec..b3a55ff663 100644 --- a/src/SHOCK/fix_msst.cpp +++ b/src/SHOCK/fix_msst.cpp @@ -247,20 +247,19 @@ void FixMSST::init() // set compute ptrs - int itemp = modify->find_compute(id_temp); - int ipress = modify->find_compute(id_press); - int ipe = modify->find_compute(id_pe); - if (itemp < 0 || ipress < 0 || ipe < 0) error->all(FLERR, "Could not find fix msst compute 
ID"); - if (modify->compute[itemp]->tempflag == 0) - error->all(FLERR, "Fix msst compute ID does not compute temperature"); - if (modify->compute[ipress]->pressflag == 0) - error->all(FLERR, "Fix msst compute ID does not compute pressure"); - if (modify->compute[ipe]->peflag == 0) - error->all(FLERR, "Fix msst compute ID does not compute potential energy"); - - temperature = modify->compute[itemp]; - pressure = modify->compute[ipress]; - pe = modify->compute[ipe]; + temperature = modify->get_compute_by_id(id_temp); + if (!temperature) + error->all(FLERR, "Could not find fix msst temperature compute ID {}", id_temp); + if (temperature->tempflag == 0) + error->all(FLERR, "Fix msst compute ID {} does not compute temperature", id_temp); + pressure = modify->get_compute_by_id(id_press); + if (!pressure) error->all(FLERR, "Could not find fix msst pressure compute ID {}", id_press); + if (pressure->pressflag == 0) + error->all(FLERR, "Fix msst compute ID {} does not compute pressure", id_press); + pe = modify->get_compute_by_id(id_pe); + if (!pe) error->all(FLERR, "Could not find fix msst pe compute ID {}", id_pe); + if (pe->peflag == 0) + error->all(FLERR, "Fix msst compute ID {} does not compute potential energy", id_pe); dtv = update->dt; dtf = 0.5 * update->dt * force->ftm2v; @@ -801,18 +800,13 @@ int FixMSST::modify_param(int narg, char **arg) } delete[] id_temp; id_temp = utils::strdup(arg[1]); - - int icompute = modify->find_compute(id_temp); - if (icompute < 0) error->all(FLERR, "Could not find fix_modify temperature ID"); - temperature = modify->compute[icompute]; - + temperature = modify->get_compute_by_id(id_temp); + if (!temperature) + error->all(FLERR, "Could not find fix_modify temperature ID {}", id_temp); if (temperature->tempflag == 0) - error->all(FLERR, - "Fix_modify temperature ID does not " - "compute temperature"); + error->all(FLERR,"Fix_modify temperature ID {} does not compute temperature", id_temp); if (temperature->igroup != 0 && comm->me == 0) 
- error->warning(FLERR, "Temperature for MSST is not for group all"); - + error->warning(FLERR, "Temperature for fix msst is not for group all"); return 2; } else if (strcmp(arg[0], "press") == 0) { @@ -823,13 +817,10 @@ int FixMSST::modify_param(int narg, char **arg) } delete[] id_press; id_press = utils::strdup(arg[1]); - - int icompute = modify->find_compute(id_press); - if (icompute < 0) error->all(FLERR, "Could not find fix_modify pressure ID"); - pressure = modify->compute[icompute]; - + pressure = modify->get_compute_by_id(id_press); + if (!pressure) error->all(FLERR, "Could not find fix_modify pressure ID {}", id_press); if (pressure->pressflag == 0) - error->all(FLERR, "Fix_modify pressure ID does not compute pressure"); + error->all(FLERR, "Fix_modify pressure ID {} does not compute pressure", id_press); return 2; } return 0; diff --git a/src/SHOCK/fix_nphug.cpp b/src/SHOCK/fix_nphug.cpp index 0f9bb3ba93..3cad719c35 100644 --- a/src/SHOCK/fix_nphug.cpp +++ b/src/SHOCK/fix_nphug.cpp @@ -148,7 +148,7 @@ FixNPHug::~FixNPHug() // delete pe compute if (peflag) modify->delete_compute(id_pe); - delete [] id_pe; + delete[] id_pe; } /* ---------------------------------------------------------------------- */ @@ -161,10 +161,9 @@ void FixNPHug::init() // set pe ptr - int icompute = modify->find_compute(id_pe); - if (icompute < 0) - error->all(FLERR,"Potential energy ID for fix nvt/nph/npt does not exist"); - pe = modify->compute[icompute]; + pe = modify->get_compute_by_id(id_pe); + if (pe) + error->all(FLERR, "Potential energy compute ID {} for fix {} does not exist", id_pe, style); } diff --git a/src/UEF/fix_nh_uef.cpp b/src/UEF/fix_nh_uef.cpp index b8b1fd07d6..a824966d34 100644 --- a/src/UEF/fix_nh_uef.cpp +++ b/src/UEF/fix_nh_uef.cpp @@ -228,28 +228,24 @@ void FixNHUef::init() // find conflict with fix/deform or other box chaging fixes - for (int i=0; i < modify->nfix; i++) - { - if (strcmp(modify->fix[i]->id,id) != 0) - if ((modify->fix[i]->box_change & 
BOX_CHANGE_SHAPE) != 0) - error->all(FLERR,"Can't use another fix which changes box shape with fix/nvt/npt/uef"); + for (auto &ifix : modify->get_fix_list()) { + if (strcmp(ifix->id, id) != 0) + if ((ifix->box_change & BOX_CHANGE_SHAPE) != 0) + error->all(FLERR,"Can't use another fix which changes box shape with fix {}", style); } // this will make the pressure compute for nvt if (!pstat_flag) if (pcomputeflag) { - int icomp = modify->find_compute(id_press); - if (icomp<0) - error->all(FLERR,"Pressure ID for fix/nvt/uef doesn't exist"); - pressure = modify->compute[icomp]; - + pressure = modify->get_compute_by_id(id_press); + if (!pressure) error->all(FLERR,"Pressure ID {} for {} doesn't exist", id_press, style); if (strcmp(pressure->style,"pressure/uef") != 0) - error->all(FLERR,"Using fix nvt/npt/uef without a compute pressure/uef"); + error->all(FLERR,"Using fix {} without a compute pressure/uef", style); } if (strcmp(temperature->style,"temp/uef") != 0) - error->all(FLERR,"Using fix nvt/npt/uef without a compute temp/uef"); + error->all(FLERR,"Using fix {} without a compute temp/uef", style); } /* ---------------------------------------------------------------------- From bc6031cd24b76fc28ef150f894177003185c7eff Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Mon, 13 Nov 2023 14:53:45 -0700 Subject: [PATCH 020/116] Fix bug in Kokkos minimize + fix deform --- src/KOKKOS/npair_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index 45ec83e90e..44e9e355b9 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -113,7 +113,7 @@ void NPairKokkos::copy_stencil_info() NPair::copy_stencil_info(); nstencil = ns->nstencil; - if (ns->last_stencil != last_stencil_old) { + if (ns->last_stencil != last_stencil_old || ns->last_stencil == update->ntimestep) { // copy stencil to device as it may have changed last_stencil_old = ns->last_stencil; From 
d88ffeea3fb64bc770f8ac63ee0283ec0e7f8f3a Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Mon, 13 Nov 2023 14:58:48 -0700 Subject: [PATCH 021/116] Need force_clear for atom_vec_spin_kokkos --- src/KOKKOS/atom_vec_spin_kokkos.cpp | 32 +++++++++++++++++++++++++++++ src/KOKKOS/atom_vec_spin_kokkos.h | 1 + src/KOKKOS/min_kokkos.cpp | 18 ++++++++++------ 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp index f5b8697352..9de02c3b28 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.cpp +++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp @@ -586,6 +586,38 @@ int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int n } } +/* ---------------------------------------------------------------------- + clear extra forces starting at atom N + nbytes = # of bytes to clear for a per-atom vector + include f b/c this is invoked from within SPIN pair styles +------------------------------------------------------------------------- */ + +void AtomVecSpinKokkos::force_clear(int n, size_t nbytes) +{ + int nzero = nbytes/sizeof(double); + + if (nzero) { + atomKK->k_fm.clear_sync_state(); // will be cleared below + atomKK->k_fm_long.clear_sync_state(); // will be cleared below + + // local variables for lambda capture + + auto l_fm = atomKK->k_fm.d_view; + auto l_fm_long = atomKK->k_fm_long.d_view; + + Kokkos::parallel_for(nzero, LAMMPS_LAMBDA(int i) { + l_fm(i,0) = 0.0; + l_fm(i,1) = 0.0; + l_fm(i,2) = 0.0; + l_fm_long(i,0) = 0.0; + l_fm_long(i,1) = 0.0; + l_fm_long(i,2) = 0.0; + }); + + atomKK->modified(Device,FM_MASK|FML_MASK); + } +} + /* ---------------------------------------------------------------------- */ void AtomVecSpinKokkos::sync(ExecutionSpace space, unsigned int mask) diff --git a/src/KOKKOS/atom_vec_spin_kokkos.h b/src/KOKKOS/atom_vec_spin_kokkos.h index d14d01fb62..f0145e4db7 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.h +++ b/src/KOKKOS/atom_vec_spin_kokkos.h @@ -34,6 
+34,7 @@ class AtomVecSpinKokkos : public AtomVecKokkos, public AtomVecSpin { AtomVecSpinKokkos(class LAMMPS *); void grow(int) override; void grow_pointers() override; + void force_clear(int, size_t) override; void sort_kokkos(Kokkos::BinSort &Sorter) override; int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, diff --git a/src/KOKKOS/min_kokkos.cpp b/src/KOKKOS/min_kokkos.cpp index bbb9a0bd6e..c01a53c7b3 100644 --- a/src/KOKKOS/min_kokkos.cpp +++ b/src/KOKKOS/min_kokkos.cpp @@ -72,8 +72,7 @@ void MinKokkos::init() void MinKokkos::setup(int flag) { if (comm->me == 0 && screen) { - fmt::print(screen,"Setting up {} style minimization ...\n", - update->minimize_style); + fmt::print(screen,"Setting up {} style minimization ...\n", update->minimize_style); if (flag) { fmt::print(screen," Unit style : {}\n", update->unit_style); fmt::print(screen," Current step : {}\n", update->ntimestep); @@ -92,14 +91,13 @@ void MinKokkos::setup(int flag) fextra = new double[nextra_global]; if (comm->me == 0) error->warning(FLERR, "Energy due to {} extra global DOFs will" - " be included in minimizer energies\n", nextra_global); + " be included in minimizer energies\n",nextra_global); } // compute for potential energy - int id = modify->find_compute("thermo_pe"); - if (id < 0) error->all(FLERR,"Minimization could not find thermo_pe compute"); - pe_compute = modify->compute[id]; + pe_compute = modify->get_compute_by_id("thermo_pe"); + if (!pe_compute) error->all(FLERR,"Minimization could not find thermo_pe compute"); // style-specific setup does two tasks // setup extra global dof vectors @@ -537,6 +535,7 @@ double MinKokkos::energy_force(int resetflag) if (resetflag) fix_minimize_kk->reset_coords(); reset_vectors(); } + return energy; } @@ -575,7 +574,14 @@ void MinKokkos::force_clear() l_torque(i,2) = 0.0; } }); + + if (extraflag) { + size_t nbytes = sizeof(double) * atom->nlocal; + if (force->newton) nbytes += sizeof(double) * 
atom->nghost; + atom->avec->force_clear(0,nbytes); + } } + atomKK->modified(Device,F_MASK|TORQUE_MASK); } From e6485002adf4209f5a217736c2707f4be8eaeef8 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Mon, 13 Nov 2023 15:08:49 -0700 Subject: [PATCH 022/116] Avoid integer division --- src/KOKKOS/atom_vec_spin_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp index 9de02c3b28..d2dd3a05ab 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.cpp +++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp @@ -594,7 +594,7 @@ int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int n void AtomVecSpinKokkos::force_clear(int n, size_t nbytes) { - int nzero = nbytes/sizeof(double); + int nzero = (double)nbytes/sizeof(double); if (nzero) { atomKK->k_fm.clear_sync_state(); // will be cleared below From bcfbea99038232a16632ee613ae041a60c970aed Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 15 Nov 2023 08:03:15 -0500 Subject: [PATCH 023/116] throw error for illegal replication values --- src/replicate.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/replicate.cpp b/src/replicate.cpp index 01cc1faabb..6a4c8bfd74 100644 --- a/src/replicate.cpp +++ b/src/replicate.cpp @@ -52,8 +52,11 @@ void Replicate::command(int narg, char **arg) int nx = utils::inumeric(FLERR,arg[0],false,lmp); int ny = utils::inumeric(FLERR,arg[1],false,lmp); int nz = utils::inumeric(FLERR,arg[2],false,lmp); - int nrep = nx*ny*nz; + if ((nx <= 0) || (ny <= 0) || (nz <= 0)) + error->all(FLERR, "Illegal replication grid {}x{}x{}. 
All replications must be > 0", + nx, ny, nz); + int nrep = nx*ny*nz; if (me == 0) utils::logmesg(lmp, "Replication is creating a {}x{}x{} = {} times larger system...\n", nx, ny, nz, nrep); From aec22660646b767550ba8a2803cdac3d7b5de4c1 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 15 Nov 2023 09:43:02 -0500 Subject: [PATCH 024/116] fix typo --- src/SHOCK/fix_nphug.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SHOCK/fix_nphug.cpp b/src/SHOCK/fix_nphug.cpp index 3cad719c35..b4b4a866b6 100644 --- a/src/SHOCK/fix_nphug.cpp +++ b/src/SHOCK/fix_nphug.cpp @@ -162,7 +162,7 @@ void FixNPHug::init() // set pe ptr pe = modify->get_compute_by_id(id_pe); - if (pe) + if (!pe) error->all(FLERR, "Potential energy compute ID {} for fix {} does not exist", id_pe, style); } From b3265729169f0d24c94b6c8a571b671627b872ae Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 15 Nov 2023 09:49:58 -0500 Subject: [PATCH 025/116] flag input errors --- src/REPLICA/fix_pimd_langevin.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/REPLICA/fix_pimd_langevin.cpp b/src/REPLICA/fix_pimd_langevin.cpp index cffaf327e4..fd19e804e7 100644 --- a/src/REPLICA/fix_pimd_langevin.cpp +++ b/src/REPLICA/fix_pimd_langevin.cpp @@ -455,7 +455,12 @@ void FixPIMDLangevin::init() langevin_init(); c_pe = modify->get_compute_by_id(id_pe); + if (!c_pe) + error->universe_all(FLERR, fmt::format("Could not find fix {} potential energy compute ID {}", style, id_pe)); + c_press = modify->get_compute_by_id(id_press); + if (!c_press) + error->universe_all(FLERR, fmt::format("Could not find fix {} pressure compute ID {}", style, id_press)); t_prim = t_vir = t_cv = p_prim = p_vir = p_cv = p_md = 0.0; } From 1f509d900e7428970c2e2a140383ab45050203e6 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 16 Nov 2023 11:57:33 -0500 Subject: [PATCH 026/116] make output more consistent with other LAMMPS code --- src/REPLICA/fix_pimd_langevin.cpp | 26 
+++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/REPLICA/fix_pimd_langevin.cpp b/src/REPLICA/fix_pimd_langevin.cpp index d328420ce9..2974359c33 100644 --- a/src/REPLICA/fix_pimd_langevin.cpp +++ b/src/REPLICA/fix_pimd_langevin.cpp @@ -419,10 +419,10 @@ int FixPIMDLangevin::setmask() void FixPIMDLangevin::init() { if (atom->map_style == Atom::MAP_NONE) - error->all(FLERR, "fix pimd/langevin requires an atom map, see atom_modify"); + error->all(FLERR, "Fix pimd/langevin requires an atom map, see atom_modify"); if (universe->me == 0 && universe->uscreen) - fprintf(universe->uscreen, "fix pimd/langevin initializing Path-Integral ...\n"); + fprintf(universe->uscreen, "Fix pimd/langevin: initializing Path-Integral ...\n"); // prepare the constants @@ -446,7 +446,7 @@ void FixPIMDLangevin::init() if ((universe->me == 0) && (universe->uscreen)) fprintf(universe->uscreen, - "fix pimd/langevin -P/(beta^2 * hbar^2) = %20.7lE (kcal/mol/A^2)\n\n", fbond); + "Fix pimd/langevin: -P/(beta^2 * hbar^2) = %20.7lE (kcal/mol/A^2)\n\n", fbond); if (integrator == OBABO) { dtf = 0.5 * update->dt * force->ftm2v; @@ -899,8 +899,8 @@ void FixPIMDLangevin::baro_init() } Vcoeff = 1.0; std::string out = fmt::format("\nInitializing PIMD {:s} barostat...\n", Barostats[barostat]); - out += fmt::format("The barostat mass is W = {:.16e}\n", W); - utils::logmesg(lmp, out); + out += fmt::format(" The barostat mass is W = {:.16e}\n", W); + if (universe->me == 0) utils::logmesg(lmp, out); } /* ---------------------------------------------------------------------- */ @@ -1017,8 +1017,8 @@ void FixPIMDLangevin::langevin_init() c2 = sqrt(1.0 - c1 * c1); // note that c1 and c2 here only works for the centroid mode. 
if (thermostat == PILE_L) { - std::string out = "\nInitializing PI Langevin equation thermostat...\n"; - out += "Bead ID | omega | tau | c1 | c2\n"; + std::string out = "Initializing PI Langevin equation thermostat...\n"; + out += " Bead ID | omega | tau | c1 | c2\n"; if (method == NMPIMD) { tau_k = new double[np]; c1_k = new double[np]; @@ -1039,18 +1039,18 @@ void FixPIMDLangevin::langevin_init() c2_k[i] = sqrt(1.0 - c1_k[i] * c1_k[i]); } for (int i = 0; i < np; i++) { - out += fmt::format(" {:d} {:.8e} {:.8e} {:.8e} {:.8e}\n", i, _omega_k[i], tau_k[i], - c1_k[i], c2_k[i]); + out += fmt::format(" {:d} {:.8e} {:.8e} {:.8e} {:.8e}\n", i, + _omega_k[i], tau_k[i], c1_k[i], c2_k[i]); } } else if (method == PIMD) { for (int i = 0; i < np; i++) { - out += fmt::format(" {:d} {:.8e} {:.8e} {:.8e} {:.8e}\n", i, _omega_np / sqrt(fmass), - tau, c1, c2); + out += fmt::format(" {:d} {:.8e} {:.8e} {:.8e} {:.8e}\n", i, + _omega_np / sqrt(fmass), tau, c1, c2); } } - if (thermostat == PILE_L) out += "PILE_L thermostat successfully initialized!\n"; + if (thermostat == PILE_L) out += " PILE_L thermostat successfully initialized!\n"; out += "\n"; - utils::logmesg(lmp, out); + if (universe->me == 0) utils::logmesg(lmp, out); } } From 7f2a6b1a8585323d7eb1419f2bd46ef7074359b0 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 16 Nov 2023 12:00:29 -0500 Subject: [PATCH 027/116] update h2 fix pimd/langevin example --- .../langevin_metal_units/in.langevin.metal | 28 +++++ .../{in.lmp => in.pimd-langevin.metal} | 2 +- ...tal.g++ => log.16Nov23.langevin.metal.g++} | 2 +- ...g++.0 => log.16Nov23.langevin.metal.g++.0} | 60 +++++----- ...g++.1 => log.16Nov23.langevin.metal.g++.1} | 59 ++++------ ...g++.2 => log.16Nov23.langevin.metal.g++.2} | 59 ++++------ ...g++.3 => log.16Nov23.langevin.metal.g++.3} | 57 ++++------ .../log.16Nov23.pimd-langevin.metal.g++ | 2 + .../log.16Nov23.pimd-langevin.metal.g++.0 | 103 ++++++++++++++++++ .../log.16Nov23.pimd-langevin.metal.g++.1 | 95 
++++++++++++++++ .../log.16Nov23.pimd-langevin.metal.g++.2 | 95 ++++++++++++++++ .../log.16Nov23.pimd-langevin.metal.g++.3 | 95 ++++++++++++++++ .../PACKAGES/pimd/langevin_metal_units/run.sh | 4 +- 13 files changed, 524 insertions(+), 137 deletions(-) create mode 100644 examples/PACKAGES/pimd/langevin_metal_units/in.langevin.metal rename examples/PACKAGES/pimd/langevin_metal_units/{in.lmp => in.pimd-langevin.metal} (80%) rename examples/PACKAGES/pimd/langevin_metal_units/{log.14Jun23.langevin.metal.g++ => log.16Nov23.langevin.metal.g++} (64%) rename examples/PACKAGES/pimd/langevin_metal_units/{log.14Jun23.langevin.metal.g++.0 => log.16Nov23.langevin.metal.g++.0} (70%) rename examples/PACKAGES/pimd/langevin_metal_units/{log.14Jun23.langevin.metal.g++.1 => log.16Nov23.langevin.metal.g++.1} (70%) rename examples/PACKAGES/pimd/langevin_metal_units/{log.14Jun23.langevin.metal.g++.2 => log.16Nov23.langevin.metal.g++.2} (70%) rename examples/PACKAGES/pimd/langevin_metal_units/{log.14Jun23.langevin.metal.g++.3 => log.16Nov23.langevin.metal.g++.3} (70%) create mode 100644 examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++ create mode 100644 examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.0 create mode 100644 examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.1 create mode 100644 examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.2 create mode 100644 examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.3 diff --git a/examples/PACKAGES/pimd/langevin_metal_units/in.langevin.metal b/examples/PACKAGES/pimd/langevin_metal_units/in.langevin.metal new file mode 100644 index 0000000000..071b4ad722 --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_metal_units/in.langevin.metal @@ -0,0 +1,28 @@ +variable ibead uloop 99 pad + +units metal +atom_style atomic +atom_modify map yes +boundary p p p +pair_style lj/cut 9.5251 +read_data 
data.metalnpt${ibead} + +pair_coeff * * 0.00965188 3.4 +pair_modify shift yes + +mass 1 39.948 + +timestep 0.001 + +velocity all create 0.0 ${ibead} + +fix 1 all pimd/langevin method nmpimd ensemble npt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 iso 1.0 barostat BZP taup 1.0 fixcom no + +thermo_style custom step temp f_1[*] vol press +thermo 100 +thermo_modify norm no + +# dump dcd all custom 100 ${ibead}.dcd id type xu yu zu vx vy vz ix iy iz fx fy fz +# dump_modify dcd sort id format line "%d %d %.16f %.16f %.16f %.16f %.16f %.16f %d %d %d %.16f %.16f %.16f" + +run 1000 diff --git a/examples/PACKAGES/pimd/langevin_metal_units/in.lmp b/examples/PACKAGES/pimd/langevin_metal_units/in.pimd-langevin.metal similarity index 80% rename from examples/PACKAGES/pimd/langevin_metal_units/in.lmp rename to examples/PACKAGES/pimd/langevin_metal_units/in.pimd-langevin.metal index 124063df99..4d1416063c 100644 --- a/examples/PACKAGES/pimd/langevin_metal_units/in.lmp +++ b/examples/PACKAGES/pimd/langevin_metal_units/in.pimd-langevin.metal @@ -16,7 +16,7 @@ timestep 0.001 velocity all create 0.0 ${ibead} -fix 1 all pimd/langevin ensemble npt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 iso 1.0 barostat BZP taup 1.0 fixcom no +fix 1 all pimd/langevin method pimd ensemble nvt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 taup 1.0 fixcom no thermo_style custom step temp f_1[*] vol press thermo 100 diff --git a/examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++ similarity index 64% rename from examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++ rename to examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++ index fa22106766..a05d2d9aa8 100644 --- a/examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++ +++ 
b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++ @@ -1,2 +1,2 @@ -LAMMPS (28 Mar 2023) +LAMMPS (3 Aug 2023) Running on 4 partitions of processors diff --git a/examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.0 b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.0 similarity index 70% rename from examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.0 rename to examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.0 index 00787df8ba..c34bf8e7dd 100644 --- a/examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.0 +++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.0 @@ -1,5 +1,6 @@ -LAMMPS (28 Mar 2023) +LAMMPS (3 Aug 2023) Processor partition = 0 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) using 1 OpenMP thread(s) per MPI task variable ibead uloop 99 pad @@ -29,10 +30,10 @@ timestep 0.001 velocity all create 0.0 ${ibead} velocity all create 0.0 01 -fix 1 all pimd/langevin ensemble npt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 iso 1.0 barostat BZP taup 1.0 fixcom no +fix 1 all pimd/langevin method nmpimd ensemble npt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 iso 1.0 barostat BZP taup 1.0 fixcom no Initializing PIMD BZP barostat... -The barostat mass is W = 2.3401256650800001e+01 + The barostat mass is W = 2.3401256650800001e+01 thermo_style custom step temp f_1[*] vol press thermo 100 @@ -43,14 +44,13 @@ thermo_modify norm no run 1000 Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule - Initializing PI Langevin equation thermostat... 
-Bead ID | omega | tau | c1 | c2 - 0 0.00000000e+00 1.00000000e+00 9.99500125e-01 3.16148726e-02 - 1 8.37986825e+01 5.96668092e-03 9.19616372e-01 3.92817678e-01 - 2 1.18509233e+02 4.21908054e-03 8.88243614e-01 4.59372705e-01 - 3 8.37986825e+01 5.96668092e-03 9.19616372e-01 3.92817678e-01 -PILE_L thermostat successfully initialized! + Bead ID | omega | tau | c1 | c2 + 0 0.00000000e+00 1.00000000e+00 9.99500125e-01 3.16148726e-02 + 1 8.37986825e+01 5.96668092e-03 9.19616372e-01 3.92817678e-01 + 2 1.18509233e+02 4.21908054e-03 8.88243614e-01 4.59372705e-01 + 3 8.37986825e+01 5.96668092e-03 9.19616372e-01 3.92817678e-01 + PILE_L thermostat successfully initialized! Neighbor list info ... update: every = 1 steps, delay = 0 steps, check = yes @@ -66,31 +66,31 @@ Neighbor list info ... bin: standard Per MPI rank memory allocation (min/avg/max) = 3.121 | 3.121 | 3.121 Mbytes Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] f_1[11] f_1[12] f_1[13] f_1[14] f_1[15] Volume Press - 0 0 0 0 -7.3046601 4.3005229 -21.877018 -8.7249482 2.9571502 -1743.5332 -698.49808 -172.07477 0 0 0.033460054 -0.37064378 4.216227 13402.228 -164.47373 - 100 149.95804 3.8573359 0 -7.7921375 42.886648 -23.396327 -1.980193 2.954003 -1888.0547 -1648.7118 -332.0298 -0.099139345 0.11500091 0.033044702 -0.3701566 42.83112 13235.861 -101.30374 - 200 245.00113 6.3021074 0 -8.2639651 41.690123 -22.521598 -4.273021 2.9600599 -1906.2904 -1609.02 -265.94404 -0.20527926 0.49305948 0.031504957 -0.36829556 41.729191 12619.125 112.22426 - 300 300.57486 7.7316177 0 -8.2986331 43.180131 -21.755813 -7.7032433 2.9714114 -1968.7685 290.49656 251.72564 -0.21935745 0.56300721 0.029467915 -0.36568855 43.236828 11803.2 814.45889 - 400 368.08438 9.4681493 0 -8.4800193 49.109699 -24.824142 2.9744597 2.9794185 -2335.993 1368.7398 570.03286 -0.028366234 0.0094148316 0.028338146 -0.36416383 49.028096 11350.678 1202.0398 - 500 419.32066 10.786088 0 -8.640773 45.427771 -22.825143 16.22356 2.9684828 
-2113.91 -272.84753 185.53392 0.091614289 0.098205455 0.028793585 -0.36478567 45.368325 11533.101 952.59748 - 600 385.4127 9.9138817 0 -8.4356035 47.783726 -22.456104 6.837575 2.967236 -2023.8117 -918.27943 -2.4106994 0.093360761 0.10198539 0.029589188 -0.36584873 47.725157 11851.775 676.62913 - 700 360.14242 9.2638601 0 -8.2900275 42.626187 -20.571698 -5.7252564 2.9560528 -1806.9448 -1418.2247 -148.41657 0.075011202 0.065835696 0.030359455 -0.36685105 42.558523 12160.301 456.91446 - 800 346.92167 8.923786 0 -8.0694169 45.160336 -21.885719 -6.7745694 2.9575472 -1894.3641 -1329.3179 -136.42193 0.011114896 0.0014455064 0.030808183 -0.3674233 45.076543 12340.037 454.60123 - 900 364.39442 9.3732334 0 -8.0415668 45.604542 -21.816625 5.586068 2.9578604 -1890.4653 -1271.1107 -111.89061 -0.020285587 0.0048148677 0.030774258 -0.36738033 45.521594 12326.448 499.75868 - 1000 390.77042 10.051697 0 -8.1948009 45.264242 -22.833545 6.9260573 2.960122 -2007.6188 -1179.7125 -70.907567 -0.062733519 0.046047757 0.030329191 -0.36681215 45.191633 12148.179 572.98799 -Loop time of 0.248186 on 1 procs for 1000 steps with 200 atoms + 0 0 0 0 -7.3046601 4.3005229 3.3062167 -8.7249482 2.9571502 -1743.5332 -698.49808 -172.07477 0 0 0.033460054 -0.37064378 4.216227 13402.228 -164.47373 + 100 149.95804 3.8573359 0 -7.7921375 42.886648 2.9263894 -1.980193 2.954003 -1888.0547 -1648.7118 -332.0298 -0.099139345 0.11500091 0.033044702 -0.3701566 42.83112 13235.861 -101.30374 + 200 245.00113 6.3021074 0 -8.2639651 41.690123 3.1450717 -4.273021 2.9600599 -1906.2904 -1609.02 -265.94404 -0.20527926 0.49305948 0.031504957 -0.36829556 41.729191 12619.125 112.22426 + 300 300.57486 7.7316177 0 -8.2986331 43.180131 3.336518 -7.7032433 2.9714114 -1968.7685 290.49656 251.72564 -0.21935745 0.56300721 0.029467915 -0.36568855 43.236828 11803.2 814.45889 + 400 368.08438 9.4681493 0 -8.4800193 49.109699 2.5694358 2.9744597 2.9794185 -2335.993 1368.7398 570.03286 -0.028366234 0.0094148316 0.028338146 -0.36416383 
49.028096 11350.678 1202.0398 + 500 419.32066 10.786088 0 -8.640773 45.427771 3.0691855 16.22356 2.9684828 -2113.91 -272.84753 185.53392 0.091614289 0.098205455 0.028793585 -0.36478567 45.368325 11533.101 952.59748 + 600 385.4127 9.9138817 0 -8.4356035 47.783726 3.1614452 6.837575 2.967236 -2023.8117 -918.27943 -2.4106994 0.093360761 0.10198539 0.029589188 -0.36584873 47.725157 11851.775 676.62913 + 700 360.14242 9.2638601 0 -8.2900275 42.626187 3.6325468 -5.7252564 2.9560528 -1806.9448 -1418.2247 -148.41657 0.075011202 0.065835696 0.030359455 -0.36685105 42.558523 12160.301 456.91446 + 800 346.92167 8.923786 0 -8.0694169 45.160336 3.3040415 -6.7745694 2.9575472 -1894.3641 -1329.3179 -136.42193 0.011114896 0.0014455064 0.030808183 -0.3674233 45.076543 12340.037 454.60123 + 900 364.39442 9.3732334 0 -8.0415668 45.604542 3.321315 5.586068 2.9578604 -1890.4653 -1271.1107 -111.89061 -0.020285587 0.0048148677 0.030774258 -0.36738033 45.521594 12326.448 499.75868 + 1000 390.77042 10.051697 0 -8.1948009 45.264242 3.0670849 6.9260573 2.960122 -2007.6188 -1179.7125 -70.907567 -0.062733519 0.046047757 0.030329191 -0.36681215 45.191633 12148.179 572.98799 +Loop time of 0.218442 on 1 procs for 1000 steps with 200 atoms -Performance: 348.126 ns/day, 0.069 hours/ns, 4029.238 timesteps/s, 805.848 katom-step/s -99.6% CPU use with 1 MPI tasks x 1 OpenMP threads +Performance: 395.527 ns/day, 0.061 hours/ns, 4577.865 timesteps/s, 915.573 katom-step/s +98.9% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.14541 | 0.14541 | 0.14541 | 0.0 | 58.59 -Neigh | 0.00099082 | 0.00099082 | 0.00099082 | 0.0 | 0.40 -Comm | 0.0039966 | 0.0039966 | 0.0039966 | 0.0 | 1.61 -Output | 0.00016346 | 0.00016346 | 0.00016346 | 0.0 | 0.07 -Modify | 0.096205 | 0.096205 | 0.096205 | 0.0 | 38.76 -Other | | 0.001425 | | | 0.57 +Pair | 0.11918 | 0.11918 | 
0.11918 | 0.0 | 54.56 +Neigh | 0.0010314 | 0.0010314 | 0.0010314 | 0.0 | 0.47 +Comm | 0.0046197 | 0.0046197 | 0.0046197 | 0.0 | 2.11 +Output | 0.0001329 | 0.0001329 | 0.0001329 | 0.0 | 0.06 +Modify | 0.092616 | 0.092616 | 0.092616 | 0.0 | 42.40 +Other | | 0.0008616 | | | 0.39 Nlocal: 200 ave 200 max 200 min Histogram: 1 0 0 0 0 0 0 0 0 0 diff --git a/examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.1 b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.1 similarity index 70% rename from examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.1 rename to examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.1 index 83821cafb7..58f4695d46 100644 --- a/examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.1 +++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.1 @@ -1,5 +1,6 @@ -LAMMPS (28 Mar 2023) +LAMMPS (3 Aug 2023) Processor partition = 1 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) using 1 OpenMP thread(s) per MPI task variable ibead uloop 99 pad @@ -17,7 +18,7 @@ Reading data file ... 200 atoms reading velocities ... 200 velocities - read_data CPU = 0.001 seconds + read_data CPU = 0.002 seconds pair_coeff * * 0.00965188 3.4 pair_modify shift yes @@ -29,10 +30,7 @@ timestep 0.001 velocity all create 0.0 ${ibead} velocity all create 0.0 02 -fix 1 all pimd/langevin ensemble npt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 iso 1.0 barostat BZP taup 1.0 fixcom no - -Initializing PIMD BZP barostat... 
-The barostat mass is W = 2.3401256650800001e+01 +fix 1 all pimd/langevin method nmpimd ensemble npt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 iso 1.0 barostat BZP taup 1.0 fixcom no thermo_style custom step temp f_1[*] vol press thermo 100 @@ -43,15 +41,6 @@ thermo_modify norm no run 1000 Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule - -Initializing PI Langevin equation thermostat... -Bead ID | omega | tau | c1 | c2 - 0 0.00000000e+00 1.00000000e+00 9.99500125e-01 3.16148726e-02 - 1 8.37986825e+01 5.96668092e-03 9.19616372e-01 3.92817678e-01 - 2 1.18509233e+02 4.21908054e-03 8.88243614e-01 4.59372705e-01 - 3 8.37986825e+01 5.96668092e-03 9.19616372e-01 3.92817678e-01 -PILE_L thermostat successfully initialized! - Neighbor list info ... update: every = 1 steps, delay = 0 steps, check = yes max neighbors/atom: 2000, page size: 100000 @@ -66,31 +55,31 @@ Neighbor list info ... bin: standard Per MPI rank memory allocation (min/avg/max) = 3.121 | 3.121 | 3.121 Mbytes Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] f_1[11] f_1[12] f_1[13] f_1[14] f_1[15] Volume Press - 0 0 0 11.920908 -7.3063682 4.3005229 -21.877018 -8.7249482 2.9571502 -1743.5332 -698.49808 -172.07477 0 0 0.033460054 -0.37064378 4.216227 13402.228 -167.65544 - 100 483.61933 12.440028 11.405863 -7.7749671 42.886648 -23.396327 -1.980193 2.954003 -1888.0547 -1648.7118 -332.0298 -0.099139345 0.11500091 0.033044702 -0.3701566 42.83112 13235.861 606.14668 - 200 452.03836 11.627678 11.47094 -8.2534927 41.690123 -22.521598 -4.273021 2.9600599 -1906.2904 -1609.02 -265.94404 -0.20527926 0.49305948 0.031504957 -0.36829556 41.729191 12619.125 583.5476 - 300 470.25997 12.096389 11.739306 -8.3750153 43.180131 -21.755813 -7.7032433 2.9714114 -1968.7685 290.49656 251.72564 -0.21935745 0.56300721 0.029467915 -0.36568855 43.236828 11803.2 1152.6851 - 400 459.46597 11.818737 12.502421 -8.5240576 49.109699 -24.824142 2.9744597 2.9794185 -2335.993 
1368.7398 570.03286 -0.028366234 0.0094148316 0.028338146 -0.36416383 49.028096 11350.678 1381.0251 - 500 442.73121 11.388273 11.19396 -8.6488583 45.427771 -22.825143 16.22356 2.9684828 -2113.91 -272.84753 185.53392 0.091614289 0.098205455 0.028793585 -0.36478567 45.368325 11533.101 1000.6119 - 600 493.47034 12.693424 11.91335 -8.4625706 47.783726 -22.456104 6.837575 2.967236 -2023.8117 -918.27943 -2.4106994 0.093360761 0.10198539 0.029589188 -0.36584873 47.725157 11851.775 904.52944 - 700 470.04548 12.090871 10.348757 -8.278182 42.626187 -20.571698 -5.7252564 2.9560528 -1806.9448 -1418.2247 -148.41657 0.075011202 0.065835696 0.030359455 -0.36685105 42.558523 12160.301 715.22796 - 800 458.04928 11.782296 11.152029 -8.0926613 45.160336 -21.885719 -6.7745694 2.9575472 -1894.3641 -1329.3179 -136.42193 0.011114896 0.0014455064 0.030808183 -0.3674233 45.076543 12340.037 678.21261 - 900 468.60547 12.05383 10.937315 -8.0319335 45.604542 -21.816625 5.586068 2.9578604 -1890.4653 -1271.1107 -111.89061 -0.020285587 0.0048148677 0.030774258 -0.36738033 45.521594 12326.448 735.24377 - 1000 427.44192 10.99499 11.916587 -8.2229199 45.264242 -22.833545 6.9260573 2.960122 -2007.6188 -1179.7125 -70.907567 -0.062733519 0.046047757 0.030329191 -0.36681215 45.191633 12148.179 637.98311 -Loop time of 0.248186 on 1 procs for 1000 steps with 200 atoms + 0 0 0 11.920908 -7.3063682 4.3005229 3.3062167 -8.7249482 2.9571502 -1743.5332 -698.49808 -172.07477 0 0 0.033460054 -0.37064378 4.216227 13402.228 -167.65544 + 100 483.61933 12.440028 11.405863 -7.7749671 42.886648 2.9263894 -1.980193 2.954003 -1888.0547 -1648.7118 -332.0298 -0.099139345 0.11500091 0.033044702 -0.3701566 42.83112 13235.861 606.14668 + 200 452.03836 11.627678 11.47094 -8.2534927 41.690123 3.1450717 -4.273021 2.9600599 -1906.2904 -1609.02 -265.94404 -0.20527926 0.49305948 0.031504957 -0.36829556 41.729191 12619.125 583.5476 + 300 470.25997 12.096389 11.739306 -8.3750153 43.180131 3.336518 -7.7032433 2.9714114 -1968.7685 
290.49656 251.72564 -0.21935745 0.56300721 0.029467915 -0.36568855 43.236828 11803.2 1152.6851 + 400 459.46597 11.818737 12.502421 -8.5240576 49.109699 2.5694358 2.9744597 2.9794185 -2335.993 1368.7398 570.03286 -0.028366234 0.0094148316 0.028338146 -0.36416383 49.028096 11350.678 1381.0251 + 500 442.73121 11.388273 11.19396 -8.6488583 45.427771 3.0691855 16.22356 2.9684828 -2113.91 -272.84753 185.53392 0.091614289 0.098205455 0.028793585 -0.36478567 45.368325 11533.101 1000.6119 + 600 493.47034 12.693424 11.91335 -8.4625706 47.783726 3.1614452 6.837575 2.967236 -2023.8117 -918.27943 -2.4106994 0.093360761 0.10198539 0.029589188 -0.36584873 47.725157 11851.775 904.52944 + 700 470.04548 12.090871 10.348757 -8.278182 42.626187 3.6325468 -5.7252564 2.9560528 -1806.9448 -1418.2247 -148.41657 0.075011202 0.065835696 0.030359455 -0.36685105 42.558523 12160.301 715.22796 + 800 458.04928 11.782296 11.152029 -8.0926613 45.160336 3.3040415 -6.7745694 2.9575472 -1894.3641 -1329.3179 -136.42193 0.011114896 0.0014455064 0.030808183 -0.3674233 45.076543 12340.037 678.21261 + 900 468.60547 12.05383 10.937315 -8.0319335 45.604542 3.321315 5.586068 2.9578604 -1890.4653 -1271.1107 -111.89061 -0.020285587 0.0048148677 0.030774258 -0.36738033 45.521594 12326.448 735.24377 + 1000 427.44192 10.99499 11.916587 -8.2229199 45.264242 3.0670849 6.9260573 2.960122 -2007.6188 -1179.7125 -70.907567 -0.062733519 0.046047757 0.030329191 -0.36681215 45.191633 12148.179 637.98311 +Loop time of 0.218435 on 1 procs for 1000 steps with 200 atoms -Performance: 348.126 ns/day, 0.069 hours/ns, 4029.238 timesteps/s, 805.848 katom-step/s -99.5% CPU use with 1 MPI tasks x 1 OpenMP threads +Performance: 395.541 ns/day, 0.061 hours/ns, 4578.021 timesteps/s, 915.604 katom-step/s +99.1% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.14654 | 0.14654 | 0.14654 
| 0.0 | 59.04 -Neigh | 0.00099986 | 0.00099986 | 0.00099986 | 0.0 | 0.40 -Comm | 0.0041628 | 0.0041628 | 0.0041628 | 0.0 | 1.68 -Output | 0.00018019 | 0.00018019 | 0.00018019 | 0.0 | 0.07 -Modify | 0.094878 | 0.094878 | 0.094878 | 0.0 | 38.23 -Other | | 0.001424 | | | 0.57 +Pair | 0.11791 | 0.11791 | 0.11791 | 0.0 | 53.98 +Neigh | 0.0010247 | 0.0010247 | 0.0010247 | 0.0 | 0.47 +Comm | 0.0035577 | 0.0035577 | 0.0035577 | 0.0 | 1.63 +Output | 0.00011003 | 0.00011003 | 0.00011003 | 0.0 | 0.05 +Modify | 0.09496 | 0.09496 | 0.09496 | 0.0 | 43.47 +Other | | 0.0008711 | | | 0.40 Nlocal: 200 ave 200 max 200 min Histogram: 1 0 0 0 0 0 0 0 0 0 diff --git a/examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.2 b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.2 similarity index 70% rename from examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.2 rename to examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.2 index fd8dd409ae..0b76ce5bbc 100644 --- a/examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.2 +++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.2 @@ -1,5 +1,6 @@ -LAMMPS (28 Mar 2023) +LAMMPS (3 Aug 2023) Processor partition = 2 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) using 1 OpenMP thread(s) per MPI task variable ibead uloop 99 pad @@ -17,7 +18,7 @@ Reading data file ... 200 atoms reading velocities ... 200 velocities - read_data CPU = 0.001 seconds + read_data CPU = 0.002 seconds pair_coeff * * 0.00965188 3.4 pair_modify shift yes @@ -29,10 +30,7 @@ timestep 0.001 velocity all create 0.0 ${ibead} velocity all create 0.0 03 -fix 1 all pimd/langevin ensemble npt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 iso 1.0 barostat BZP taup 1.0 fixcom no - -Initializing PIMD BZP barostat... 
-The barostat mass is W = 2.3401256650800001e+01 +fix 1 all pimd/langevin method nmpimd ensemble npt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 iso 1.0 barostat BZP taup 1.0 fixcom no thermo_style custom step temp f_1[*] vol press thermo 100 @@ -43,15 +41,6 @@ thermo_modify norm no run 1000 Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule - -Initializing PI Langevin equation thermostat... -Bead ID | omega | tau | c1 | c2 - 0 0.00000000e+00 1.00000000e+00 9.99500125e-01 3.16148726e-02 - 1 8.37986825e+01 5.96668092e-03 9.19616372e-01 3.92817678e-01 - 2 1.18509233e+02 4.21908054e-03 8.88243614e-01 4.59372705e-01 - 3 8.37986825e+01 5.96668092e-03 9.19616372e-01 3.92817678e-01 -PILE_L thermostat successfully initialized! - Neighbor list info ... update: every = 1 steps, delay = 0 steps, check = yes max neighbors/atom: 2000, page size: 100000 @@ -66,31 +55,31 @@ Neighbor list info ... bin: standard Per MPI rank memory allocation (min/avg/max) = 3.121 | 3.121 | 3.121 Mbytes Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] f_1[11] f_1[12] f_1[13] f_1[14] f_1[15] Volume Press - 0 0 0 10.862314 -7.320388 4.3005229 -21.877018 -8.7249482 2.9571502 -1743.5332 -698.49808 -172.07477 0 0 0.033460054 -0.37064378 4.216227 13402.228 -175.34503 - 100 455.18121 11.708521 11.48472 -7.8033686 42.886648 -23.396327 -1.980193 2.954003 -1888.0547 -1648.7118 -332.0298 -0.099139345 0.11500091 0.033044702 -0.3701566 42.83112 13235.861 526.41632 - 200 460.81997 11.853566 10.817157 -8.2276485 41.690123 -22.521598 -4.273021 2.9600599 -1906.2904 -1609.02 -265.94404 -0.20527926 0.49305948 0.031504957 -0.36829556 41.729191 12619.125 615.80924 - 300 481.48652 12.385166 10.035423 -8.3866916 43.180131 -21.755813 -7.7032433 2.9714114 -1968.7685 290.49656 251.72564 -0.21935745 0.56300721 0.029467915 -0.36568855 43.236828 11803.2 1169.2917 - 400 487.3584 12.536208 11.766522 -8.3643382 49.109699 -24.824142 2.9744597 2.9794185 -2335.993 
1368.7398 570.03286 -0.028366234 0.0094148316 0.028338146 -0.36416383 49.028096 11350.678 1574.1427 - 500 446.36019 11.48162 12.144202 -8.680266 45.427771 -22.825143 16.22356 2.9684828 -2113.91 -272.84753 185.53392 0.091614289 0.098205455 0.028793585 -0.36478567 45.368325 11533.101 979.68395 - 600 500.3783 12.871115 11.075008 -8.47833 47.783726 -22.456104 6.837575 2.967236 -2023.8117 -918.27943 -2.4106994 0.093360761 0.10198539 0.029589188 -0.36584873 47.725157 11851.775 912.39361 - 700 435.40634 11.199857 10.923558 -8.3090105 42.626187 -20.571698 -5.7252564 2.9560528 -1806.9448 -1418.2247 -148.41657 0.075011202 0.065835696 0.030359455 -0.36685105 42.558523 12160.301 617.20857 - 800 446.82793 11.493652 11.599712 -8.0900498 45.160336 -21.885719 -6.7745694 2.9575472 -1894.3641 -1329.3179 -136.42193 0.011114896 0.0014455064 0.030808183 -0.3674233 45.076543 12340.037 652.13243 - 900 448.28506 11.531133 12.130739 -8.0810557 45.604542 -21.816625 5.586068 2.9578604 -1890.4653 -1271.1107 -111.89061 -0.020285587 0.0048148677 0.030774258 -0.36738033 45.521594 12326.448 674.68073 - 1000 440.94913 11.342433 10.765654 -8.1419484 45.264242 -22.833545 6.9260573 2.960122 -2007.6188 -1179.7125 -70.907567 -0.062733519 0.046047757 0.030329191 -0.36681215 45.191633 12148.179 730.67128 -Loop time of 0.248185 on 1 procs for 1000 steps with 200 atoms + 0 0 0 10.862314 -7.320388 4.3005229 3.3062167 -8.7249482 2.9571502 -1743.5332 -698.49808 -172.07477 0 0 0.033460054 -0.37064378 4.216227 13402.228 -175.34503 + 100 455.18121 11.708521 11.48472 -7.8033686 42.886648 2.9263894 -1.980193 2.954003 -1888.0547 -1648.7118 -332.0298 -0.099139345 0.11500091 0.033044702 -0.3701566 42.83112 13235.861 526.41632 + 200 460.81997 11.853566 10.817157 -8.2276485 41.690123 3.1450717 -4.273021 2.9600599 -1906.2904 -1609.02 -265.94404 -0.20527926 0.49305948 0.031504957 -0.36829556 41.729191 12619.125 615.80924 + 300 481.48652 12.385166 10.035423 -8.3866916 43.180131 3.336518 -7.7032433 2.9714114 -1968.7685 
290.49656 251.72564 -0.21935745 0.56300721 0.029467915 -0.36568855 43.236828 11803.2 1169.2917 + 400 487.3584 12.536208 11.766522 -8.3643382 49.109699 2.5694358 2.9744597 2.9794185 -2335.993 1368.7398 570.03286 -0.028366234 0.0094148316 0.028338146 -0.36416383 49.028096 11350.678 1574.1427 + 500 446.36019 11.48162 12.144202 -8.680266 45.427771 3.0691855 16.22356 2.9684828 -2113.91 -272.84753 185.53392 0.091614289 0.098205455 0.028793585 -0.36478567 45.368325 11533.101 979.68395 + 600 500.3783 12.871115 11.075008 -8.47833 47.783726 3.1614452 6.837575 2.967236 -2023.8117 -918.27943 -2.4106994 0.093360761 0.10198539 0.029589188 -0.36584873 47.725157 11851.775 912.39361 + 700 435.40634 11.199857 10.923558 -8.3090105 42.626187 3.6325468 -5.7252564 2.9560528 -1806.9448 -1418.2247 -148.41657 0.075011202 0.065835696 0.030359455 -0.36685105 42.558523 12160.301 617.20857 + 800 446.82793 11.493652 11.599712 -8.0900498 45.160336 3.3040415 -6.7745694 2.9575472 -1894.3641 -1329.3179 -136.42193 0.011114896 0.0014455064 0.030808183 -0.3674233 45.076543 12340.037 652.13243 + 900 448.28506 11.531133 12.130739 -8.0810557 45.604542 3.321315 5.586068 2.9578604 -1890.4653 -1271.1107 -111.89061 -0.020285587 0.0048148677 0.030774258 -0.36738033 45.521594 12326.448 674.68073 + 1000 440.94913 11.342433 10.765654 -8.1419484 45.264242 3.0670849 6.9260573 2.960122 -2007.6188 -1179.7125 -70.907567 -0.062733519 0.046047757 0.030329191 -0.36681215 45.191633 12148.179 730.67128 +Loop time of 0.218435 on 1 procs for 1000 steps with 200 atoms -Performance: 348.128 ns/day, 0.069 hours/ns, 4029.259 timesteps/s, 805.852 katom-step/s -97.8% CPU use with 1 MPI tasks x 1 OpenMP threads +Performance: 395.541 ns/day, 0.061 hours/ns, 4578.019 timesteps/s, 915.604 katom-step/s +99.1% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.14702 | 0.14702 | 0.14702 
| 0.0 | 59.24 -Neigh | 0.0010003 | 0.0010003 | 0.0010003 | 0.0 | 0.40 -Comm | 0.0039821 | 0.0039821 | 0.0039821 | 0.0 | 1.60 -Output | 0.00023527 | 0.00023527 | 0.00023527 | 0.0 | 0.09 -Modify | 0.094519 | 0.094519 | 0.094519 | 0.0 | 38.08 -Other | | 0.001427 | | | 0.58 +Pair | 0.12079 | 0.12079 | 0.12079 | 0.0 | 55.30 +Neigh | 0.0010224 | 0.0010224 | 0.0010224 | 0.0 | 0.47 +Comm | 0.0035478 | 0.0035478 | 0.0035478 | 0.0 | 1.62 +Output | 0.00010889 | 0.00010889 | 0.00010889 | 0.0 | 0.05 +Modify | 0.092098 | 0.092098 | 0.092098 | 0.0 | 42.16 +Other | | 0.0008684 | | | 0.40 Nlocal: 200 ave 200 max 200 min Histogram: 1 0 0 0 0 0 0 0 0 0 diff --git a/examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.3 b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.3 similarity index 70% rename from examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.3 rename to examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.3 index 423ebb7d63..25578a068c 100644 --- a/examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.3 +++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.3 @@ -1,5 +1,6 @@ -LAMMPS (28 Mar 2023) +LAMMPS (3 Aug 2023) Processor partition = 3 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) using 1 OpenMP thread(s) per MPI task variable ibead uloop 99 pad @@ -29,10 +30,7 @@ timestep 0.001 velocity all create 0.0 ${ibead} velocity all create 0.0 04 -fix 1 all pimd/langevin ensemble npt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 iso 1.0 barostat BZP taup 1.0 fixcom no - -Initializing PIMD BZP barostat... 
-The barostat mass is W = 2.3401256650800001e+01 +fix 1 all pimd/langevin method nmpimd ensemble npt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 iso 1.0 barostat BZP taup 1.0 fixcom no thermo_style custom step temp f_1[*] vol press thermo 100 @@ -43,15 +41,6 @@ thermo_modify norm no run 1000 Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule - -Initializing PI Langevin equation thermostat... -Bead ID | omega | tau | c1 | c2 - 0 0.00000000e+00 1.00000000e+00 9.99500125e-01 3.16148726e-02 - 1 8.37986825e+01 5.96668092e-03 9.19616372e-01 3.92817678e-01 - 2 1.18509233e+02 4.21908054e-03 8.88243614e-01 4.59372705e-01 - 3 8.37986825e+01 5.96668092e-03 9.19616372e-01 3.92817678e-01 -PILE_L thermostat successfully initialized! - Neighbor list info ... update: every = 1 steps, delay = 0 steps, check = yes max neighbors/atom: 2000, page size: 100000 @@ -66,31 +55,31 @@ Neighbor list info ... bin: standard Per MPI rank memory allocation (min/avg/max) = 3.121 | 3.121 | 3.121 Mbytes Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] f_1[11] f_1[12] f_1[13] f_1[14] f_1[15] Volume Press - 0 0 0 10.794425 -7.3457072 4.3005229 -21.877018 -8.7249482 2.9571502 -1743.5332 -698.49808 -172.07477 0 0 0.033460054 -0.37064378 4.216227 13402.228 -191.02389 - 100 426.01705 10.958338 12.206372 -7.8040582 42.886648 -23.396327 -1.980193 2.954003 -1888.0547 -1648.7118 -332.0298 -0.099139345 0.11500091 0.033044702 -0.3701566 42.83112 13235.861 464.39271 - 200 414.52703 10.662783 11.934129 -8.2331312 41.690123 -22.521598 -4.273021 2.9600599 -1906.2904 -1609.02 -265.94404 -0.20527926 0.49305948 0.031504957 -0.36829556 41.729191 12619.125 502.87052 - 300 424.85622 10.928478 11.681713 -8.357621 43.180131 -21.755813 -7.7032433 2.9714114 -1968.7685 290.49656 251.72564 -0.21935745 0.56300721 0.029467915 -0.36568855 43.236828 11803.2 1058.1162 - 400 485.80103 12.496148 12.255827 -8.3658975 49.109699 -24.824142 2.9744597 2.9794185 -2335.993 
1368.7398 570.03286 -0.028366234 0.0094148316 0.028338146 -0.36416383 49.028096 11350.678 1570.2486 - 500 462.99006 11.909386 11.187609 -8.6934698 45.427771 -22.825143 16.22356 2.9684828 -2113.91 -272.84753 185.53392 0.091614289 0.098205455 0.028793585 -0.36478567 45.368325 11533.101 1014.2134 - 600 465.24407 11.967366 11.168375 -8.4422887 47.783726 -22.456104 6.837575 2.967236 -2023.8117 -918.27943 -2.4106994 0.093360761 0.10198539 0.029589188 -0.36584873 47.725157 11851.775 864.12413 - 700 426.16111 10.962044 11.000011 -8.2855512 42.626187 -20.571698 -5.7252564 2.9560528 -1806.9448 -1418.2247 -148.41657 0.075011202 0.065835696 0.030359455 -0.36685105 42.558523 12160.301 614.76939 - 800 454.53159 11.691811 10.834606 -8.0654281 45.160336 -21.885719 -6.7745694 2.9575472 -1894.3641 -1329.3179 -136.42193 0.011114896 0.0014455064 0.030808183 -0.3674233 45.076543 12340.037 684.85907 - 900 441.72064 11.362278 10.4492 -8.0786302 45.604542 -21.816625 5.586068 2.9578604 -1890.4653 -1271.1107 -111.89061 -0.020285587 0.0048148677 0.030774258 -0.36738033 45.521594 12326.448 659.68525 - 1000 429.90929 11.058457 11.851933 -8.1578394 45.264242 -22.833545 6.9260573 2.960122 -2007.6188 -1179.7125 -70.907567 -0.062733519 0.046047757 0.030329191 -0.36681215 45.191633 12148.179 698.73278 -Loop time of 0.248175 on 1 procs for 1000 steps with 200 atoms + 0 0 0 10.794425 -7.3457072 4.3005229 3.3062167 -8.7249482 2.9571502 -1743.5332 -698.49808 -172.07477 0 0 0.033460054 -0.37064378 4.216227 13402.228 -191.02389 + 100 426.01705 10.958338 12.206372 -7.8040582 42.886648 2.9263894 -1.980193 2.954003 -1888.0547 -1648.7118 -332.0298 -0.099139345 0.11500091 0.033044702 -0.3701566 42.83112 13235.861 464.39271 + 200 414.52703 10.662783 11.934129 -8.2331312 41.690123 3.1450717 -4.273021 2.9600599 -1906.2904 -1609.02 -265.94404 -0.20527926 0.49305948 0.031504957 -0.36829556 41.729191 12619.125 502.87052 + 300 424.85622 10.928478 11.681713 -8.357621 43.180131 3.336518 -7.7032433 2.9714114 -1968.7685 
290.49656 251.72564 -0.21935745 0.56300721 0.029467915 -0.36568855 43.236828 11803.2 1058.1162 + 400 485.80103 12.496148 12.255827 -8.3658975 49.109699 2.5694358 2.9744597 2.9794185 -2335.993 1368.7398 570.03286 -0.028366234 0.0094148316 0.028338146 -0.36416383 49.028096 11350.678 1570.2486 + 500 462.99006 11.909386 11.187609 -8.6934698 45.427771 3.0691855 16.22356 2.9684828 -2113.91 -272.84753 185.53392 0.091614289 0.098205455 0.028793585 -0.36478567 45.368325 11533.101 1014.2134 + 600 465.24407 11.967366 11.168375 -8.4422887 47.783726 3.1614452 6.837575 2.967236 -2023.8117 -918.27943 -2.4106994 0.093360761 0.10198539 0.029589188 -0.36584873 47.725157 11851.775 864.12413 + 700 426.16111 10.962044 11.000011 -8.2855512 42.626187 3.6325468 -5.7252564 2.9560528 -1806.9448 -1418.2247 -148.41657 0.075011202 0.065835696 0.030359455 -0.36685105 42.558523 12160.301 614.76939 + 800 454.53159 11.691811 10.834606 -8.0654281 45.160336 3.3040415 -6.7745694 2.9575472 -1894.3641 -1329.3179 -136.42193 0.011114896 0.0014455064 0.030808183 -0.3674233 45.076543 12340.037 684.85907 + 900 441.72064 11.362278 10.4492 -8.0786302 45.604542 3.321315 5.586068 2.9578604 -1890.4653 -1271.1107 -111.89061 -0.020285587 0.0048148677 0.030774258 -0.36738033 45.521594 12326.448 659.68525 + 1000 429.90929 11.058457 11.851933 -8.1578394 45.264242 3.0670849 6.9260573 2.960122 -2007.6188 -1179.7125 -70.907567 -0.062733519 0.046047757 0.030329191 -0.36681215 45.191633 12148.179 698.73278 +Loop time of 0.218441 on 1 procs for 1000 steps with 200 atoms -Performance: 348.141 ns/day, 0.069 hours/ns, 4029.409 timesteps/s, 805.882 katom-step/s -98.1% CPU use with 1 MPI tasks x 1 OpenMP threads +Performance: 395.530 ns/day, 0.061 hours/ns, 4577.899 timesteps/s, 915.580 katom-step/s +99.8% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.14919 | 0.14919 | 
0.14919 | 0.0 | 60.12 -Neigh | 0.00099112 | 0.00099112 | 0.00099112 | 0.0 | 0.40 -Comm | 0.0040992 | 0.0040992 | 0.0040992 | 0.0 | 1.65 -Output | 0.0001723 | 0.0001723 | 0.0001723 | 0.0 | 0.07 -Modify | 0.092299 | 0.092299 | 0.092299 | 0.0 | 37.19 -Other | | 0.00142 | | | 0.57 +Pair | 0.11655 | 0.11655 | 0.11655 | 0.0 | 53.35 +Neigh | 0.0010236 | 0.0010236 | 0.0010236 | 0.0 | 0.47 +Comm | 0.0035622 | 0.0035622 | 0.0035622 | 0.0 | 1.63 +Output | 0.0001071 | 0.0001071 | 0.0001071 | 0.0 | 0.05 +Modify | 0.096348 | 0.096348 | 0.096348 | 0.0 | 44.11 +Other | | 0.0008537 | | | 0.39 Nlocal: 200 ave 200 max 200 min Histogram: 1 0 0 0 0 0 0 0 0 0 diff --git a/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++ new file mode 100644 index 0000000000..a05d2d9aa8 --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++ @@ -0,0 +1,2 @@ +LAMMPS (3 Aug 2023) +Running on 4 partitions of processors diff --git a/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.0 b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.0 new file mode 100644 index 0000000000..2d9d049a49 --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.0 @@ -0,0 +1,103 @@ +LAMMPS (3 Aug 2023) +Processor partition = 0 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +variable ibead uloop 99 pad + +units metal +atom_style atomic +atom_modify map yes +boundary p p p +pair_style lj/cut 9.5251 +read_data data.metalnpt${ibead} +read_data data.metalnpt01 +Reading data file ... + orthogonal box = (-11.876697 -11.876697 -11.876697) to (11.876697 11.876697 11.876697) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 200 atoms + reading velocities ... 
+ 200 velocities + read_data CPU = 0.001 seconds + +pair_coeff * * 0.00965188 3.4 +pair_modify shift yes + +mass 1 39.948 + +timestep 0.001 + +velocity all create 0.0 ${ibead} +velocity all create 0.0 01 + +fix 1 all pimd/langevin method pimd ensemble nvt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 taup 1.0 fixcom no + +thermo_style custom step temp f_1[*] vol press +thermo 100 +thermo_modify norm no + +# dump dcd all custom 100 ${ibead}.dcd id type xu yu zu vx vy vz ix iy iz fx fy fz +# dump_modify dcd sort id format line "%d %d %.16f %.16f %.16f %.16f %.16f %.16f %d %d %d %.16f %.16f %.16f" + +run 1000 +Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule +Initializing PI Langevin equation thermostat... + Bead ID | omega | tau | c1 | c2 + 0 5.92546167e+01 1.00000000e+00 9.99500125e-01 3.16148726e-02 + 1 5.92546167e+01 1.00000000e+00 9.99500125e-01 3.16148726e-02 + 2 5.92546167e+01 1.00000000e+00 9.99500125e-01 3.16148726e-02 + 3 5.92546167e+01 1.00000000e+00 9.99500125e-01 3.16148726e-02 + PILE_L thermostat successfully initialized! + +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 11.5251 + ghost atom cutoff = 11.5251 + binsize = 5.76255, bins = 5 5 5 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.121 | 3.121 | 3.121 Mbytes + Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] Volume Press + 0 0 0 8.8893303 -7.3046601 4.3005229 3.3062167 0 0 -1743.5332 0 0 13402.228 -164.47373 + 100 248.24141 6.3854564 4.1458616 -7.7546467 9.6829291 3.3062167 0 0 -1743.5332 0.00061164884 127.22547 13402.228 26.894568 + 200 346.2569 8.9066861 2.6427185 -7.8943744 14.016847 3.3062167 0 0 -1743.5332 0.00088274242 177.45909 13402.228 91.225638 + 300 217.65314 5.5986414 7.0223362 -7.788449 18.162833 3.3062167 0 0 -1743.5332 0.00054181173 111.54876 13402.228 -125.00786 + 400 266.83825 6.8638187 6.2507813 -7.7241546 21.628032 3.3062167 0 0 -1743.5332 0.0007209203 136.75648 13402.228 21.104834 + 500 342.40379 8.8075736 5.1959052 -7.7020799 25.668758 3.3062167 0 0 -1743.5332 0.00087137898 175.48435 13402.228 208.60351 + 600 280.37754 7.2120867 8.0025846 -7.5954127 28.570788 3.3062167 0 0 -1743.5332 0.00072432598 143.69546 13402.228 162.09838 + 700 377.11625 9.700474 6.0049074 -7.5861377 30.034627 3.3062167 0 0 -1743.5332 0.00092377441 193.27472 13402.228 389.35575 + 800 378.36221 9.7325237 6.1704761 -7.6170017 31.447502 3.3062167 0 0 -1743.5332 0.00097254954 193.91329 13402.228 387.71781 + 900 271.99864 6.9965581 9.037081 -7.4781664 32.906719 3.3062167 0 0 -1743.5332 0.00074024142 139.40121 13402.228 266.3664 + 1000 362.4753 9.3238683 8.0266514 -7.4835536 34.914285 3.3062167 0 0 -1743.5332 0.00088800624 185.77114 13402.228 433.67079 +Loop time of 0.166656 on 1 procs for 1000 steps with 200 atoms + +Performance: 
518.435 ns/day, 0.046 hours/ns, 6000.401 timesteps/s, 1.200 Matom-step/s +99.5% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.10478 | 0.10478 | 0.10478 | 0.0 | 62.87 +Neigh | 0.00096007 | 0.00096007 | 0.00096007 | 0.0 | 0.58 +Comm | 0.0035065 | 0.0035065 | 0.0035065 | 0.0 | 2.10 +Output | 0.0001037 | 0.0001037 | 0.0001037 | 0.0 | 0.06 +Modify | 0.056454 | 0.056454 | 0.056454 | 0.0 | 33.87 +Other | | 0.0008515 | | | 0.51 + +Nlocal: 200 ave 200 max 200 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 1360 ave 1360 max 1360 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 9544 ave 9544 max 9544 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 9544 +Ave neighs/atom = 47.72 +Neighbor list builds = 4 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.1 b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.1 new file mode 100644 index 0000000000..9be069b960 --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.1 @@ -0,0 +1,95 @@ +LAMMPS (3 Aug 2023) +Processor partition = 1 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +variable ibead uloop 99 pad + +units metal +atom_style atomic +atom_modify map yes +boundary p p p +pair_style lj/cut 9.5251 +read_data data.metalnpt${ibead} +read_data data.metalnpt02 +Reading data file ... + orthogonal box = (-11.876697 -11.876697 -11.876697) to (11.876697 11.876697 11.876697) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 200 atoms + reading velocities ... 
+ 200 velocities + read_data CPU = 0.001 seconds + +pair_coeff * * 0.00965188 3.4 +pair_modify shift yes + +mass 1 39.948 + +timestep 0.001 + +velocity all create 0.0 ${ibead} +velocity all create 0.0 02 + +fix 1 all pimd/langevin method pimd ensemble nvt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 taup 1.0 fixcom no + +thermo_style custom step temp f_1[*] vol press +thermo 100 +thermo_modify norm no + +# dump dcd all custom 100 ${ibead}.dcd id type xu yu zu vx vy vz ix iy iz fx fy fz +# dump_modify dcd sort id format line "%d %d %.16f %.16f %.16f %.16f %.16f %.16f %d %d %d %.16f %.16f %.16f" + +run 1000 +Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule +Neighbor list info ... + update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 11.5251 + ghost atom cutoff = 11.5251 + binsize = 5.76255, bins = 5 5 5 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.121 | 3.121 | 3.121 Mbytes + Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] Volume Press + 0 0 0 8.4854554 -7.3063682 4.3005229 3.3062167 0 0 -1743.5332 0 0 13402.228 -167.65544 + 100 231.55472 5.9562285 3.9188988 -7.7552569 9.6829291 3.3062167 0 0 -1743.5332 0.00061164884 127.22547 13402.228 -9.7693407 + 200 366.33366 9.423116 2.3606144 -7.8893287 14.016847 3.3062167 0 0 -1743.5332 0.00088274242 177.45909 13402.228 131.05061 + 300 213.74457 5.4981021 6.4391043 -7.7947526 18.162833 3.3062167 0 0 -1743.5332 0.00054181173 111.54876 13402.228 -136.92734 + 400 273.60832 7.0379636 5.6777233 -7.7709858 21.628032 3.3062167 0 0 -1743.5332 0.0007209203 136.75648 13402.228 -0.14681392 + 500 338.99655 8.7199299 5.4335645 -7.7194465 25.668758 3.3062167 0 0 -1743.5332 0.00087137898 175.48435 
13402.228 190.1705 + 600 298.58126 7.6803369 7.2512164 -7.5741948 28.570788 3.3062167 0 0 -1743.5332 0.00072432598 143.69546 13402.228 226.78095 + 700 352.53291 9.0681226 5.4845895 -7.5875298 30.034627 3.3062167 0 0 -1743.5332 0.00092377441 193.27472 13402.228 335.39327 + 800 389.70585 10.024313 5.143907 -7.6218106 31.447502 3.3062167 0 0 -1743.5332 0.00097254954 193.91329 13402.228 409.36108 + 900 285.3019 7.3387547 7.6228894 -7.5140003 32.906719 3.3062167 0 0 -1743.5332 0.00074024142 139.40121 13402.228 266.93105 + 1000 345.35667 8.8835299 6.9652602 -7.5180013 34.914285 3.3062167 0 0 -1743.5332 0.00088800624 185.77114 13402.228 368.83819 +Loop time of 0.16666 on 1 procs for 1000 steps with 200 atoms + +Performance: 518.420 ns/day, 0.046 hours/ns, 6000.230 timesteps/s, 1.200 Matom-step/s +99.8% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.10798 | 0.10798 | 0.10798 | 0.0 | 64.79 +Neigh | 0.00097784 | 0.00097784 | 0.00097784 | 0.0 | 0.59 +Comm | 0.0035304 | 0.0035304 | 0.0035304 | 0.0 | 2.12 +Output | 8.5625e-05 | 8.5625e-05 | 8.5625e-05 | 0.0 | 0.05 +Modify | 0.05322 | 0.05322 | 0.05322 | 0.0 | 31.93 +Other | | 0.0008694 | | | 0.52 + +Nlocal: 200 ave 200 max 200 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 1360 ave 1360 max 1360 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 9552 ave 9552 max 9552 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 9552 +Ave neighs/atom = 47.76 +Neighbor list builds = 4 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.2 b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.2 new file mode 100644 index 0000000000..f5869bcb03 --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.2 @@ -0,0 +1,95 @@ 
+LAMMPS (3 Aug 2023) +Processor partition = 2 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +variable ibead uloop 99 pad + +units metal +atom_style atomic +atom_modify map yes +boundary p p p +pair_style lj/cut 9.5251 +read_data data.metalnpt${ibead} +read_data data.metalnpt03 +Reading data file ... + orthogonal box = (-11.876697 -11.876697 -11.876697) to (11.876697 11.876697 11.876697) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 200 atoms + reading velocities ... + 200 velocities + read_data CPU = 0.001 seconds + +pair_coeff * * 0.00965188 3.4 +pair_modify shift yes + +mass 1 39.948 + +timestep 0.001 + +velocity all create 0.0 ${ibead} +velocity all create 0.0 03 + +fix 1 all pimd/langevin method pimd ensemble nvt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 taup 1.0 fixcom no + +thermo_style custom step temp f_1[*] vol press +thermo 100 +thermo_modify norm no + +# dump dcd all custom 100 ${ibead}.dcd id type xu yu zu vx vy vz ix iy iz fx fy fz +# dump_modify dcd sort id format line "%d %d %.16f %.16f %.16f %.16f %.16f %.16f %d %d %d %.16f %.16f %.16f" + +run 1000 +Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 11.5251 + ghost atom cutoff = 11.5251 + binsize = 5.76255, bins = 5 5 5 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.121 | 3.121 | 3.121 Mbytes + Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] Volume Press + 0 0 0 8.4016332 -7.320388 4.3005229 3.3062167 0 0 -1743.5332 0 0 13402.228 -175.34503 + 100 235.06814 6.0466034 4.1185166 -7.7660023 9.6829291 3.3062167 0 0 -1743.5332 0.00061164884 127.22547 13402.228 -7.6578222 + 200 341.9927 8.7969992 2.7767151 -7.9109058 14.016847 3.3062167 0 0 -1743.5332 0.00088274242 177.45909 13402.228 69.587081 + 300 206.29873 5.3065745 7.3388955 -7.7440046 18.162833 3.3062167 0 0 -1743.5332 0.00054181173 111.54876 13402.228 -118.3143 + 400 305.56268 7.8599181 5.7681208 -7.7110516 21.628032 3.3062167 0 0 -1743.5332 0.0007209203 136.75648 13402.228 107.63706 + 500 313.47536 8.0634543 5.5086382 -7.7030371 25.668758 3.3062167 0 0 -1743.5332 0.00087137898 175.48435 13402.228 145.14899 + 600 258.53638 6.6502715 8.1299001 -7.6530176 28.570788 3.3062167 0 0 -1743.5332 0.00072432598 143.69546 13402.228 92.36234 + 700 357.63679 9.1994085 6.539048 -7.6186515 30.034627 3.3062167 0 0 -1743.5332 0.00092377441 193.27472 13402.228 329.17147 + 800 391.32883 10.066061 5.7809035 -7.6148923 31.447502 3.3062167 0 0 -1743.5332 0.00097254954 193.91329 13402.228 415.13205 + 900 308.61185 7.9383512 8.9544585 -7.4803275 32.906719 3.3062167 0 0 -1743.5332 0.00074024142 139.40121 13402.228 341.46691 + 1000 317.70376 8.1722204 7.3013798 -7.4667312 34.914285 3.3062167 0 0 -1743.5332 0.00088800624 185.77114 13402.228 352.92253 +Loop time of 0.16666 on 1 procs for 1000 steps with 200 atoms + +Performance: 
518.420 ns/day, 0.046 hours/ns, 6000.235 timesteps/s, 1.200 Matom-step/s +98.6% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.10509 | 0.10509 | 0.10509 | 0.0 | 63.06 +Neigh | 0.00096379 | 0.00096379 | 0.00096379 | 0.0 | 0.58 +Comm | 0.0035557 | 0.0035557 | 0.0035557 | 0.0 | 2.13 +Output | 7.8072e-05 | 7.8072e-05 | 7.8072e-05 | 0.0 | 0.05 +Modify | 0.05611 | 0.05611 | 0.05611 | 0.0 | 33.67 +Other | | 0.0008601 | | | 0.52 + +Nlocal: 200 ave 200 max 200 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 1364 ave 1364 max 1364 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 9545 ave 9545 max 9545 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 9545 +Ave neighs/atom = 47.725 +Neighbor list builds = 4 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.3 b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.3 new file mode 100644 index 0000000000..da0767fe0d --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.3 @@ -0,0 +1,95 @@ +LAMMPS (3 Aug 2023) +Processor partition = 3 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +variable ibead uloop 99 pad + +units metal +atom_style atomic +atom_modify map yes +boundary p p p +pair_style lj/cut 9.5251 +read_data data.metalnpt${ibead} +read_data data.metalnpt04 +Reading data file ... + orthogonal box = (-11.876697 -11.876697 -11.876697) to (11.876697 11.876697 11.876697) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 200 atoms + reading velocities ... 
+ 200 velocities + read_data CPU = 0.001 seconds + +pair_coeff * * 0.00965188 3.4 +pair_modify shift yes + +mass 1 39.948 + +timestep 0.001 + +velocity all create 0.0 ${ibead} +velocity all create 0.0 04 + +fix 1 all pimd/langevin method pimd ensemble nvt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 taup 1.0 fixcom no + +thermo_style custom step temp f_1[*] vol press +thermo 100 +thermo_modify norm no + +# dump dcd all custom 100 ${ibead}.dcd id type xu yu zu vx vy vz ix iy iz fx fy fz +# dump_modify dcd sort id format line "%d %d %.16f %.16f %.16f %.16f %.16f %.16f %d %d %d %.16f %.16f %.16f" + +run 1000 +Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule +Neighbor list info ... + update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 11.5251 + ghost atom cutoff = 11.5251 + binsize = 5.76255, bins = 5 5 5 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.121 | 3.121 | 3.121 Mbytes + Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] Volume Press + 0 0 0 7.8012276 -7.3457072 4.3005229 3.3062167 0 0 -1743.5332 0 0 13402.228 -191.02389 + 100 241.19035 6.2040835 3.9473764 -7.7641902 9.6829291 3.3062167 0 0 -1743.5332 0.00061164884 127.22547 13402.228 3.5720518 + 200 325.21166 8.3653443 2.3529831 -7.9137212 14.016847 3.3062167 0 0 -1743.5332 0.00088274242 177.45909 13402.228 33.184125 + 300 209.19735 5.381135 6.7063061 -7.801056 18.162833 3.3062167 0 0 -1743.5332 0.00054181173 111.54876 13402.228 -154.10632 + 400 280.84513 7.2241142 5.8838331 -7.7320495 21.628032 3.3062167 0 0 -1743.5332 0.0007209203 136.75648 13402.228 45.624285 + 500 367.15726 9.4443014 5.2842629 -7.6643085 25.668758 3.3062167 0 0 -1743.5332 0.00087137898 175.48435 13402.228 
283.59979 + 600 294.68254 7.5800508 6.5104311 -7.6234652 28.570788 3.3062167 0 0 -1743.5332 0.00072432598 143.69546 13402.228 183.09906 + 700 356.64514 9.1739005 5.2769462 -7.6204507 30.034627 3.3062167 0 0 -1743.5332 0.00092377441 193.27472 13402.228 334.31754 + 800 360.77353 9.2800941 5.7976264 -7.6946985 31.447502 3.3062167 0 0 -1743.5332 0.00097254954 193.91329 13402.228 299.62001 + 900 291.14241 7.4889889 7.5124196 -7.5102882 32.906719 3.3062167 0 0 -1743.5332 0.00074024142 139.40121 13402.228 277.94834 + 1000 362.48694 9.3241677 6.8711151 -7.4856221 34.914285 3.3062167 0 0 -1743.5332 0.00088800624 185.77114 13402.228 428.98682 +Loop time of 0.166662 on 1 procs for 1000 steps with 200 atoms + +Performance: 518.414 ns/day, 0.046 hours/ns, 6000.167 timesteps/s, 1.200 Matom-step/s +98.5% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.10817 | 0.10817 | 0.10817 | 0.0 | 64.90 +Neigh | 0.00096402 | 0.00096402 | 0.00096402 | 0.0 | 0.58 +Comm | 0.0044991 | 0.0044991 | 0.0044991 | 0.0 | 2.70 +Output | 8.5449e-05 | 8.5449e-05 | 8.5449e-05 | 0.0 | 0.05 +Modify | 0.052066 | 0.052066 | 0.052066 | 0.0 | 31.24 +Other | | 0.00088 | | | 0.53 + +Nlocal: 200 ave 200 max 200 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 1368 ave 1368 max 1368 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 9541 ave 9541 max 9541 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 9541 +Ave neighs/atom = 47.705 +Neighbor list builds = 4 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/PACKAGES/pimd/langevin_metal_units/run.sh b/examples/PACKAGES/pimd/langevin_metal_units/run.sh index 2580ef1a41..8bac9231a3 100644 --- a/examples/PACKAGES/pimd/langevin_metal_units/run.sh +++ b/examples/PACKAGES/pimd/langevin_metal_units/run.sh @@ -1 +1,3 @@ -mpirun -np 4 $LMP -in in.lmp -p 4x1 -log log -screen screen +mpirun -np 4 
$LMP -in in.langevin.metal -p 4x1 -log log.langevin.metal -screen screen +mpirun -np 4 $LMP -in in.pimd-langevin.metal -p 4x1 -log log.pimd-langevin.metal -screen screen + From 44b126a87d9800b1ff97a9d4b2a0ec59a93847aa Mon Sep 17 00:00:00 2001 From: jbcouli Date: Thu, 16 Nov 2023 10:35:02 -0700 Subject: [PATCH 028/116] correct typo and link in body particles doc --- doc/src/Howto_body.rst | 6 +++--- doc/src/pair_body_rounded_polyhedron.rst | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/src/Howto_body.rst b/doc/src/Howto_body.rst index 88fa2d9c97..115b7797c8 100644 --- a/doc/src/Howto_body.rst +++ b/doc/src/Howto_body.rst @@ -170,9 +170,9 @@ with this body style to compute body/body and body/non-body interactions. The *rounded/polygon* body style represents body particles as a 2d polygon with a variable number of N vertices. This style can only be used for 2d models; see the :doc:`boundary ` command. See the -"pair_style body/rounded/polygon" page for a diagram of two -squares with rounded circles at the vertices. Special cases for N = 1 -(circle) and N = 2 (rod with rounded ends) can also be specified. +:doc:`pair_style body/rounded/polygon ` page for +a diagram of two squares with rounded circles at the vertices. Special cases +for N = 1 (circle) and N = 2 (rod with rounded ends) can also be specified. One use of this body style is for 2d discrete element models, as described in :ref:`Fraige `. diff --git a/doc/src/pair_body_rounded_polyhedron.rst b/doc/src/pair_body_rounded_polyhedron.rst index f2f7c1676a..b3eaf72321 100644 --- a/doc/src/pair_body_rounded_polyhedron.rst +++ b/doc/src/pair_body_rounded_polyhedron.rst @@ -40,7 +40,7 @@ rounded/polyhedron particles. This pairwise interaction between the rounded polyhedra is described in :ref:`Wang `, where a polyhedron does not have sharp corners and edges, but is rounded at its vertices and edges by spheres -centered on each vertex with a specified diameter. 
The edges if the +centered on each vertex with a specified diameter. The edges of the polyhedron are defined between pairs of adjacent vertices. Its faces are defined by a loop of edges. The sphere diameter for each polygon is specified in the data file read by the :doc:`read data ` From ac5f28719b7cf4170edbdfad11417eb9c46c226b Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 16 Nov 2023 15:52:45 -0500 Subject: [PATCH 029/116] explicitly include `if constexpr` only when C++17 or later is selected --- src/fmt/compile.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fmt/compile.h b/src/fmt/compile.h index a4c7e49563..ef8d5c0256 100644 --- a/src/fmt/compile.h +++ b/src/fmt/compile.h @@ -61,7 +61,7 @@ const T& first(const T& value, const Tail&...) { return value; } -#if defined(__cpp_if_constexpr) && defined(__cpp_return_type_deduction) +#if defined(__cpp_if_constexpr) && defined(__cpp_return_type_deduction) && FMT_CPLUSPLUS >= 201703L template struct type_list {}; // Returns a reference to the argument at index N from [first, rest...]. 
From ddd5cc1a737281f52f7acbda545623b7c3e8435c Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 16 Nov 2023 15:53:23 -0500 Subject: [PATCH 030/116] avoid segfault on command errors in force style unit tests and print error message instead --- unittest/force-styles/test_angle_style.cpp | 19 ++++++++++++++-- unittest/force-styles/test_bond_style.cpp | 19 ++++++++++++++-- unittest/force-styles/test_dihedral_style.cpp | 19 ++++++++++++++-- unittest/force-styles/test_improper_style.cpp | 19 ++++++++++++++-- unittest/force-styles/test_pair_style.cpp | 22 ++++++++++++++++--- 5 files changed, 87 insertions(+), 11 deletions(-) diff --git a/unittest/force-styles/test_angle_style.cpp b/unittest/force-styles/test_angle_style.cpp index bd0e3d8859..3476ae8dde 100644 --- a/unittest/force-styles/test_angle_style.cpp +++ b/unittest/force-styles/test_angle_style.cpp @@ -26,6 +26,7 @@ #include "angle.h" #include "atom.h" #include "compute.h" +#include "exceptions.h" #include "fmt/format.h" #include "force.h" #include "info.h" @@ -59,7 +60,7 @@ void cleanup_lammps(LAMMPS *lmp, const TestConfig &cfg) delete lmp; } -LAMMPS *init_lammps(LAMMPS::argv & args, const TestConfig &cfg, const bool newton = true) +LAMMPS *init_lammps(LAMMPS::argv &args, const TestConfig &cfg, const bool newton = true) { LAMMPS *lmp; @@ -90,7 +91,21 @@ LAMMPS *init_lammps(LAMMPS::argv & args, const TestConfig &cfg, const bool newto // utility lambdas to improve readability auto command = [&](const std::string &line) { - lmp->input->one(line); + try { + lmp->input->one(line); + } catch (LAMMPSAbortException &ae) { + fprintf(stderr, "LAMMPS Error: %s\n", ae.what()); + exit(2); + } catch (LAMMPSException &e) { + fprintf(stderr, "LAMMPS Error: %s\n", e.what()); + exit(3); + } catch (fmt::format_error &fe) { + fprintf(stderr, "fmt::format_error: %s\n", fe.what()); + exit(4); + } catch (std::exception &e) { + fprintf(stderr, "General exception: %s\n", e.what()); + exit(5); + } }; auto parse_input_script = 
[&](const std::string &filename) { lmp->input->file(filename.c_str()); diff --git a/unittest/force-styles/test_bond_style.cpp b/unittest/force-styles/test_bond_style.cpp index aa99f41f8d..f7ecd835b0 100644 --- a/unittest/force-styles/test_bond_style.cpp +++ b/unittest/force-styles/test_bond_style.cpp @@ -26,6 +26,7 @@ #include "atom.h" #include "bond.h" #include "compute.h" +#include "exceptions.h" #include "fmt/format.h" #include "force.h" #include "info.h" @@ -59,7 +60,7 @@ void cleanup_lammps(LAMMPS *lmp, const TestConfig &cfg) delete lmp; } -LAMMPS *init_lammps(LAMMPS::argv & args, const TestConfig &cfg, const bool newton = true) +LAMMPS *init_lammps(LAMMPS::argv &args, const TestConfig &cfg, const bool newton = true) { LAMMPS *lmp; @@ -90,7 +91,21 @@ LAMMPS *init_lammps(LAMMPS::argv & args, const TestConfig &cfg, const bool newto // utility lambdas to improve readability auto command = [&](const std::string &line) { - lmp->input->one(line); + try { + lmp->input->one(line); + } catch (LAMMPSAbortException &ae) { + fprintf(stderr, "LAMMPS Error: %s\n", ae.what()); + exit(2); + } catch (LAMMPSException &e) { + fprintf(stderr, "LAMMPS Error: %s\n", e.what()); + exit(3); + } catch (fmt::format_error &fe) { + fprintf(stderr, "fmt::format_error: %s\n", fe.what()); + exit(4); + } catch (std::exception &e) { + fprintf(stderr, "General exception: %s\n", e.what()); + exit(5); + } }; auto parse_input_script = [&](const std::string &filename) { lmp->input->file(filename.c_str()); diff --git a/unittest/force-styles/test_dihedral_style.cpp b/unittest/force-styles/test_dihedral_style.cpp index 25690fc33d..662d63909d 100644 --- a/unittest/force-styles/test_dihedral_style.cpp +++ b/unittest/force-styles/test_dihedral_style.cpp @@ -26,6 +26,7 @@ #include "atom.h" #include "compute.h" #include "dihedral.h" +#include "exceptions.h" #include "fmt/format.h" #include "force.h" #include "info.h" @@ -59,7 +60,7 @@ void cleanup_lammps(LAMMPS *lmp, const TestConfig &cfg) delete lmp; } 
-LAMMPS *init_lammps(LAMMPS::argv & args, const TestConfig &cfg, const bool newton = true) +LAMMPS *init_lammps(LAMMPS::argv &args, const TestConfig &cfg, const bool newton = true) { LAMMPS *lmp = new LAMMPS(args, MPI_COMM_WORLD); @@ -88,7 +89,21 @@ LAMMPS *init_lammps(LAMMPS::argv & args, const TestConfig &cfg, const bool newto // utility lambdas to improve readability auto command = [&](const std::string &line) { - lmp->input->one(line); + try { + lmp->input->one(line); + } catch (LAMMPSAbortException &ae) { + fprintf(stderr, "LAMMPS Error: %s\n", ae.what()); + exit(2); + } catch (LAMMPSException &e) { + fprintf(stderr, "LAMMPS Error: %s\n", e.what()); + exit(3); + } catch (fmt::format_error &fe) { + fprintf(stderr, "fmt::format_error: %s\n", fe.what()); + exit(4); + } catch (std::exception &e) { + fprintf(stderr, "General exception: %s\n", e.what()); + exit(5); + } }; auto parse_input_script = [&](const std::string &filename) { lmp->input->file(filename.c_str()); diff --git a/unittest/force-styles/test_improper_style.cpp b/unittest/force-styles/test_improper_style.cpp index b4096df868..dc1b846b5a 100644 --- a/unittest/force-styles/test_improper_style.cpp +++ b/unittest/force-styles/test_improper_style.cpp @@ -25,6 +25,7 @@ #include "atom.h" #include "compute.h" +#include "exceptions.h" #include "fmt/format.h" #include "force.h" #include "improper.h" @@ -59,7 +60,7 @@ void cleanup_lammps(LAMMPS *lmp, const TestConfig &cfg) delete lmp; } -LAMMPS *init_lammps(LAMMPS::argv & args, const TestConfig &cfg, const bool newton = true) +LAMMPS *init_lammps(LAMMPS::argv &args, const TestConfig &cfg, const bool newton = true) { LAMMPS *lmp; @@ -90,7 +91,21 @@ LAMMPS *init_lammps(LAMMPS::argv & args, const TestConfig &cfg, const bool newto // utility lambdas to improve readability auto command = [&](const std::string &line) { - lmp->input->one(line); + try { + lmp->input->one(line); + } catch (LAMMPSAbortException &ae) { + fprintf(stderr, "LAMMPS Error: %s\n", ae.what()); + 
exit(2); + } catch (LAMMPSException &e) { + fprintf(stderr, "LAMMPS Error: %s\n", e.what()); + exit(3); + } catch (fmt::format_error &fe) { + fprintf(stderr, "fmt::format_error: %s\n", fe.what()); + exit(4); + } catch (std::exception &e) { + fprintf(stderr, "General exception: %s\n", e.what()); + exit(5); + } }; auto parse_input_script = [&](const std::string &filename) { lmp->input->file(filename.c_str()); diff --git a/unittest/force-styles/test_pair_style.cpp b/unittest/force-styles/test_pair_style.cpp index 8ad2ce9aaa..9db9c7ac8b 100644 --- a/unittest/force-styles/test_pair_style.cpp +++ b/unittest/force-styles/test_pair_style.cpp @@ -26,6 +26,7 @@ #include "atom.h" #include "compute.h" #include "domain.h" +#include "exceptions.h" #include "force.h" #include "info.h" #include "input.h" @@ -61,7 +62,7 @@ void cleanup_lammps(LAMMPS *lmp, const TestConfig &cfg) delete lmp; } -LAMMPS *init_lammps(LAMMPS::argv & args, const TestConfig &cfg, const bool newton = true) +LAMMPS *init_lammps(LAMMPS::argv &args, const TestConfig &cfg, const bool newton = true) { LAMMPS *lmp; @@ -92,8 +93,23 @@ LAMMPS *init_lammps(LAMMPS::argv & args, const TestConfig &cfg, const bool newto // utility lambdas to improve readability auto command = [&](const std::string &line) { - lmp->input->one(line); + try { + lmp->input->one(line); + } catch (LAMMPSAbortException &ae) { + fprintf(stderr, "LAMMPS Error: %s\n", ae.what()); + exit(2); + } catch (LAMMPSException &e) { + fprintf(stderr, "LAMMPS Error: %s\n", e.what()); + exit(3); + } catch (fmt::format_error &fe) { + fprintf(stderr, "fmt::format_error: %s\n", fe.what()); + exit(4); + } catch (std::exception &e) { + fprintf(stderr, "General exception: %s\n", e.what()); + exit(5); + } }; + auto parse_input_script = [&](const std::string &filename) { lmp->input->file(filename.c_str()); }; @@ -760,7 +776,7 @@ TEST(PairStyle, gpu) "screen", "-nocite", "-sf", "gpu"}; LAMMPS::argv args_noneigh = {"PairStyle", "-log", "none", "-echo", "screen", 
"-nocite", "-sf", "gpu", "-pk", "gpu", "0", "neigh", "no"}; - LAMMPS::argv args = args_neigh; + LAMMPS::argv args = args_neigh; // cannot use GPU neighbor list with hybrid pair style (yet) if (test_config.pair_style.substr(0, 6) == "hybrid") { From 718cfc4562150ad87dc1def13003f0060ce8de39 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 16 Nov 2023 14:31:50 -0700 Subject: [PATCH 031/116] Fix indexing bug --- src/npair_trim.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/npair_trim.cpp b/src/npair_trim.cpp index a4b6c1c6a1..f026466f92 100644 --- a/src/npair_trim.cpp +++ b/src/npair_trim.cpp @@ -63,7 +63,7 @@ void NPairTrim::build(NeighList *list) neighptr = ipage->vget(); const int i = ilist_copy[ii]; - ilist[i] = i; + ilist[ii] = i; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; From 6f366b8c74a4eb2aa3ddd7718f6ffa4666e2dd62 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Thu, 16 Nov 2023 16:21:59 -0700 Subject: [PATCH 032/116] Fix issues with sorting neigh list by cutoff distance --- src/neighbor.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/neighbor.cpp b/src/neighbor.cpp index 8d12edeef2..20d6306572 100644 --- a/src/neighbor.cpp +++ b/src/neighbor.cpp @@ -1123,15 +1123,14 @@ int Neighbor::init_pair() } /* ---------------------------------------------------------------------- - sort NeighRequests by cutoff distance - to find smallest list for trimming + sort NeighRequests by cutoff distance for trimming ------------------------------------------------------------------------- */ void Neighbor::sort_requests() { - NeighRequest *jrq; + NeighRequest *irq,*jrq; int i,j,jmin; - double jcut; + double icut,jcut; delete[] j_sorted; j_sorted = new int[nrequest]; @@ -1139,20 +1138,24 @@ void Neighbor::sort_requests() for (i = 0; i < nrequest; i++) j_sorted[i] = i; - for (i = 0; i < nrequest; i++) { - double cutoff_min = cutneighmax; + for (i = 0; i < nrequest-1; i++) { + irq = 
requests[j_sorted[i]]; + if (irq->cut) icut = irq->cutoff; + else icut = cutneighmax; + double cutoff_min = icut; jmin = i; - for (j = i; j < nrequest-1; j++) { + for (j = i+1; j < nrequest; j++) { jrq = requests[j_sorted[j]]; if (jrq->cut) jcut = jrq->cutoff; else jcut = cutneighmax; - if (jcut <= cutoff_min) { + if (jcut < cutoff_min) { cutoff_min = jcut; jmin = j; } } + int tmp = j_sorted[i]; j_sorted[i] = j_sorted[jmin]; j_sorted[jmin] = tmp; From 4608444ada642a4d31a32a2d54454f57cabb6949 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Thu, 16 Nov 2023 16:23:09 -0700 Subject: [PATCH 033/116] Add trim option to skip neighbor list build styles --- src/KOKKOS/npair_skip_kokkos.cpp | 44 +++-- src/KOKKOS/npair_skip_kokkos.h | 42 ++++- src/neighbor.cpp | 56 +++--- src/npair_skip_respa_trim.cpp | 193 ++++++++++++++++++++ src/npair_skip_respa_trim.h | 40 ++++ src/npair_skip_size_off2on_oneside_trim.cpp | 185 +++++++++++++++++++ src/npair_skip_size_off2on_oneside_trim.h | 40 ++++ src/npair_skip_size_off2on_trim.cpp | 112 ++++++++++++ src/npair_skip_size_off2on_trim.h | 40 ++++ src/npair_skip_size_trim.cpp | 102 +++++++++++ src/npair_skip_size_trim.h | 39 ++++ src/npair_skip_trim.cpp | 118 ++++++++++++ src/npair_skip_trim.h | 46 +++++ 13 files changed, 1019 insertions(+), 38 deletions(-) create mode 100644 src/npair_skip_respa_trim.cpp create mode 100644 src/npair_skip_respa_trim.h create mode 100644 src/npair_skip_size_off2on_oneside_trim.cpp create mode 100644 src/npair_skip_size_off2on_oneside_trim.h create mode 100644 src/npair_skip_size_off2on_trim.cpp create mode 100644 src/npair_skip_size_off2on_trim.h create mode 100644 src/npair_skip_size_trim.cpp create mode 100644 src/npair_skip_size_trim.h create mode 100644 src/npair_skip_trim.cpp create mode 100644 src/npair_skip_trim.h diff --git a/src/KOKKOS/npair_skip_kokkos.cpp b/src/KOKKOS/npair_skip_kokkos.cpp index 4492a3794a..15c0487010 100644 --- a/src/KOKKOS/npair_skip_kokkos.cpp +++ 
b/src/KOKKOS/npair_skip_kokkos.cpp @@ -23,8 +23,8 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -template -NPairSkipKokkos::NPairSkipKokkos(LAMMPS *lmp) : NPair(lmp) { +template +NPairSkipKokkos::NPairSkipKokkos(LAMMPS *lmp) : NPair(lmp) { atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; d_inum = typename AT::t_int_scalar("npair_skip:inum"); @@ -38,13 +38,18 @@ NPairSkipKokkos::NPairSkipKokkos(LAMMPS *lmp) : NPair(lmp) { if ghost, also store neighbors of ghost atoms & set inum,gnum correctly ------------------------------------------------------------------------- */ -template -void NPairSkipKokkos::build(NeighList *list) +template +void NPairSkipKokkos::build(NeighList *list) { atomKK->sync(execution_space,TYPE_MASK); type = atomKK->k_type.view(); nlocal = atom->nlocal; + if (TRIM) { + x = atomKK->k_x.view(); + atomKK->sync(execution_space,X_MASK); + cutsq_custom = cutoff_custom*cutoff_custom; + } NeighListKokkos* k_list_skip = static_cast*>(list->listskip); d_ilist_skip = k_list_skip->d_ilist; @@ -100,13 +105,20 @@ void NPairSkipKokkos::build(NeighList *list) copymode = 0; } -template +template KOKKOS_INLINE_FUNCTION -void NPairSkipKokkos::operator()(TagNPairSkipCompute, const int &ii, int &inum, const bool &final) const { +void NPairSkipKokkos::operator()(TagNPairSkipCompute, const int &ii, int &inum, const bool &final) const { const int i = d_ilist_skip(ii); const int itype = type(i); + F_FLOAT xtmp,ytmp,ztmp; + if (TRIM) { + xtmp = x(i,0); + ytmp = x(i,1); + ztmp = x(i,2); + } + if (!d_iskip(itype)) { if (final) { @@ -123,6 +135,15 @@ void NPairSkipKokkos::operator()(TagNPairSkipCompute, const int &ii, const int joriginal = d_neighbors_skip(i,jj); int j = joriginal & NEIGHMASK; if (d_ijskip(itype,type(j))) continue; + + if (TRIM) { + const double delx = xtmp - x(j,0); + const double dely = ytmp - x(j,1); + const double delz = ztmp - x(j,2); + const double rsq = 
delx*delx + dely*dely + delz*delz; + if (rsq > cutsq_custom) continue; + } + neighbors_i(n++) = joriginal; } @@ -139,16 +160,17 @@ void NPairSkipKokkos::operator()(TagNPairSkipCompute, const int &ii, } } -template +template KOKKOS_INLINE_FUNCTION -void NPairSkipKokkos::operator()(TagNPairSkipCountLocal, const int &i, int &num) const { +void NPairSkipKokkos::operator()(TagNPairSkipCountLocal, const int &i, int &num) const { if (d_ilist[i] < nlocal) num++; } - namespace LAMMPS_NS { -template class NPairSkipKokkos; +template class NPairSkipKokkos; +template class NPairSkipKokkos; #ifdef LMP_KOKKOS_GPU -template class NPairSkipKokkos; +template class NPairSkipKokkos; +template class NPairSkipKokkos; #endif } diff --git a/src/KOKKOS/npair_skip_kokkos.h b/src/KOKKOS/npair_skip_kokkos.h index fd1217bef4..7672a2c36c 100644 --- a/src/KOKKOS/npair_skip_kokkos.h +++ b/src/KOKKOS/npair_skip_kokkos.h @@ -13,33 +13,62 @@ #ifdef NPAIR_CLASS // clang-format off -typedef NPairSkipKokkos NPairKokkosSkipDevice; +typedef NPairSkipKokkos NPairKokkosSkipDevice; NPairStyle(skip/kk/device, NPairKokkosSkipDevice, NP_SKIP | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_KOKKOS_DEVICE); -typedef NPairSkipKokkos NPairKokkosSkipGhostDevice; +typedef NPairSkipKokkos NPairKokkosSkipGhostDevice; NPairStyle(skip/ghost/kk/device, NPairKokkosSkipGhostDevice, NP_SKIP | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_DEVICE); -typedef NPairSkipKokkos NPairKokkosSkipHost; +typedef NPairSkipKokkos NPairKokkosSkipHost; NPairStyle(skip/kk/host, NPairKokkosSkipHost, NP_SKIP | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_KOKKOS_HOST); -typedef NPairSkipKokkos NPairKokkosSkipGhostHost; +typedef NPairSkipKokkos NPairKokkosSkipGhostHost; NPairStyle(skip/ghost/kk/host, NPairKokkosSkipGhostHost, NP_SKIP | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | 
NP_MULTI | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_HOST); + +typedef NPairSkipKokkos NPairKokkosSkipTrimDevice; +NPairStyle(skip/kk/device, + NPairKokkosSkipTrimDevice, + NP_SKIP | NP_HALF | NP_FULL | + NP_NSQ | NP_BIN | NP_MULTI | + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM |NP_KOKKOS_DEVICE); + +typedef NPairSkipKokkos NPairKokkosSkipTrimGhostDevice; +NPairStyle(skip/ghost/kk/device, + NPairKokkosSkipTrimGhostDevice, + NP_SKIP | NP_HALF | NP_FULL | + NP_NSQ | NP_BIN | NP_MULTI | + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_GHOST | NP_KOKKOS_DEVICE); + +typedef NPairSkipKokkos NPairKokkosSkipTrimHost; +NPairStyle(skip/kk/host, + NPairKokkosSkipTrimHost, + NP_SKIP | NP_HALF | NP_FULL | + NP_NSQ | NP_BIN | NP_MULTI | + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_HOST); + +typedef NPairSkipKokkos NPairKokkosSkipTrimGhostHost; +NPairStyle(skip/ghost/kk/host, + NPairKokkosSkipTrimGhostHost, + NP_SKIP | NP_HALF | NP_FULL | + NP_NSQ | NP_BIN | NP_MULTI | + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_GHOST | NP_KOKKOS_HOST); + // clang-format on #else @@ -55,7 +84,7 @@ namespace LAMMPS_NS { struct TagNPairSkipCompute{}; struct TagNPairSkipCountLocal{}; -template +template class NPairSkipKokkos : public NPair { public: typedef DeviceType device_type; @@ -72,8 +101,9 @@ class NPairSkipKokkos : public NPair { void operator()(TagNPairSkipCountLocal, const int&, int&) const; private: - int nlocal,num_skip; + int nlocal,num_skip,cutsq_custom; + typename AT::t_x_array_randomread x; typename AT::t_int_1d_randomread type; typename AT::t_int_scalar d_inum; diff --git a/src/neighbor.cpp b/src/neighbor.cpp index 20d6306572..b6f3363b0c 100644 --- a/src/neighbor.cpp +++ b/src/neighbor.cpp @@ -852,6 +852,7 @@ int Neighbor::init_pair() // morph requests in various ways // purpose is to avoid duplicate or inefficient builds + // also sort requests by cutoff distance for trimming // may add new requests 
if a needed request to derive from does not exist // methods: // (1) unique = create unique lists if cutoff is explicitly set @@ -868,15 +869,9 @@ int Neighbor::init_pair() int nrequest_original = nrequest; morph_unique(); + sort_requests(); morph_skip(); morph_granular(); // this method can change flags set by requestor - - // sort requests by cutoff distance for trimming, used by - // morph_halffull and morph_copy_trim. Must come after - // morph_skip() which change the number of requests - - sort_requests(); - morph_halffull(); morph_copy_trim(); @@ -1210,11 +1205,15 @@ void Neighbor::morph_skip() { - int i,j,inewton,jnewton; + int i,j,jj,inewton,jnewton,icut,jcut; NeighRequest *irq,*jrq,*nrq; - for (i = 0; i < nrequest; i++) { - irq = requests[i]; + // loop over irq from largest to smallest cutoff + // to prevent adding unnecessary neighbor lists + + for (i = nrequest-1; i >= 0; i--) { + irq = requests[j_sorted[i]]; + int trim_flag = irq->trim; // only processing skip lists @@ -1229,7 +1228,9 @@ void Neighbor::morph_skip() // check all other lists - for (j = 0; j < nrequest; j++) { + for (jj = 0; jj < nrequest; jj++) { + j = j_sorted[jj]; + if (i == j) continue; jrq = requests[j]; @@ -1252,10 +1253,20 @@ void Neighbor::morph_skip() if (jnewton == 0) jnewton = force->newton_pair ? 1 : 2; if (inewton != jnewton) continue; + // trim a list with longer cutoff + + if (irq->cut) icut = irq->cutoff; + else icut = cutneighmax; + + if (jrq->cut) jcut = jrq->cutoff; + else jcut = cutneighmax; + + if (icut > jcut) continue; + else if (icut != jcut) trim_flag = 1; + // these flags must be same, // else 2 lists do not store same pairs // or their data structures are different - // this includes custom cutoff set by requestor // NOTE: need check for 2 Kokkos flags? 
if (irq->ghost != jrq->ghost) continue; @@ -1267,8 +1278,6 @@ void Neighbor::morph_skip() if (irq->kokkos_host != jrq->kokkos_host) continue; if (irq->kokkos_device != jrq->kokkos_device) continue; if (irq->ssa != jrq->ssa) continue; - if (irq->cut != jrq->cut) continue; - if (irq->cutoff != jrq->cutoff) continue; // 2 lists are a match @@ -1282,8 +1291,10 @@ void Neighbor::morph_skip() // note: parents of skip lists do not have associated history // b/c child skip lists have the associated history - if (j < nrequest) irq->skiplist = j; - else { + if (jj < nrequest) { + irq->skiplist = j; + irq->trim = trim_flag; + } else { int newrequest = request(this,-1); irq->skiplist = newrequest; @@ -1293,6 +1304,8 @@ void Neighbor::morph_skip() nrq->neigh = 1; nrq->skip = 0; if (irq->unique) nrq->unique = 1; + + sort_requests(); } } } @@ -1394,8 +1407,7 @@ void Neighbor::morph_halffull() // check all other lists for (jj = 0; jj < nrequest; jj++) { - if (irq->cut) j = j_sorted[jj]; - else j = jj; + j = j_sorted[jj]; jrq = requests[j]; @@ -1473,8 +1485,7 @@ void Neighbor::morph_copy_trim() // check all other lists for (jj = 0; jj < nrequest; jj++) { - if (irq->cut) j = j_sorted[jj]; - else j = jj; + j = j_sorted[jj]; if (i == j) continue; jrq = requests[j]; @@ -1786,7 +1797,10 @@ void Neighbor::print_pairwise_info() else out += fmt::format(", half/full from ({})",rq->halffulllist+1); else if (rq->skip) - out += fmt::format(", skip from ({})",rq->skiplist+1); + if (rq->trim) + out += fmt::format(", skip trim from ({})",rq->skiplist+1); + else + out += fmt::format(", skip from ({})",rq->skiplist+1); out += "\n"; // list of neigh list attributes diff --git a/src/npair_skip_respa_trim.cpp b/src/npair_skip_respa_trim.cpp new file mode 100644 index 0000000000..64b1c4d716 --- /dev/null +++ b/src/npair_skip_respa_trim.cpp @@ -0,0 +1,193 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively 
Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "npair_skip_respa_trim.h" + +#include "atom.h" +#include "error.h" +#include "my_page.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +NPairSkipRespaTrim::NPairSkipRespaTrim(LAMMPS *lmp) : NPair(lmp) {} + +/* ---------------------------------------------------------------------- + build skip list for subset of types from parent list + iskip and ijskip flag which atom types and type pairs to skip + this is for respa lists, copy the inner/middle values from parent +------------------------------------------------------------------------- */ + +void NPairSkipRespaTrim::build(NeighList *list) +{ + int i,j,ii,jj,n,itype,jnum,joriginal,n_inner,n_middle; + int *neighptr,*jlist,*neighptr_inner,*neighptr_middle; + + int *type = atom->type; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + MyPage *ipage = list->ipage; + + int *ilist_skip = list->listskip->ilist; + int *numneigh_skip = list->listskip->numneigh; + int **firstneigh_skip = list->listskip->firstneigh; + int inum_skip = list->listskip->inum; + + int *iskip = list->iskip; + int **ijskip = list->ijskip; + + int *ilist_inner = list->ilist_inner; + int *numneigh_inner = list->numneigh_inner; + int **firstneigh_inner = list->firstneigh_inner; + MyPage *ipage_inner = list->ipage_inner; + int *numneigh_inner_skip = list->listskip->numneigh_inner; + int 
**firstneigh_inner_skip = list->listskip->firstneigh_inner; + + int *ilist_middle,*numneigh_middle,**firstneigh_middle; + MyPage *ipage_middle; + int *numneigh_middle_skip,**firstneigh_middle_skip; + int respamiddle = list->respamiddle; + if (respamiddle) { + ilist_middle = list->ilist_middle; + numneigh_middle = list->numneigh_middle; + firstneigh_middle = list->firstneigh_middle; + ipage_middle = list->ipage_middle; + numneigh_middle_skip = list->listskip->numneigh_middle; + firstneigh_middle_skip = list->listskip->firstneigh_middle; + } + + int inum = 0; + ipage->reset(); + ipage_inner->reset(); + if (respamiddle) ipage_middle->reset(); + + double **x = atom->x; + double xtmp, ytmp, ztmp; + double delx, dely, delz, rsq; + double cutsq_custom = cutoff_custom * cutoff_custom; + + // loop over atoms in other list + // skip I atom entirely if iskip is set for type[I] + // skip I,J pair if ijskip is set for type[I],type[J] + + for (ii = 0; ii < inum_skip; ii++) { + i = ilist_skip[ii]; + itype = type[i]; + if (iskip[itype]) continue; + + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + n = n_inner = 0; + neighptr = ipage->vget(); + neighptr_inner = ipage_inner->vget(); + if (respamiddle) { + n_middle = 0; + neighptr_middle = ipage_middle->vget(); + } + + // loop over parent outer rRESPA list + + jlist = firstneigh_skip[i]; + jnum = numneigh_skip[i]; + + for (jj = 0; jj < jnum; jj++) { + joriginal = jlist[jj]; + j = joriginal & NEIGHMASK; + if (ijskip[itype][type[j]]) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx * delx + dely * dely + delz * delz; + if (rsq > cutsq_custom) continue; + + neighptr[n++] = joriginal; + } + + // loop over parent inner rRESPA list + + jlist = firstneigh_inner_skip[i]; + jnum = numneigh_inner_skip[i]; + + for (jj = 0; jj < jnum; jj++) { + joriginal = jlist[jj]; + j = joriginal & NEIGHMASK; + if (ijskip[itype][type[j]]) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + 
delz = ztmp - x[j][2]; + rsq = delx * delx + dely * dely + delz * delz; + if (rsq > cutsq_custom) continue; + + neighptr_inner[n_inner++] = joriginal; + } + + // loop over parent middle rRESPA list + + if (respamiddle) { + jlist = firstneigh_middle_skip[i]; + jnum = numneigh_middle_skip[i]; + + for (jj = 0; jj < jnum; jj++) { + joriginal = jlist[jj]; + j = joriginal & NEIGHMASK; + if (ijskip[itype][type[j]]) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx * delx + dely * dely + delz * delz; + if (rsq > cutsq_custom) continue; + + neighptr_middle[n_middle++] = joriginal; + } + } + + ilist[inum] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + ipage->vgot(n); + if (ipage->status()) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + + ilist_inner[inum] = i; + firstneigh_inner[i] = neighptr_inner; + numneigh_inner[i] = n_inner; + ipage_inner->vgot(n); + if (ipage_inner->status()) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + + if (respamiddle) { + ilist_middle[inum] = i; + firstneigh_middle[i] = neighptr_middle; + numneigh_middle[i] = n_middle; + ipage_middle->vgot(n); + if (ipage_middle->status()) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + + inum++; + } + + list->inum = inum; + list->inum_inner = inum; + if (respamiddle) list->inum_middle = inum; +} diff --git a/src/npair_skip_respa_trim.h b/src/npair_skip_respa_trim.h new file mode 100644 index 0000000000..f10b726cbe --- /dev/null +++ b/src/npair_skip_respa_trim.h @@ -0,0 +1,40 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. 
Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef NPAIR_CLASS +// clang-format off +NPairStyle(skip/half/respa/trim, + NPairSkipRespaTrim, + NP_SKIP | NP_RESPA | NP_HALF | NP_FULL | + NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM); +// clang-format on +#else + +#ifndef LMP_NPAIR_SKIP_RESPA_TRIM_H +#define LMP_NPAIR_SKIP_RESPA_TRIM_H + +#include "npair.h" + +namespace LAMMPS_NS { + +class NPairSkipRespaTrim : public NPair { + public: + NPairSkipRespaTrim(class LAMMPS *); + void build(class NeighList *) override; +}; + +} // namespace LAMMPS_NS + +#endif +#endif diff --git a/src/npair_skip_size_off2on_oneside_trim.cpp b/src/npair_skip_size_off2on_oneside_trim.cpp new file mode 100644 index 0000000000..91940d3135 --- /dev/null +++ b/src/npair_skip_size_off2on_oneside_trim.cpp @@ -0,0 +1,185 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#include "npair_skip_size_off2on_oneside_trim.h" + +#include "atom.h" +#include "domain.h" +#include "error.h" +#include "my_page.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +NPairSkipSizeOff2onOnesideTrim::NPairSkipSizeOff2onOnesideTrim(LAMMPS *lmp) : + NPair(lmp) {} + +/* ---------------------------------------------------------------------- + build skip list for subset of types from parent list + iskip and ijskip flag which atom types and type pairs to skip + parent non-skip list used newton off and was not onesided, + this skip list is newton on and onesided +------------------------------------------------------------------------- */ + +void NPairSkipSizeOff2onOnesideTrim::build(NeighList *list) +{ + int i,j,ii,jj,itype,jnum,joriginal,flip,tmp; + int *surf,*jlist; + + int *type = atom->type; + int nlocal = atom->nlocal; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + MyPage *ipage = list->ipage; + + int *ilist_skip = list->listskip->ilist; + int *numneigh_skip = list->listskip->numneigh; + int **firstneigh_skip = list->listskip->firstneigh; + int inum_skip = list->listskip->inum; + + int *iskip = list->iskip; + int **ijskip = list->ijskip; + + if (domain->dimension == 2) surf = atom->line; + else surf = atom->tri; + + int inum = 0; + ipage->reset(); + + double **x = atom->x; + double xtmp, ytmp, ztmp; + double delx, dely, delz, rsq; + double cutsq_custom = cutoff_custom * cutoff_custom; + + // two loops over parent list required, one to count, one to store + // because onesided constraint means pair I,J may be stored with I or J + // so don't know in advance how much space to alloc for each atom's neighs + + // first loop over atoms in other list to count neighbors + // skip I atom entirely if iskip is set for type[I] + // skip 
I,J pair if ijskip is set for type[I],type[J] + + for (i = 0; i < nlocal; i++) numneigh[i] = 0; + + for (ii = 0; ii < inum_skip; ii++) { + i = ilist_skip[ii]; + itype = type[i]; + if (iskip[itype]) continue; + + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over parent non-skip size list + + jlist = firstneigh_skip[i]; + jnum = numneigh_skip[i]; + + for (jj = 0; jj < jnum; jj++) { + joriginal = jlist[jj]; + j = joriginal & NEIGHMASK; + if (ijskip[itype][type[j]]) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx * delx + dely * dely + delz * delz; + if (rsq > cutsq_custom) continue; + + // flip I,J if necessary to satisfy onesided constraint + // do not keep if I is now ghost + + if (surf[i] >= 0) { + if (j >= nlocal) continue; + tmp = i; + i = j; + j = tmp; + flip = 1; + } else flip = 0; + + numneigh[i]++; + if (flip) i = j; + } + } + + // allocate all per-atom neigh list chunks + + for (i = 0; i < nlocal; i++) { + if (numneigh[i] == 0) continue; + firstneigh[i] = ipage->get(numneigh[i]); + if (ipage->status()) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + + // second loop over atoms in other list to store neighbors + // skip I atom entirely if iskip is set for type[I] + // skip I,J pair if ijskip is set for type[I],type[J] + + for (i = 0; i < nlocal; i++) numneigh[i] = 0; + + for (ii = 0; ii < inum_skip; ii++) { + i = ilist_skip[ii]; + itype = type[i]; + if (iskip[itype]) continue; + + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over parent non-skip size list and optionally its history info + + jlist = firstneigh_skip[i]; + jnum = numneigh_skip[i]; + + for (jj = 0; jj < jnum; jj++) { + joriginal = jlist[jj]; + j = joriginal & NEIGHMASK; + if (ijskip[itype][type[j]]) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx * delx + dely * dely + delz * delz; + if (rsq > cutsq_custom) continue; + + // flip 
I,J if necessary to satisfy onesided constraint + // do not keep if I is now ghost + + if (surf[i] >= 0) { + if (j >= nlocal) continue; + tmp = i; + i = j; + j = tmp; + flip = 1; + } else flip = 0; + + // store j in neigh list, not joriginal, like other neigh methods + // OK, b/c there is no special list flagging for surfs + + firstneigh[i][numneigh[i]] = j; + numneigh[i]++; + if (flip) i = j; + } + + // only add atom I to ilist if it has neighbors + + if (numneigh[i]) ilist[inum++] = i; + } + + list->inum = inum; +} diff --git a/src/npair_skip_size_off2on_oneside_trim.h b/src/npair_skip_size_off2on_oneside_trim.h new file mode 100644 index 0000000000..236b886fe4 --- /dev/null +++ b/src/npair_skip_size_off2on_oneside_trim.h @@ -0,0 +1,40 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef NPAIR_CLASS +// clang-format off +NPairStyle(skip/size/off2on/oneside/trim, + NPairSkipSizeOff2onOnesideTrim, + NP_SKIP | NP_SIZE | NP_OFF2ON | NP_ONESIDE | NP_HALF | + NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_NEWTON | NP_NEWTOFF | + NP_ORTHO | NP_TRI | NP_TRIM); +// clang-format on +#else + +#ifndef LMP_NPAIR_SKIP_SIZE_OFF2ON_ONESIDE_TRIM_H +#define LMP_NPAIR_SKIP_SIZE_OFF2ON_ONESIDE_TRIM_H + +#include "npair.h" + +namespace LAMMPS_NS { + +class NPairSkipSizeOff2onOnesideTrim : public NPair { + public: + NPairSkipSizeOff2onOnesideTrim(class LAMMPS *); + void build(class NeighList *) override; +}; + +} // namespace LAMMPS_NS + +#endif +#endif diff --git a/src/npair_skip_size_off2on_trim.cpp b/src/npair_skip_size_off2on_trim.cpp new file mode 100644 index 0000000000..9591bbc4eb --- /dev/null +++ b/src/npair_skip_size_off2on_trim.cpp @@ -0,0 +1,112 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#include "npair_skip_size_off2on_trim.h" + +#include "atom.h" +#include "error.h" +#include "my_page.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +NPairSkipSizeOff2onTrim::NPairSkipSizeOff2onTrim(LAMMPS *lmp) : NPair(lmp) {} + +/* ---------------------------------------------------------------------- + build skip list for subset of types from parent list + iskip and ijskip flag which atom types and type pairs to skip + parent non-skip list used newton off, this skip list is newton on +------------------------------------------------------------------------- */ + +void NPairSkipSizeOff2onTrim::build(NeighList *list) +{ + int i, j, ii, jj, n, itype, jnum, joriginal; + tagint itag, jtag; + int *neighptr, *jlist; + + tagint *tag = atom->tag; + int *type = atom->type; + int nlocal = atom->nlocal; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + MyPage<int> *ipage = list->ipage; + + int *ilist_skip = list->listskip->ilist; + int *numneigh_skip = list->listskip->numneigh; + int **firstneigh_skip = list->listskip->firstneigh; + int inum_skip = list->listskip->inum; + + int *iskip = list->iskip; + int **ijskip = list->ijskip; + + int inum = 0; + ipage->reset(); + + double **x = atom->x; + double xtmp, ytmp, ztmp; + double delx, dely, delz, rsq; + double cutsq_custom = cutoff_custom * cutoff_custom; + + // loop over atoms in other list + // skip I atom entirely if iskip is set for type[I] + // skip I,J pair if ijskip is set for type[I],type[J] + + for (ii = 0; ii < inum_skip; ii++) { + i = ilist_skip[ii]; + itype = type[i]; + if (iskip[itype]) continue; + itag = tag[i]; + + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + n = 0; + neighptr = ipage->vget(); + + // loop over parent non-skip size list and optionally its history info + + jlist = 
firstneigh_skip[i]; + jnum = numneigh_skip[i]; + + for (jj = 0; jj < jnum; jj++) { + joriginal = jlist[jj]; + j = joriginal & NEIGHMASK; + if (ijskip[itype][type[j]]) continue; + + // only keep I,J when J = ghost if Itag < Jtag + + jtag = tag[j]; + if (j >= nlocal && jtag < itag) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx * delx + dely * dely + delz * delz; + if (rsq > cutsq_custom) continue; + + neighptr[n++] = joriginal; + } + + ilist[inum++] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + ipage->vgot(n); + if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one"); + } + list->inum = inum; +} diff --git a/src/npair_skip_size_off2on_trim.h b/src/npair_skip_size_off2on_trim.h new file mode 100644 index 0000000000..e471ddd2cc --- /dev/null +++ b/src/npair_skip_size_off2on_trim.h @@ -0,0 +1,40 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef NPAIR_CLASS +// clang-format off +NPairStyle(skip/size/off2on/trim, + NPairSkipSizeOff2onTrim, + NP_SKIP | NP_SIZE | NP_OFF2ON | NP_HALF | + NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM); +// clang-format on +#else + +#ifndef LMP_NPAIR_SKIP_SIZE_OFF2ON_TRIM_H +#define LMP_NPAIR_SKIP_SIZE_OFF2ON_TRIM_H + +#include "npair.h" + +namespace LAMMPS_NS { + +class NPairSkipSizeOff2onTrim : public NPair { + public: + NPairSkipSizeOff2onTrim(class LAMMPS *); + void build(class NeighList *) override; +}; + +} // namespace LAMMPS_NS + +#endif +#endif diff --git a/src/npair_skip_size_trim.cpp b/src/npair_skip_size_trim.cpp new file mode 100644 index 0000000000..3fd8f912f9 --- /dev/null +++ b/src/npair_skip_size_trim.cpp @@ -0,0 +1,102 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#include "npair_skip_size_trim.h" + +#include "atom.h" +#include "error.h" +#include "my_page.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +NPairSkipSizeTrim::NPairSkipSizeTrim(LAMMPS *lmp) : NPair(lmp) {} + +/* ---------------------------------------------------------------------- + build skip list for subset of types from parent list + iskip and ijskip flag which atom types and type pairs to skip +------------------------------------------------------------------------- */ + +void NPairSkipSizeTrim::build(NeighList *list) +{ + int i, j, ii, jj, n, itype, jnum, joriginal; + int *neighptr, *jlist; + + int *type = atom->type; + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + MyPage<int> *ipage = list->ipage; + + int *ilist_skip = list->listskip->ilist; + int *numneigh_skip = list->listskip->numneigh; + int **firstneigh_skip = list->listskip->firstneigh; + int inum_skip = list->listskip->inum; + + int *iskip = list->iskip; + int **ijskip = list->ijskip; + + int inum = 0; + ipage->reset(); + + double **x = atom->x; + double xtmp, ytmp, ztmp; + double delx, dely, delz, rsq; + double cutsq_custom = cutoff_custom * cutoff_custom; + + // loop over atoms in other list + // skip I atom entirely if iskip is set for type[I] + // skip I,J pair if ijskip is set for type[I],type[J] + + for (ii = 0; ii < inum_skip; ii++) { + i = ilist_skip[ii]; + itype = type[i]; + if (iskip[itype]) continue; + + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + n = 0; + neighptr = ipage->vget(); + + // loop over parent non-skip size list + + jlist = firstneigh_skip[i]; + jnum = numneigh_skip[i]; + + for (jj = 0; jj < jnum; jj++) { + joriginal = jlist[jj]; + j = joriginal & NEIGHMASK; + if (ijskip[itype][type[j]]) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - 
x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx * delx + dely * dely + delz * delz; + if (rsq > cutsq_custom) continue; + + neighptr[n++] = joriginal; + } + + ilist[inum++] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + ipage->vgot(n); + if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one"); + } + + list->inum = inum; +} diff --git a/src/npair_skip_size_trim.h b/src/npair_skip_size_trim.h new file mode 100644 index 0000000000..e94b2f5f29 --- /dev/null +++ b/src/npair_skip_size_trim.h @@ -0,0 +1,39 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef NPAIR_CLASS +// clang-format off +NPairStyle(skip/half/size/trim, + NPairSkipSizeTrim, + NP_SKIP | NP_SIZE | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM); +// clang-format on +#else + +#ifndef LMP_NPAIR_SKIP_SIZE_TRIM_H +#define LMP_NPAIR_SKIP_SIZE_TRIM_H + +#include "npair.h" + +namespace LAMMPS_NS { + +class NPairSkipSizeTrim : public NPair { + public: + NPairSkipSizeTrim(class LAMMPS *); + void build(class NeighList *) override; +}; + +} // namespace LAMMPS_NS + +#endif +#endif diff --git a/src/npair_skip_trim.cpp b/src/npair_skip_trim.cpp new file mode 100644 index 0000000000..a286a7e19e --- /dev/null +++ b/src/npair_skip_trim.cpp @@ -0,0 +1,118 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#include "npair_skip_trim.h" + +#include "atom.h" +#include "error.h" +#include "my_page.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +NPairSkipTrim::NPairSkipTrim(LAMMPS *lmp) : NPair(lmp) {} + +/* ---------------------------------------------------------------------- + build skip list for subset of types from parent list + works for half and full lists + works for owned (non-ghost) list, also for ghost list + iskip and ijskip flag which atom types and type pairs to skip + if ghost, also store neighbors of ghost atoms & set inum,gnum correctly +------------------------------------------------------------------------- */ + +void NPairSkipTrim::build(NeighList *list) +{ + int i, j, ii, jj, n, itype, jnum, joriginal; + int *neighptr, *jlist; + + int *type = atom->type; + int nlocal = atom->nlocal; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + MyPage<int> *ipage = list->ipage; + + int *ilist_skip = list->listskip->ilist; + int *numneigh_skip = list->listskip->numneigh; + int **firstneigh_skip = list->listskip->firstneigh; + int num_skip = list->listskip->inum; + if (list->ghost) num_skip += list->listskip->gnum; + + int *iskip = list->iskip; + int **ijskip = list->ijskip; + + int inum = 0; + ipage->reset(); + + double **x = atom->x; + double xtmp, ytmp, ztmp; + double delx, dely, delz, rsq; + double cutsq_custom = cutoff_custom * cutoff_custom; + + // loop over atoms in other list + // skip I atom entirely if iskip is set for type[I] + // skip I,J pair if ijskip is set for type[I],type[J] + + for (ii = 0; ii < num_skip; ii++) { + i = ilist_skip[ii]; + itype = type[i]; + if (iskip[itype]) continue; + + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + n = 0; + neighptr = ipage->vget(); + + // loop over parent non-skip list + + jlist = 
firstneigh_skip[i]; + jnum = numneigh_skip[i]; + + for (jj = 0; jj < jnum; jj++) { + joriginal = jlist[jj]; + j = joriginal & NEIGHMASK; + if (ijskip[itype][type[j]]) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx * delx + dely * dely + delz * delz; + if (rsq > cutsq_custom) continue; + + neighptr[n++] = joriginal; + } + + ilist[inum++] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + ipage->vgot(n); + if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one"); + } + + list->inum = inum; + if (list->ghost) { + int num = 0; + for (i = 0; i < inum; i++) + if (ilist[i] < nlocal) + num++; + else + break; + list->inum = num; + list->gnum = inum - num; + } +} diff --git a/src/npair_skip_trim.h b/src/npair_skip_trim.h new file mode 100644 index 0000000000..f2a26d654e --- /dev/null +++ b/src/npair_skip_trim.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef NPAIR_CLASS +// clang-format off +NPairStyle(skip/trim, + NPairSkipTrim, + NP_SKIP | NP_HALF | NP_FULL | + NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM); + +NPairStyle(skip/ghost/trim, + NPairSkipTrim, + NP_SKIP | NP_HALF | NP_FULL | + NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM); +// clang-format on +#else + +#ifndef LMP_NPAIR_SKIP_TRIM_H +#define LMP_NPAIR_SKIP_TRIM_H + +#include "npair.h" + +namespace LAMMPS_NS { + +class NPairSkipTrim : public NPair { + public: + NPairSkipTrim(class LAMMPS *); + void build(class NeighList *) override; +}; + +} // namespace LAMMPS_NS + +#endif +#endif From d739faf971cb16c040c0ebd85aa9567b6c1e4864 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 16 Nov 2023 21:14:55 -0500 Subject: [PATCH 034/116] disable `if constexpr` for all Intel compilers --- src/fmt/compile.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fmt/compile.h b/src/fmt/compile.h index ef8d5c0256..a1fb7d2935 100644 --- a/src/fmt/compile.h +++ b/src/fmt/compile.h @@ -61,7 +61,7 @@ const T& first(const T& value, const Tail&...) { return value; } -#if defined(__cpp_if_constexpr) && defined(__cpp_return_type_deduction) && FMT_CPLUSPLUS >= 201703L +#if defined(__cpp_if_constexpr) && defined(__cpp_return_type_deduction) && (FMT_CPLUSPLUS >= 201703L) && !FMT_ICC_VERSION template struct type_list {}; // Returns a reference to the argument at index N from [first, rest...]. 
From ea8234a87595bbc9af6dfc39bbbce0faad25d11b Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 16 Nov 2023 22:08:00 -0500 Subject: [PATCH 035/116] treat immediate variables like equal style also for evaluating references to computes and fixes --- src/variable.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/variable.cpp b/src/variable.cpp index 9c0307341a..3bb49218fb 100644 --- a/src/variable.cpp +++ b/src/variable.cpp @@ -1499,9 +1499,9 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) } } - // equal-style variable is being evaluated + // equal-style or immediate variable (ivar < 0, checked first) is being evaluated - if (style[ivar] == EQUAL) { + if ((ivar < 0) || (style[ivar] == EQUAL)) { // c_ID = scalar from global scalar @@ -1767,9 +1767,9 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) } } - // equal-style variable is being evaluated + // equal-style or immediate variable (ivar < 0, checked first) is being evaluated - if (style[ivar] == EQUAL) { + if ((ivar < 0) || (style[ivar] == EQUAL)) { // f_ID = scalar from global scalar From e59a66594331af6ed18a1ab0658f5cc505922bce Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 17 Nov 2023 02:06:06 -0500 Subject: [PATCH 036/116] add tests for immediate variables --- unittest/commands/test_variables.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/unittest/commands/test_variables.cpp b/unittest/commands/test_variables.cpp index 90a8d013a3..bae23e4594 100644 --- a/unittest/commands/test_variables.cpp +++ b/unittest/commands/test_variables.cpp @@ -282,6 +282,13 @@ TEST_F(VariableTest, AtomicSystem) ASSERT_DOUBLE_EQ(variable->compute_equal("v_rgsum"), 3.75); ASSERT_DOUBLE_EQ(variable->compute_equal("v_sum[1]"), 1.25); + // check handling of immediate variables + ASSERT_DOUBLE_EQ(variable->compute_equal("f_press[1]"), 0.0); + ASSERT_DOUBLE_EQ(variable->compute_equal("c_press"), 0.0); + ASSERT_DOUBLE_EQ(variable->compute_equal("c_press[2]"), 0.0); + 
ASSERT_DOUBLE_EQ(variable->compute_equal("1.5+3.25"), 4.75); + ASSERT_DOUBLE_EQ(variable->compute_equal("-2.5*1.5"), -3.75); + TEST_FAILURE(".*ERROR: Cannot redefine variable as a different style.*", command("variable one atom x");); TEST_FAILURE(".*ERROR: Cannot redefine variable as a different style.*", @@ -294,6 +301,8 @@ TEST_F(VariableTest, AtomicSystem) variable->compute_equal("v_self");); TEST_FAILURE(".*ERROR: Variable sum2: Inconsistent lengths in vector-style variable.*", variable->compute_equal("max(v_sum2)");); + TEST_FAILURE("ERROR: Mismatched fix in variable formula.*", + variable->compute_equal("f_press");); } TEST_F(VariableTest, Expressions) From a4da55942a0970c20287739e0cee29ed0df98ac1 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 17 Nov 2023 03:16:27 -0500 Subject: [PATCH 037/116] check if creating unix domain socket failed --- src/MISC/fix_ipi.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/MISC/fix_ipi.cpp b/src/MISC/fix_ipi.cpp index 69e0f2a7f3..30a6fe893d 100644 --- a/src/MISC/fix_ipi.cpp +++ b/src/MISC/fix_ipi.cpp @@ -98,7 +98,7 @@ static void open_socket(int &sockfd, int inet, int port, char *host, Error *erro // creates socket sockfd = socket(res->ai_family, res->ai_socktype, res->ai_protocol); - if (sockfd < 0) error->one(FLERR, "Error opening socket"); + if (sockfd < 0) error->one(FLERR, "Error creating socket for fix ipi"); // makes connection if (connect(sockfd, res->ai_addr, res->ai_addrlen) < 0) @@ -116,6 +116,7 @@ static void open_socket(int &sockfd, int inet, int port, char *host, Error *erro // creates the socket sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + if (sockfd < 0) error->one(FLERR, "Error creating socket for fix ipi"); // connects if (connect(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) From 2c670e8d4438a0a3e0aecf452c7ff367b0b3d9b9 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 17 Nov 2023 08:25:44 -0500 Subject: [PATCH 038/116] update version strings to 
17 Nov 2023 --- doc/lammps.1 | 4 ++-- doc/src/Commands_removed.rst | 4 ++-- doc/src/compute_composition_atom.rst | 2 +- doc/src/compute_property_grid.rst | 2 +- doc/src/compute_reduce.rst | 2 +- doc/src/compute_voronoi_atom.rst | 2 +- doc/src/dump.rst | 2 +- doc/src/dump_image.rst | 2 +- doc/src/fix_deposit.rst | 2 +- doc/src/fix_pimd.rst | 2 +- src/library.cpp | 8 ++++---- src/version.h | 3 +-- 12 files changed, 17 insertions(+), 18 deletions(-) diff --git a/doc/lammps.1 b/doc/lammps.1 index 79964d1680..766522d4aa 100644 --- a/doc/lammps.1 +++ b/doc/lammps.1 @@ -1,7 +1,7 @@ -.TH LAMMPS "1" "2 August 2023" "2023-08-2" +.TH LAMMPS "1" "17 November 2023" "2023-11-17" .SH NAME .B LAMMPS -\- Molecular Dynamics Simulator. Version 2 August 2023 +\- Molecular Dynamics Simulator. Version 17 November 2023 .SH SYNOPSIS .B lmp diff --git a/doc/src/Commands_removed.rst b/doc/src/Commands_removed.rst index 84cc534304..d0e723aabe 100644 --- a/doc/src/Commands_removed.rst +++ b/doc/src/Commands_removed.rst @@ -88,7 +88,7 @@ The same functionality is available through MPIIO package ------------- -.. deprecated:: TBD +.. deprecated:: 17Nov2023 The MPIIO package has been removed from LAMMPS since it was unmaintained for many years and thus not updated to incorporate required changes that @@ -107,7 +107,7 @@ see :doc:`restart `, :doc:`read_restart `, MSCG package ------------ -.. deprecated:: TBD +.. deprecated:: 17Nov2023 The MSCG package has been removed from LAMMPS since it was unmaintained for many years and instead superseded by the `OpenMSCG software diff --git a/doc/src/compute_composition_atom.rst b/doc/src/compute_composition_atom.rst index b7890fff8b..e65a3e9c95 100644 --- a/doc/src/compute_composition_atom.rst +++ b/doc/src/compute_composition_atom.rst @@ -36,7 +36,7 @@ Examples Description """"""""""" -.. versionadded:: TBD +.. versionadded:: 17Nov2023 Define a computation that calculates a local composition vector for each atom. 
For a central atom with :math:`M` neighbors within the neighbor cutoff sphere, diff --git a/doc/src/compute_property_grid.rst b/doc/src/compute_property_grid.rst index 20a4f19605..a0b9aba7dc 100644 --- a/doc/src/compute_property_grid.rst +++ b/doc/src/compute_property_grid.rst @@ -61,7 +61,7 @@ varying fastest, then Y, then Z slowest. For 2d grids (in 2d simulations), the grid IDs range from 1 to Nx*Ny, with X varying fastest and Y slowest. -.. versionadded:: TBD +.. versionadded:: 17Nov2023 The *proc* attribute is the ID of the processor which owns the grid cell. Processor IDs range from 0 to Nprocs - 1, where Nprocs is the diff --git a/doc/src/compute_reduce.rst b/doc/src/compute_reduce.rst index 6820d2ee04..ba60b52563 100644 --- a/doc/src/compute_reduce.rst +++ b/doc/src/compute_reduce.rst @@ -201,7 +201,7 @@ information in this context, the *replace* keywords will extract the atom IDs for the two atoms in the bond of maximum stretch. These atom IDs and the bond stretch will be printed with thermodynamic output. -.. versionadded:: TBD +.. versionadded:: 17Nov2023 The *inputs* keyword allows selection of whether all the inputs are per-atom or local quantities. As noted above, all the inputs must be diff --git a/doc/src/compute_voronoi_atom.rst b/doc/src/compute_voronoi_atom.rst index 9607401ccd..5f00a2abab 100644 --- a/doc/src/compute_voronoi_atom.rst +++ b/doc/src/compute_voronoi_atom.rst @@ -190,7 +190,7 @@ Voro++ software in the src/VORONOI/README file. Output info """"""""""" -.. deprecated:: TBD +.. deprecated:: 17Nov2023 The *peratom* keyword was removed as it is no longer required. diff --git a/doc/src/dump.rst b/doc/src/dump.rst index e5885dc25d..bdaefb769e 100644 --- a/doc/src/dump.rst +++ b/doc/src/dump.rst @@ -613,7 +613,7 @@ when running on large numbers of processors. Note that using the "\*" and "%" characters together can produce a large number of small dump files! -.. deprecated:: TBD +.. 
deprecated:: 17Nov2023 The MPIIO package and the the corresponding "/mpiio" dump styles, except for the unrelated "netcdf/mpiio" style were removed from LAMMPS. diff --git a/doc/src/dump_image.rst b/doc/src/dump_image.rst index 4e227d2f72..43f182b889 100644 --- a/doc/src/dump_image.rst +++ b/doc/src/dump_image.rst @@ -599,7 +599,7 @@ image will appear. The *sfactor* value must be a value 0.0 <= *sfactor* <= 1.0, where *sfactor* = 1 is a highly reflective surface and *sfactor* = 0 is a rough non-shiny surface. -.. versionadded:: TBD +.. versionadded:: 17Nov2023 The *fsaa* keyword can be used with the dump image command to improve the image quality by enabling full scene anti-aliasing. Internally the diff --git a/doc/src/fix_deposit.rst b/doc/src/fix_deposit.rst index 4c256f524f..ff5afc2241 100644 --- a/doc/src/fix_deposit.rst +++ b/doc/src/fix_deposit.rst @@ -220,7 +220,7 @@ rotated configuration of the molecule. existing particle. LAMMPS will issue a warning if R is smaller than this value, based on the radii of existing and inserted particles. -.. versionadded:: TBD +.. versionadded:: 17Nov2023 The *var* and *set* keywords can be used together to provide a criterion for accepting or rejecting the addition of an individual atom, based on its diff --git a/doc/src/fix_pimd.rst b/doc/src/fix_pimd.rst index 91c5e58add..7468ffea13 100644 --- a/doc/src/fix_pimd.rst +++ b/doc/src/fix_pimd.rst @@ -149,7 +149,7 @@ normal-mode PIMD. A value of *cmd* is for centroid molecular dynamics only the k > 0 modes are thermostatted, not the centroid degrees of freedom. -.. versionadded:: TBD +.. versionadded:: 17Nov2023 Mode *pimd* added to fix pimd/langevin. diff --git a/src/library.cpp b/src/library.cpp index 9d542f86ae..7fc79333ad 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -617,7 +617,7 @@ combined by removing the '&' and the following newline character. After this processing the string is handed to LAMMPS for parsing and executing. -.. versionadded:: TBD +.. 
versionadded:: 17Nov2023 The command is now able to process long strings with triple quotes and loops using :doc:`jump SELF \ `. @@ -2484,7 +2484,7 @@ int lammps_set_variable(void *handle, char *name, char *str) * \verbatim embed:rst -.. versionadded:: TBD +.. versionadded:: 17Nov2023 This function copies a string with human readable information about a defined variable: name, style, current value(s) into the provided @@ -5581,7 +5581,7 @@ int lammps_config_has_ffmpeg_support() { * \verbatim embed:rst -.. deprecated:: TBD +.. deprecated:: 17Nov2023 LAMMPS has now exceptions always enabled, so this function will now always return 1 and can be removed from applications @@ -6658,7 +6658,7 @@ the failing MPI ranks to send messages. instance, but instead would check the global error buffer of the library interface. - .. versionchanged: TBD + .. versionchanged:: 17Nov2023 The *buffer* pointer may be ``NULL``. This will clear any error status without copying the error message. diff --git a/src/version.h b/src/version.h index 35780aa785..a5844f0973 100644 --- a/src/version.h +++ b/src/version.h @@ -1,2 +1 @@ -#define LAMMPS_VERSION "3 Aug 2023" -#define LAMMPS_UPDATE "Development" +#define LAMMPS_VERSION "17 Nov 2023" From 0083cc9e26d2ad3dcb6f19d069502ce4c630d5b7 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Fri, 17 Nov 2023 09:12:43 -0700 Subject: [PATCH 039/116] Port changes to OPENMP and INTEL packages --- src/INTEL/npair_skip_trim_intel.cpp | 271 ++++++++++++++++++++++++++++ src/INTEL/npair_skip_trim_intel.h | 62 +++++++ src/INTEL/npair_trim_intel.cpp | 5 +- src/OPENMP/npair_skip_omp.h | 12 +- src/neigh_request.h | 1 + 5 files changed, 341 insertions(+), 10 deletions(-) create mode 100644 src/INTEL/npair_skip_trim_intel.cpp create mode 100644 src/INTEL/npair_skip_trim_intel.h diff --git a/src/INTEL/npair_skip_trim_intel.cpp b/src/INTEL/npair_skip_trim_intel.cpp new file mode 100644 index 0000000000..e16e1bc413 --- /dev/null +++ 
b/src/INTEL/npair_skip_trim_intel.cpp @@ -0,0 +1,271 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include "npair_skip_trim_intel.h" + +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "modify.h" +#include "my_page.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "neighbor.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +NPairSkipTrimIntel::NPairSkipTrimIntel(LAMMPS *lmp) : NPair(lmp) { + _fix = static_cast<FixIntel *>(modify->get_fix_by_id("package_intel")); + if (!_fix) error->all(FLERR, "The 'package intel' command is required for /intel styles"); + _inum_starts = new int[comm->nthreads]; + _inum_counts = new int[comm->nthreads]; + _full_props = nullptr; +} + +/* ---------------------------------------------------------------------- */ + +NPairSkipTrimIntel::~NPairSkipTrimIntel() { + delete []_inum_starts; + delete []_inum_counts; + delete[] _full_props; +} + +/* ---------------------------------------------------------------------- */ + +void NPairSkipTrimIntel::copy_neighbor_info() +{ + NPair::copy_neighbor_info(); + // Only need to set _full_props once; npair object deleted for 
changes + if (_full_props) return; + _full_props = new int[neighbor->nrequest]; + for (int i = 0; i < neighbor->nrequest; i++) + _full_props[i] = neighbor->requests[i]->full; +} + +/* ---------------------------------------------------------------------- + build skip list for subset of types from parent list + works for half and full lists + works for owned (non-ghost) list, also for ghost list + iskip and ijskip flag which atom types and type pairs to skip + if ghost, also store neighbors of ghost atoms & set inum,gnum correctly +------------------------------------------------------------------------- */ + +template +void NPairSkipTrimIntel::build_t(NeighList *list, int *numhalf, int *cnumneigh, + int *numhalf_skip, IntelBuffers *buffers) +{ + const int nlocal = atom->nlocal; + const int e_nall = nlocal + atom->nghost; + const ATOM_T * _noalias const x = buffers->get_x(); + const int * _noalias const type = atom->type; + int * _noalias const ilist = list->ilist; + int * _noalias const numneigh = list->numneigh; + int ** _noalias const firstneigh = (int ** const)list->firstneigh; // NOLINT + const int * _noalias const ilist_skip = list->listskip->ilist; + const int * _noalias const numneigh_skip = list->listskip->numneigh; + const int ** _noalias const firstneigh_skip = (const int ** const)list->listskip->firstneigh; // NOLINT + const int * _noalias const iskip = list->iskip; + const int ** _noalias const ijskip = (const int ** const)list->ijskip; // NOLINT + + const flt_t cutsq_custom = cutoff_custom * cutoff_custom; + int num_skip = list->listskip->inum; + if (list->ghost) num_skip += list->listskip->gnum; + + int packthreads; + if (comm->nthreads > INTEL_HTHREADS && THREE==0) + packthreads = comm->nthreads; + else + packthreads = 1; + + #if defined(_OPENMP) + #pragma omp parallel if (packthreads > 1) + #endif + { + int tid, ifrom, ito; + IP_PRE_omp_range_id(ifrom, ito, tid, num_skip, packthreads); + + // each thread has its own page allocator + MyPage &ipage = 
list->ipage[tid]; + ipage.reset(); + + int my_inum = ifrom; + _inum_starts[tid] = ifrom; + + // loop over parent full list + for (int ii = ifrom; ii < ito; ii++) { + const int i = ilist_skip[ii]; + const int itype = type[i]; + if (iskip[itype]) continue; + + const flt_t xtmp = x[i].x; + const flt_t ytmp = x[i].y; + const flt_t ztmp = x[i].z; + + int n = 0; + int *neighptr = ipage.vget(); + + // loop over parent non-skip list + + const int * _noalias const jlist = firstneigh_skip[i]; + const int jnum = numneigh_skip[i]; + + if (THREE) { + const int jnumhalf = numhalf_skip[ii]; + for (int jj = 0; jj < jnumhalf; jj++) { + const int joriginal = jlist[jj]; + const int j = joriginal & NEIGHMASK; + + int addme = 1; + if (ijskip[itype][type[j]]) addme = 0; + + // trim to shorter cutoff + + const flt_t delx = xtmp - x[j].x; + const flt_t dely = ytmp - x[j].y; + const flt_t delz = ztmp - x[j].z; + const flt_t rsq = delx * delx + dely * dely + delz * delz; + if (rsq > cutsq_custom) addme = 0; + + if (addme) + neighptr[n++] = joriginal; + } + numhalf[my_inum] = n; + + for (int jj = jnumhalf; jj < jnum; jj++) { + const int joriginal = jlist[jj]; + const int j = joriginal & NEIGHMASK; + + int addme = 1; + if (ijskip[itype][type[j]]) addme = 0; + + // trim to shorter cutoff + + const flt_t delx = xtmp - x[j].x; + const flt_t dely = ytmp - x[j].y; + const flt_t delz = ztmp - x[j].z; + const flt_t rsq = delx * delx + dely * dely + delz * delz; + if (rsq > cutsq_custom) addme = 0; + + if (addme) + neighptr[n++] = joriginal; + } + } else { + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma ivdep + #endif + for (int jj = 0; jj < jnum; jj++) { + const int joriginal = jlist[jj]; + const int j = joriginal & NEIGHMASK; + + int addme = 1; + if (ijskip[itype][type[j]]) addme = 0; + + // trim to shorter cutoff + + const flt_t delx = xtmp - x[j].x; + const flt_t dely = ytmp - x[j].y; + const flt_t delz = ztmp - x[j].z; + const flt_t rsq = delx * delx + dely * dely + delz * 
delz; + if (rsq > cutsq_custom) addme = 0; + + if (addme) + neighptr[n++] = joriginal; + } + } + + ilist[my_inum++] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + + int pad_end = n; + IP_PRE_neighbor_pad(pad_end, 0); + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \ + avg=INTEL_COMPILE_WIDTH/2 + #endif + for ( ; n < pad_end; n++) + neighptr[n] = e_nall; + + ipage.vgot(n); + if (ipage.status()) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + + int last_inum = 0, loop_end; + _inum_counts[tid] = my_inum; + } + int inum = _inum_counts[0]; + for (int tid = 1; tid < packthreads; tid++) { + for (int i = _inum_starts[tid]; i < _inum_counts[tid]; i++) { + if (THREE) numhalf[inum] = numhalf[i]; + ilist[inum++] = ilist[i]; + } + } + list->inum = inum; + + if (THREE && num_skip > 0) { + int * const list_start = firstneigh[ilist[0]]; + for (int ii = 0; ii < inum; ii++) { + int i = ilist[ii]; + cnumneigh[ii] = static_cast(firstneigh[i] - list_start); + } + } + if (list->ghost) { + int num = 0; + int my_inum = list->inum; + for (int i = 0; i < my_inum; i++) + if (ilist[i] < nlocal) num++; + else break; + list->inum = num; + list->gnum = my_inum - num; + } +} + +/* ---------------------------------------------------------------------- */ + +void NPairSkipTrimIntel::build(NeighList *list) +{ + if (_fix->three_body_neighbor()==0 || + _full_props[list->listskip->index] == 0) { + if (_fix->precision() == FixIntel::PREC_MODE_MIXED) + build_t(list, nullptr, nullptr, nullptr, _fix->get_mixed_buffers()); + else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE) + build_t(list, nullptr, nullptr, nullptr, _fix->get_double_buffers()); + else + build_t(list, nullptr, nullptr, nullptr, _fix->get_single_buffers()); + } else { + int *nhalf, *cnumneigh, *nhalf_skip, *u; + if (_fix->precision() == FixIntel::PREC_MODE_MIXED) { + 
_fix->get_mixed_buffers()->get_list_data3(list->listskip,nhalf_skip,u); + _fix->get_mixed_buffers()->grow_data3(list, nhalf, cnumneigh); + build_t(list, nhalf, cnumneigh, nhalf_skip, _fix->get_mixed_buffers()); + } else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + _fix->get_double_buffers()->get_list_data3(list->listskip,nhalf_skip,u); + _fix->get_double_buffers()->grow_data3(list, nhalf, cnumneigh); + build_t(list, nhalf, cnumneigh, nhalf_skip, _fix->get_double_buffers()); + } else { + _fix->get_single_buffers()->get_list_data3(list->listskip,nhalf_skip,u); + _fix->get_single_buffers()->grow_data3(list,nhalf,cnumneigh); + build_t(list, nhalf, cnumneigh, nhalf_skip, _fix->get_single_buffers()); + } + } +} diff --git a/src/INTEL/npair_skip_trim_intel.h b/src/INTEL/npair_skip_trim_intel.h new file mode 100644 index 0000000000..f0018e5df4 --- /dev/null +++ b/src/INTEL/npair_skip_trim_intel.h @@ -0,0 +1,62 @@ +// clang-format off +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef NPAIR_CLASS +// clang-format off +NPairStyle(skip/trim/intel, + NPairSkipTrimIntel, + NP_SKIP | NP_HALF | NP_FULL | + NP_NSQ | NP_BIN | NP_MULTI | + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_INTEL); + +NPairStyle(skip/trim/ghost/intel, + NPairSkipTrimIntel, + NP_SKIP | NP_HALF | NP_FULL | + NP_NSQ | NP_BIN | NP_MULTI | + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_GHOST | NP_INTEL); +// clang-format on +#else + +#ifndef LMP_NPAIR_SKIP_TRIM_INTEL_H +#define LMP_NPAIR_SKIP_TRIM_INTEL_H + +#include "fix_intel.h" +#include "npair.h" + +#if defined(_OPENMP) +#include +#endif + +namespace LAMMPS_NS { + +class NPairSkipTrimIntel : public NPair { + public: + NPairSkipTrimIntel(class LAMMPS *); + ~NPairSkipTrimIntel() override; + void copy_neighbor_info() override; + void build(class NeighList *) override; + + protected: + FixIntel *_fix; + int *_inum_starts, *_inum_counts, *_full_props; + + template + void build_t(NeighList *, int *numhalf, int *cnumneigh, int *numhalf_skip, + IntelBuffers *); +}; + +} // namespace LAMMPS_NS + +#endif +#endif diff --git a/src/INTEL/npair_trim_intel.cpp b/src/INTEL/npair_trim_intel.cpp index d377419f1b..dcf10a3c87 100644 --- a/src/INTEL/npair_trim_intel.cpp +++ b/src/INTEL/npair_trim_intel.cpp @@ -88,7 +88,6 @@ void NPairTrimIntel::build_t(NeighList *list, for (int jj = 0; jj < jnum; jj++) { const int joriginal = jlist[jj]; const int j = joriginal & NEIGHMASK; - int addme = 1; // trim to shorter cutoff @@ -97,9 +96,7 @@ void NPairTrimIntel::build_t(NeighList *list, const flt_t delz = ztmp - x[j].z; const flt_t rsq = delx * delx + dely * dely + delz * delz; - if (rsq > cutsq_custom) addme = 0; - - if (addme) + if (rsq <= cutsq_custom) neighptr[n++] = joriginal; } diff --git a/src/OPENMP/npair_skip_omp.h b/src/OPENMP/npair_skip_omp.h index ce61968c17..a733308ccb 100644 --- a/src/OPENMP/npair_skip_omp.h +++ 
b/src/OPENMP/npair_skip_omp.h @@ -20,36 +20,36 @@ NPairStyle(skip/omp, NPairSkip, NP_SKIP | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | - NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_OMP); + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP); NPairStyle(skip/half/respa/omp, NPairSkipRespa, NP_SKIP | NP_RESPA | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | - NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_OMP); + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP); NPairStyle(skip/half/size/omp, NPairSkipSize, NP_SKIP | NP_SIZE | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | - NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_OMP); + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP); NPairStyle(skip/size/off2on/omp, NPairSkipSizeOff2on, NP_SKIP | NP_SIZE | NP_OFF2ON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_MULTI_OLD | - NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_OMP); + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP); NPairStyle(skip/size/off2on/oneside/omp, NPairSkipSizeOff2onOneside, NP_SKIP | NP_SIZE | NP_OFF2ON | NP_ONESIDE | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_NEWTON | NP_NEWTOFF | - NP_ORTHO | NP_TRI | NP_OMP); + NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP); NPairStyle(skip/ghost/omp, NPairSkip, NP_SKIP | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | - NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_OMP | NP_GHOST); + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP | NP_GHOST); // clang-format off #endif diff --git a/src/neigh_request.h b/src/neigh_request.h index a3114dff82..fa57922c93 100644 --- a/src/neigh_request.h +++ b/src/neigh_request.h @@ -26,6 +26,7 @@ class NeighRequest : protected Pointers { friend class NStencil; friend class NeighborKokkos; friend class NPairSkipIntel; + friend class NPairSkipTrimIntel; friend class FixIntel; protected: From 
06b962fc339312ef471f6e6fe12b038cb2c9ff9e Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Fri, 17 Nov 2023 09:50:03 -0700 Subject: [PATCH 040/116] Bugfix: port missed changes from #3846 --- src/OPENMP/npair_halffull_newton_trim_omp.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/OPENMP/npair_halffull_newton_trim_omp.cpp b/src/OPENMP/npair_halffull_newton_trim_omp.cpp index 1446175013..9fdd4957af 100644 --- a/src/OPENMP/npair_halffull_newton_trim_omp.cpp +++ b/src/OPENMP/npair_halffull_newton_trim_omp.cpp @@ -38,6 +38,8 @@ NPairHalffullNewtonTrimOmp::NPairHalffullNewtonTrimOmp(LAMMPS *lmp) : NPair(lmp) void NPairHalffullNewtonTrimOmp::build(NeighList *list) { const int inum_full = list->listfull->inum; + const double delta = 0.01 * force->angstrom; + const int triclinic = domain->triclinic; NPAIR_OMP_INIT; #if defined(_OPENMP) @@ -86,8 +88,17 @@ void NPairHalffullNewtonTrimOmp::build(NeighList *list) for (jj = 0; jj < jnum; jj++) { joriginal = jlist[jj]; j = joriginal & NEIGHMASK; + if (j < nlocal) { if (i > j) continue; + } else if (triclinic) { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } } else { if (x[j][2] < ztmp) continue; if (x[j][2] == ztmp) { From 6fc7f5689b06362181677ea7fb7394fa3f726742 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Fri, 17 Nov 2023 10:04:11 -0700 Subject: [PATCH 041/116] Port changes to OPENMP package --- src/OPENMP/npair_skip_omp.h | 12 +++---- src/OPENMP/npair_skip_trim_omp.h | 55 ++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 6 deletions(-) create mode 100644 src/OPENMP/npair_skip_trim_omp.h diff --git a/src/OPENMP/npair_skip_omp.h b/src/OPENMP/npair_skip_omp.h index a733308ccb..ce61968c17 100644 --- a/src/OPENMP/npair_skip_omp.h +++ b/src/OPENMP/npair_skip_omp.h @@ -20,36 +20,36 @@ NPairStyle(skip/omp, NPairSkip, NP_SKIP | NP_HALF | 
NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | - NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP); + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_OMP); NPairStyle(skip/half/respa/omp, NPairSkipRespa, NP_SKIP | NP_RESPA | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | - NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP); + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_OMP); NPairStyle(skip/half/size/omp, NPairSkipSize, NP_SKIP | NP_SIZE | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | - NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP); + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_OMP); NPairStyle(skip/size/off2on/omp, NPairSkipSizeOff2on, NP_SKIP | NP_SIZE | NP_OFF2ON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_MULTI_OLD | - NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP); + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_OMP); NPairStyle(skip/size/off2on/oneside/omp, NPairSkipSizeOff2onOneside, NP_SKIP | NP_SIZE | NP_OFF2ON | NP_ONESIDE | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_NEWTON | NP_NEWTOFF | - NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP); + NP_ORTHO | NP_TRI | NP_OMP); NPairStyle(skip/ghost/omp, NPairSkip, NP_SKIP | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | - NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP | NP_GHOST); + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_OMP | NP_GHOST); // clang-format off #endif diff --git a/src/OPENMP/npair_skip_trim_omp.h b/src/OPENMP/npair_skip_trim_omp.h new file mode 100644 index 0000000000..aba6f50e17 --- /dev/null +++ b/src/OPENMP/npair_skip_trim_omp.h @@ -0,0 +1,55 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. 
Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +// There is no benefit from multi-threading for skip lists, so we +// just forward the requests to the corresponding non-omp versions. + +#ifdef NPAIR_CLASS +// clang-format off +NPairStyle(skip/trim/omp, + NPairSkipTrim, + NP_SKIP | NP_HALF | NP_FULL | + NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP); + +NPairStyle(skip/trim/half/respa/omp, + NPairSkipTrimRespa, + NP_SKIP | NP_RESPA | NP_HALF | NP_FULL | + NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP); + +NPairStyle(skip/trim/half/size/omp, + NPairSkipTrimSize, + NP_SKIP | NP_SIZE | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP); + +NPairStyle(skip/trim/size/off2on/omp, + NPairSkipTrimSizeOff2on, + NP_SKIP | NP_SIZE | NP_OFF2ON | NP_HALF | + NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_MULTI_OLD | + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP); + +NPairStyle(skip/trim/size/off2on/oneside/omp, + NPairSkipTrimSizeOff2onOneside, + NP_SKIP | NP_SIZE | NP_OFF2ON | NP_ONESIDE | NP_HALF | + NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_NEWTON | NP_NEWTOFF | + NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP); + +NPairStyle(skip/trim/ghost/omp, + NPairSkipTrim, + NP_SKIP | NP_HALF | NP_FULL | + NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP | NP_GHOST); +// clang-format on +#endif + From be19b5c210c08988da430db330bb72a2b7c73703 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Fri, 17 Nov 2023 12:46:45
-0700 Subject: [PATCH 042/116] Make naming consistent --- ....h => npair_halffull_trim_newtoff_intel.h} | 22 +++++++------- ...p => npair_halffull_trim_newton_intel.cpp} | 10 +++---- ...l.h => npair_halffull_trim_newton_intel.h} | 16 +++++----- src/KOKKOS/npair_skip_kokkos.h | 8 ++--- ...pp => npair_halffull_trim_newtoff_omp.cpp} | 6 ++-- ...mp.h => npair_halffull_trim_newtoff_omp.h} | 18 +++++------ ...cpp => npair_halffull_trim_newton_omp.cpp} | 8 +++-- ...omp.h => npair_halffull_trim_newton_omp.h} | 16 +++++----- ...im.cpp => npair_halffull_trim_newtoff.cpp} | 6 ++-- ...f_trim.h => npair_halffull_trim_newtoff.h} | 30 +++++++++---------- ...rim.cpp => npair_halffull_trim_newton.cpp} | 6 ++-- ...on_trim.h => npair_halffull_trim_newton.h} | 18 +++++------ ...spa_trim.cpp => npair_skip_trim_respa.cpp} | 6 ++-- ...p_respa_trim.h => npair_skip_trim_respa.h} | 12 ++++---- ...size_trim.cpp => npair_skip_trim_size.cpp} | 6 ++-- ...kip_size_trim.h => npair_skip_trim_size.h} | 12 ++++---- ...im.cpp => npair_skip_trim_size_off2on.cpp} | 6 ++-- ...n_trim.h => npair_skip_trim_size_off2on.h} | 12 ++++---- ...> npair_skip_trim_size_off2on_oneside.cpp} | 6 ++-- ... 
=> npair_skip_trim_size_off2on_oneside.h} | 12 ++++---- 20 files changed, 119 insertions(+), 117 deletions(-) rename src/INTEL/{npair_halffull_newtoff_trim_intel.h => npair_halffull_trim_newtoff_intel.h} (72%) rename src/INTEL/{npair_halffull_newton_trim_intel.cpp => npair_halffull_trim_newton_intel.cpp} (97%) rename src/INTEL/{npair_halffull_newton_trim_intel.h => npair_halffull_trim_newton_intel.h} (81%) rename src/OPENMP/{npair_halffull_newtoff_trim_omp.cpp => npair_halffull_trim_newtoff_omp.cpp} (94%) rename src/OPENMP/{npair_halffull_newtoff_trim_omp.h => npair_halffull_trim_newtoff_omp.h} (72%) rename src/OPENMP/{npair_halffull_newton_trim_omp.cpp => npair_halffull_trim_newton_omp.cpp} (94%) rename src/OPENMP/{npair_halffull_newton_trim_omp.h => npair_halffull_trim_newton_omp.h} (76%) rename src/{npair_halffull_newtoff_trim.cpp => npair_halffull_trim_newtoff.cpp} (94%) rename src/{npair_halffull_newtoff_trim.h => npair_halffull_trim_newtoff.h} (66%) rename src/{npair_halffull_newton_trim.cpp => npair_halffull_trim_newton.cpp} (95%) rename src/{npair_halffull_newton_trim.h => npair_halffull_trim_newton.h} (74%) rename src/{npair_skip_respa_trim.cpp => npair_skip_trim_respa.cpp} (97%) rename src/{npair_skip_respa_trim.h => npair_skip_trim_respa.h} (82%) rename src/{npair_skip_size_trim.cpp => npair_skip_trim_size.cpp} (95%) rename src/{npair_skip_size_trim.h => npair_skip_trim_size.h} (82%) rename src/{npair_skip_size_off2on_trim.cpp => npair_skip_trim_size_off2on.cpp} (95%) rename src/{npair_skip_size_off2on_trim.h => npair_skip_trim_size_off2on.h} (80%) rename src/{npair_skip_size_off2on_oneside_trim.cpp => npair_skip_trim_size_off2on_oneside.cpp} (96%) rename src/{npair_skip_size_off2on_oneside_trim.h => npair_skip_trim_size_off2on_oneside.h} (78%) diff --git a/src/INTEL/npair_halffull_newtoff_trim_intel.h b/src/INTEL/npair_halffull_trim_newtoff_intel.h similarity index 72% rename from src/INTEL/npair_halffull_newtoff_trim_intel.h rename to 
src/INTEL/npair_halffull_trim_newtoff_intel.h index d8594ce3b8..5e8b01cd09 100644 --- a/src/INTEL/npair_halffull_newtoff_trim_intel.h +++ b/src/INTEL/npair_halffull_trim_newtoff_intel.h @@ -21,24 +21,24 @@ #ifdef NPAIR_CLASS // clang-format off -NPairStyle(halffull/newtoff/trim/intel, - NPairHalffullNewtoffTrim, +NPairStyle(halffull/trim/newtoff/intel, + NPairHalffullTrimNewtoff, NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF | NP_ORTHO | NP_TRI | NP_TRIM | NP_INTEL); -NPairStyle(halffull/newtoff/skip/trim/intel, - NPairHalffullNewtoffTrim, +NPairStyle(halffull/trim/newtoff/skip/intel, + NPairHalffullTrimNewtoff, NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF | - NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_INTEL); + NP_ORTHO | NP_TRI | NP_TRIM | NP_SKIP | NP_INTEL); -NPairStyle(halffull/newtoff/ghost/trim/intel, - NPairHalffullNewtoffTrim, +NPairStyle(halffull/trim/newtoff/ghost/intel, + NPairHalffullTrimNewtoff, NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF | - NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_INTEL); + NP_ORTHO | NP_TRI | NP_TRIM | NP_GHOST | NP_INTEL); -NPairStyle(halffull/newtoff/skip/ghost/trim/intel, - NPairHalffullNewtoffTrim, +NPairStyle(halffull/trim/newtoff/skip/ghost/intel, + NPairHalffullTrimNewtoff, NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF | - NP_ORTHO | NP_TRI | NP_SKIP | NP_GHOST | NP_TRIM | NP_INTEL); + NP_ORTHO | NP_TRI | NP_TRIM | NP_SKIP | NP_GHOST | NP_INTEL); // clang-format on #endif diff --git a/src/INTEL/npair_halffull_newton_trim_intel.cpp b/src/INTEL/npair_halffull_trim_newton_intel.cpp similarity index 97% rename from src/INTEL/npair_halffull_newton_trim_intel.cpp rename to src/INTEL/npair_halffull_trim_newton_intel.cpp index 34b9b20e9c..b1b69734a4 100644 --- a/src/INTEL/npair_halffull_newton_trim_intel.cpp +++ b/src/INTEL/npair_halffull_trim_newton_intel.cpp @@ -16,7 +16,7 @@ Contributing author: Stan Moore (SNL) 
------------------------------------------------------------------------- */ -#include "npair_halffull_newton_trim_intel.h" +#include "npair_halffull_trim_newton_intel.h" #include "atom.h" #include "comm.h" @@ -31,7 +31,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NPairHalffullNewtonTrimIntel::NPairHalffullNewtonTrimIntel(LAMMPS *lmp) : NPair(lmp) { +NPairHalffullTrimNewtonIntel::NPairHalffullTrimNewtonIntel(LAMMPS *lmp) : NPair(lmp) { _fix = static_cast(modify->get_fix_by_id("package_intel")); if (!_fix) error->all(FLERR, "The 'package intel' command is required for /intel styles"); } @@ -44,7 +44,7 @@ NPairHalffullNewtonTrimIntel::NPairHalffullNewtonTrimIntel(LAMMPS *lmp) : NPair( ------------------------------------------------------------------------- */ template -void NPairHalffullNewtonTrimIntel::build_t(NeighList *list, +void NPairHalffullTrimNewtonIntel::build_t(NeighList *list, IntelBuffers *buffers) { const int inum_full = list->listfull->inum; @@ -182,7 +182,7 @@ void NPairHalffullNewtonTrimIntel::build_t(NeighList *list, ------------------------------------------------------------------------- */ template -void NPairHalffullNewtonTrimIntel::build_t3(NeighList *list, int *numhalf, +void NPairHalffullTrimNewtonIntel::build_t3(NeighList *list, int *numhalf, IntelBuffers *buffers) { const int inum_full = list->listfull->inum; @@ -272,7 +272,7 @@ void NPairHalffullNewtonTrimIntel::build_t3(NeighList *list, int *numhalf, /* ---------------------------------------------------------------------- */ -void NPairHalffullNewtonTrimIntel::build(NeighList *list) +void NPairHalffullTrimNewtonIntel::build(NeighList *list) { if (_fix->three_body_neighbor() == 0 || domain->triclinic) { if (_fix->precision() == FixIntel::PREC_MODE_MIXED) diff --git a/src/INTEL/npair_halffull_newton_trim_intel.h b/src/INTEL/npair_halffull_trim_newton_intel.h similarity index 81% rename from 
src/INTEL/npair_halffull_newton_trim_intel.h rename to src/INTEL/npair_halffull_trim_newton_intel.h index 0ca551d682..dfce63e93d 100644 --- a/src/INTEL/npair_halffull_newton_trim_intel.h +++ b/src/INTEL/npair_halffull_trim_newton_intel.h @@ -18,20 +18,20 @@ #ifdef NPAIR_CLASS // clang-format off -NPairStyle(halffull/newton/trim/intel, - NPairHalffullNewtonTrimIntel, +NPairStyle(halffull/trim/newton/intel, + NPairHalffullTrimNewtonIntel, NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI| NP_TRIM | NP_INTEL); -NPairStyle(halffull/newton/skip/trim/intel, - NPairHalffullNewtonTrimIntel, +NPairStyle(halffull/trim/newton/skip/intel, + NPairHalffullTrimNewtonIntel, NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_INTEL); // clang-format on #else -#ifndef LMP_NPAIR_HALFFULL_NEWTON_TRIM_INTEL_H -#define LMP_NPAIR_HALFFULL_NEWTON_TRIM_INTEL_H +#ifndef LMP_NPAIR_HALFFULL_TRIM_NEWTON_INTEL_H +#define LMP_NPAIR_HALFFULL_TRIM_NEWTON_INTEL_H #include "fix_intel.h" #include "npair.h" @@ -42,9 +42,9 @@ NPairStyle(halffull/newton/skip/trim/intel, namespace LAMMPS_NS { -class NPairHalffullNewtonTrimIntel : public NPair { +class NPairHalffullTrimNewtonIntel : public NPair { public: - NPairHalffullNewtonTrimIntel(class LAMMPS *); + NPairHalffullTrimNewtonIntel(class LAMMPS *); void build(class NeighList *) override; protected: diff --git a/src/KOKKOS/npair_skip_kokkos.h b/src/KOKKOS/npair_skip_kokkos.h index 7672a2c36c..293c53677b 100644 --- a/src/KOKKOS/npair_skip_kokkos.h +++ b/src/KOKKOS/npair_skip_kokkos.h @@ -42,28 +42,28 @@ NPairStyle(skip/ghost/kk/host, NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_HOST); typedef NPairSkipKokkos NPairKokkosSkipTrimDevice; -NPairStyle(skip/kk/device, +NPairStyle(skip/trim/kk/device, NPairKokkosSkipTrimDevice, NP_SKIP | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM 
|NP_KOKKOS_DEVICE); typedef NPairSkipKokkos NPairKokkosSkipTrimGhostDevice; -NPairStyle(skip/ghost/kk/device, +NPairStyle(skip/trim/ghost/kk/device, NPairKokkosSkipTrimGhostDevice, NP_SKIP | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_GHOST | NP_KOKKOS_DEVICE); typedef NPairSkipKokkos NPairKokkosSkipTrimHost; -NPairStyle(skip/kk/host, +NPairStyle(skip/trim/kk/host, NPairKokkosSkipTrimHost, NP_SKIP | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_HOST); typedef NPairSkipKokkos NPairKokkosSkipTrimGhostHost; -NPairStyle(skip/ghost/kk/host, +NPairStyle(skip/trim/ghost/kk/host, NPairKokkosSkipTrimGhostHost, NP_SKIP | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | diff --git a/src/OPENMP/npair_halffull_newtoff_trim_omp.cpp b/src/OPENMP/npair_halffull_trim_newtoff_omp.cpp similarity index 94% rename from src/OPENMP/npair_halffull_newtoff_trim_omp.cpp rename to src/OPENMP/npair_halffull_trim_newtoff_omp.cpp index d35b3b2ee8..d0c5c1ab86 100644 --- a/src/OPENMP/npair_halffull_newtoff_trim_omp.cpp +++ b/src/OPENMP/npair_halffull_trim_newtoff_omp.cpp @@ -12,7 +12,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ -#include "npair_halffull_newtoff_trim_omp.h" +#include "npair_halffull_trim_newtoff_omp.h" #include "atom.h" #include "error.h" @@ -26,7 +26,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NPairHalffullNewtoffTrimOmp::NPairHalffullNewtoffTrimOmp(LAMMPS *lmp) : NPair(lmp) {} +NPairHalffullTrimNewtoffOmp::NPairHalffullTrimNewtoffOmp(LAMMPS *lmp) : NPair(lmp) {} /* ---------------------------------------------------------------------- build half list from full list and trim to shorter cutoff @@ -35,7 +35,7 @@ NPairHalffullNewtoffTrimOmp::NPairHalffullNewtoffTrimOmp(LAMMPS *lmp) : NPair(lm works if full list is a skip list ------------------------------------------------------------------------- */ -void NPairHalffullNewtoffTrimOmp::build(NeighList *list) +void NPairHalffullTrimNewtoffOmp::build(NeighList *list) { const int inum_full = list->listfull->inum; diff --git a/src/OPENMP/npair_halffull_newtoff_trim_omp.h b/src/OPENMP/npair_halffull_trim_newtoff_omp.h similarity index 72% rename from src/OPENMP/npair_halffull_newtoff_trim_omp.h rename to src/OPENMP/npair_halffull_trim_newtoff_omp.h index 19e1c55eeb..c86c132b69 100644 --- a/src/OPENMP/npair_halffull_newtoff_trim_omp.h +++ b/src/OPENMP/npair_halffull_trim_newtoff_omp.h @@ -13,28 +13,28 @@ #ifdef NPAIR_CLASS // clang-format off -NPairStyle(halffull/newtoff/trim/omp, - NPairHalffullNewtoffTrimOmp, +NPairStyle(halffull/trim/newtoff/omp, + NPairHalffullTrimNewtoffOmp, NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP); -NPairStyle(halffull/newtoff/skip/trim/omp, - NPairHalffullNewtoffTrimOmp, +NPairStyle(halffull/trim/newtoff/skip/omp, + NPairHalffullTrimNewtoffOmp, NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF | - NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_OMP); + NP_ORTHO | 
NP_TRI | NP_TRIM | NP_SKIP | NP_OMP); // clang-format on #else -#ifndef LMP_NPAIR_HALFFULL_NEWTOFF_TRIM_OMP_H -#define LMP_NPAIR_HALFFULL_NEWTOFF_TRIM_OMP_H +#ifndef LMP_NPAIR_HALFFULL_TRIM_NEWTOFF_OMP_H +#define LMP_NPAIR_HALFFULL_TRIM_NEWTOFF_OMP_H #include "npair.h" namespace LAMMPS_NS { -class NPairHalffullNewtoffTrimOmp : public NPair { +class NPairHalffullTrimNewtoffOmp : public NPair { public: - NPairHalffullNewtoffTrimOmp(class LAMMPS *); + NPairHalffullTrimNewtoffOmp(class LAMMPS *); void build(class NeighList *) override; }; diff --git a/src/OPENMP/npair_halffull_newton_trim_omp.cpp b/src/OPENMP/npair_halffull_trim_newton_omp.cpp similarity index 94% rename from src/OPENMP/npair_halffull_newton_trim_omp.cpp rename to src/OPENMP/npair_halffull_trim_newton_omp.cpp index 9fdd4957af..bd9d553eb9 100644 --- a/src/OPENMP/npair_halffull_newton_trim_omp.cpp +++ b/src/OPENMP/npair_halffull_trim_newton_omp.cpp @@ -12,10 +12,12 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ -#include "npair_halffull_newton_trim_omp.h" +#include "npair_halffull_trim_newton_omp.h" #include "atom.h" +#include "domain.h" #include "error.h" +#include "force.h" #include "my_page.h" #include "neigh_list.h" #include "npair_omp.h" @@ -26,7 +28,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NPairHalffullNewtonTrimOmp::NPairHalffullNewtonTrimOmp(LAMMPS *lmp) : NPair(lmp) {} +NPairHalffullTrimNewtonOmp::NPairHalffullTrimNewtonOmp(LAMMPS *lmp) : NPair(lmp) {} /* ---------------------------------------------------------------------- build half list from full list and trim to shorter cutoff @@ -35,7 +37,7 @@ NPairHalffullNewtonTrimOmp::NPairHalffullNewtonTrimOmp(LAMMPS *lmp) : NPair(lmp) works if full list is a skip list ------------------------------------------------------------------------- */ -void NPairHalffullNewtonTrimOmp::build(NeighList 
*list) +void NPairHalffullTrimNewtonOmp::build(NeighList *list) { const int inum_full = list->listfull->inum; const double delta = 0.01 * force->angstrom; diff --git a/src/OPENMP/npair_halffull_newton_trim_omp.h b/src/OPENMP/npair_halffull_trim_newton_omp.h similarity index 76% rename from src/OPENMP/npair_halffull_newton_trim_omp.h rename to src/OPENMP/npair_halffull_trim_newton_omp.h index 4cb84f1b3a..c6950dfa45 100644 --- a/src/OPENMP/npair_halffull_newton_trim_omp.h +++ b/src/OPENMP/npair_halffull_trim_newton_omp.h @@ -13,28 +13,28 @@ #ifdef NPAIR_CLASS // clang-format off -NPairStyle(halffull/newton/trim/omp, - NPairHalffullNewtonTrimOmp, +NPairStyle(halffull/trim/newton/omp, + NPairHalffullTrimNewtonOmp, NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_ORTHO | NP_TRI| NP_TRIM | NP_OMP); -NPairStyle(halffull/newton/skip/trim/omp, - NPairHalffullNewtonTrimOmp, +NPairStyle(halffull/trim/newton/skip/omp, + NPairHalffullTrimNewtonOmp, NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_OMP); // clang-format on #else -#ifndef LMP_NPAIR_HALFFULL_NEWTON_TRIM_OMP_H -#define LMP_NPAIR_HALFFULL_NEWTON_TRIM_OMP_H +#ifndef LMP_NPAIR_HALFFULL_TRIM_NEWTON_OMP_H +#define LMP_NPAIR_HALFFULL_TRIM_NEWTON_OMP_H #include "npair.h" namespace LAMMPS_NS { -class NPairHalffullNewtonTrimOmp : public NPair { +class NPairHalffullTrimNewtonOmp : public NPair { public: - NPairHalffullNewtonTrimOmp(class LAMMPS *); + NPairHalffullTrimNewtonOmp(class LAMMPS *); void build(class NeighList *) override; }; diff --git a/src/npair_halffull_newtoff_trim.cpp b/src/npair_halffull_trim_newtoff.cpp similarity index 94% rename from src/npair_halffull_newtoff_trim.cpp rename to src/npair_halffull_trim_newtoff.cpp index 8ed392da2f..db97bf185a 100644 --- a/src/npair_halffull_newtoff_trim.cpp +++ b/src/npair_halffull_trim_newtoff.cpp @@ -11,7 +11,7 @@ See the README file in the top-level LAMMPS 
directory. ------------------------------------------------------------------------- */ -#include "npair_halffull_newtoff_trim.h" +#include "npair_halffull_trim_newtoff.h" #include "atom.h" #include "error.h" @@ -22,7 +22,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NPairHalffullNewtoffTrim::NPairHalffullNewtoffTrim(LAMMPS *lmp) : NPair(lmp) {} +NPairHalffullTrimNewtoff::NPairHalffullTrimNewtoff(LAMMPS *lmp) : NPair(lmp) {} /* ---------------------------------------------------------------------- build half list from full list @@ -33,7 +33,7 @@ NPairHalffullNewtoffTrim::NPairHalffullNewtoffTrim(LAMMPS *lmp) : NPair(lmp) {} if ghost, also store neighbors of ghost atoms & set inum,gnum correctly ------------------------------------------------------------------------- */ -void NPairHalffullNewtoffTrim::build(NeighList *list) +void NPairHalffullTrimNewtoff::build(NeighList *list) { int i, j, ii, jj, n, jnum, joriginal; int *neighptr, *jlist; diff --git a/src/npair_halffull_newtoff_trim.h b/src/npair_halffull_trim_newtoff.h similarity index 66% rename from src/npair_halffull_newtoff_trim.h rename to src/npair_halffull_trim_newtoff.h index 0fe730f9c1..ca7726c837 100644 --- a/src/npair_halffull_newtoff_trim.h +++ b/src/npair_halffull_trim_newtoff.h @@ -13,38 +13,38 @@ #ifdef NPAIR_CLASS // clang-format off -NPairStyle(halffull/newtoff/trim, - NPairHalffullNewtoffTrim, +NPairStyle(halffull/trim/newtoff, + NPairHalffullTrimNewtoff, NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF | NP_ORTHO | NP_TRI | NP_TRIM); -NPairStyle(halffull/newtoff/skip/trim, - NPairHalffullNewtoffTrim, +NPairStyle(halffull/trim/newtoff/skip, + NPairHalffullTrimNewtoff, NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF | - NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM); + NP_ORTHO | NP_TRI | NP_TRIM | NP_SKIP); -NPairStyle(halffull/newtoff/ghost/trim, - NPairHalffullNewtoffTrim, 
+NPairStyle(halffull/trim/newtoff/ghost, + NPairHalffullTrimNewtoff, NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF | - NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM); + NP_ORTHO | NP_TRI | NP_TRIM | NP_GHOST); -NPairStyle(halffull/newtoff/skip/ghost/trim, - NPairHalffullNewtoffTrim, +NPairStyle(halffull/trim/newtoff/skip/ghost, + NPairHalffullTrimNewtoff, NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF | - NP_ORTHO | NP_TRI | NP_SKIP | NP_GHOST | NP_TRIM); + NP_ORTHO | NP_TRI | NP_TRIM | NP_SKIP | NP_GHOST); // clang-format on #else -#ifndef LMP_NPAIR_HALFFULL_NEWTOFF_TRIM_H -#define LMP_NPAIR_HALFFULL_NEWTOFF_TRIM_H +#ifndef LMP_NPAIR_HALFFULL_TRIM_NEWTOFF_H +#define LMP_NPAIR_HALFFULL_TRIM_NEWTOFF_H #include "npair.h" namespace LAMMPS_NS { -class NPairHalffullNewtoffTrim : public NPair { +class NPairHalffullTrimNewtoff : public NPair { public: - NPairHalffullNewtoffTrim(class LAMMPS *); + NPairHalffullTrimNewtoff(class LAMMPS *); void build(class NeighList *) override; }; diff --git a/src/npair_halffull_newton_trim.cpp b/src/npair_halffull_trim_newton.cpp similarity index 95% rename from src/npair_halffull_newton_trim.cpp rename to src/npair_halffull_trim_newton.cpp index e758c04284..56cef00b25 100644 --- a/src/npair_halffull_newton_trim.cpp +++ b/src/npair_halffull_trim_newton.cpp @@ -11,7 +11,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ -#include "npair_halffull_newton_trim.h" +#include "npair_halffull_trim_newton.h" #include "atom.h" #include "domain.h" @@ -24,7 +24,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NPairHalffullNewtonTrim::NPairHalffullNewtonTrim(LAMMPS *lmp) : NPair(lmp) {} +NPairHalffullTrimNewton::NPairHalffullTrimNewton(LAMMPS *lmp) : NPair(lmp) {} /* ---------------------------------------------------------------------- build half list from full list @@ -33,7 +33,7 @@ NPairHalffullNewtonTrim::NPairHalffullNewtonTrim(LAMMPS *lmp) : NPair(lmp) {} works if full list is a skip list ------------------------------------------------------------------------- */ -void NPairHalffullNewtonTrim::build(NeighList *list) +void NPairHalffullTrimNewton::build(NeighList *list) { int i, j, ii, jj, n, jnum, joriginal; int *neighptr, *jlist; diff --git a/src/npair_halffull_newton_trim.h b/src/npair_halffull_trim_newton.h similarity index 74% rename from src/npair_halffull_newton_trim.h rename to src/npair_halffull_trim_newton.h index aad3edcbfb..5eb5aa3cd3 100644 --- a/src/npair_halffull_newton_trim.h +++ b/src/npair_halffull_trim_newton.h @@ -13,28 +13,28 @@ #ifdef NPAIR_CLASS // clang-format off -NPairStyle(halffull/newton/trim, - NPairHalffullNewtonTrim, +NPairStyle(halffull/trim/newton, + NPairHalffullTrimNewton, NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_ORTHO | NP_TRI | NP_TRIM); -NPairStyle(halffull/newton/skip/trim, - NPairHalffullNewtonTrim, +NPairStyle(halffull/trim/newton/skip, + NPairHalffullTrimNewton, NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | - NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM); + NP_ORTHO | NP_TRI | NP_TRIM | NP_SKIP); // clang-format on #else -#ifndef LMP_NPAIR_HALFFULL_NEWTON_TRIM_H -#define LMP_NPAIR_HALFFULL_NEWTON_TRIM_H +#ifndef 
LMP_NPAIR_HALFFULL_TRIM_NEWTON_H +#define LMP_NPAIR_HALFFULL_TRIM_NEWTON_H #include "npair.h" namespace LAMMPS_NS { -class NPairHalffullNewtonTrim : public NPair { +class NPairHalffullTrimNewton : public NPair { public: - NPairHalffullNewtonTrim(class LAMMPS *); + NPairHalffullTrimNewton(class LAMMPS *); void build(class NeighList *) override; }; diff --git a/src/npair_skip_respa_trim.cpp b/src/npair_skip_trim_respa.cpp similarity index 97% rename from src/npair_skip_respa_trim.cpp rename to src/npair_skip_trim_respa.cpp index 64b1c4d716..7dd040ca0a 100644 --- a/src/npair_skip_respa_trim.cpp +++ b/src/npair_skip_trim_respa.cpp @@ -12,7 +12,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ -#include "npair_skip_respa_trim.h" +#include "npair_skip_trim_respa.h" #include "atom.h" #include "error.h" @@ -23,7 +23,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NPairSkipRespaTrim::NPairSkipRespaTrim(LAMMPS *lmp) : NPair(lmp) {} +NPairSkipTrimRespa::NPairSkipTrimRespa(LAMMPS *lmp) : NPair(lmp) {} /* ---------------------------------------------------------------------- build skip list for subset of types from parent list @@ -31,7 +31,7 @@ NPairSkipRespaTrim::NPairSkipRespaTrim(LAMMPS *lmp) : NPair(lmp) {} this is for respa lists, copy the inner/middle values from parent ------------------------------------------------------------------------- */ -void NPairSkipRespaTrim::build(NeighList *list) +void NPairSkipTrimRespa::build(NeighList *list) { int i,j,ii,jj,n,itype,jnum,joriginal,n_inner,n_middle; int *neighptr,*jlist,*neighptr_inner,*neighptr_middle; diff --git a/src/npair_skip_respa_trim.h b/src/npair_skip_trim_respa.h similarity index 82% rename from src/npair_skip_respa_trim.h rename to src/npair_skip_trim_respa.h index f10b726cbe..dcfe71c28d 100644 --- a/src/npair_skip_respa_trim.h +++ b/src/npair_skip_trim_respa.h @@ 
-13,24 +13,24 @@ #ifdef NPAIR_CLASS // clang-format off -NPairStyle(skip/half/respa/trim, - NPairSkipRespaTrim, +NPairStyle(skip/trim/half/respa, + NPairSkipTrimRespa, NP_SKIP | NP_RESPA | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM); // clang-format on #else -#ifndef LMP_NPAIR_SKIP_RESPA_TRIM_H -#define LMP_NPAIR_SKIP_RESPA_TRIM_H +#ifndef LMP_NPAIR_SKIP_TRIM_RESPA_H +#define LMP_NPAIR_SKIP_TRIM_RESPA_H #include "npair.h" namespace LAMMPS_NS { -class NPairSkipRespaTrim : public NPair { +class NPairSkipTrimRespa : public NPair { public: - NPairSkipRespaTrim(class LAMMPS *); + NPairSkipTrimRespa(class LAMMPS *); void build(class NeighList *) override; }; diff --git a/src/npair_skip_size_trim.cpp b/src/npair_skip_trim_size.cpp similarity index 95% rename from src/npair_skip_size_trim.cpp rename to src/npair_skip_trim_size.cpp index 3fd8f912f9..fab70a78b5 100644 --- a/src/npair_skip_size_trim.cpp +++ b/src/npair_skip_trim_size.cpp @@ -11,7 +11,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ -#include "npair_skip_size_trim.h" +#include "npair_skip_trim_size.h" #include "atom.h" #include "error.h" @@ -22,14 +22,14 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NPairSkipSizeTrim::NPairSkipSizeTrim(LAMMPS *lmp) : NPair(lmp) {} +NPairSkipTrimSize::NPairSkipTrimSize(LAMMPS *lmp) : NPair(lmp) {} /* ---------------------------------------------------------------------- build skip list for subset of types from parent list iskip and ijskip flag which atom types and type pairs to skip ------------------------------------------------------------------------- */ -void NPairSkipSizeTrim::build(NeighList *list) +void NPairSkipTrimSize::build(NeighList *list) { int i, j, ii, jj, n, itype, jnum, joriginal; int *neighptr, *jlist; diff --git a/src/npair_skip_size_trim.h b/src/npair_skip_trim_size.h similarity index 82% rename from src/npair_skip_size_trim.h rename to src/npair_skip_trim_size.h index e94b2f5f29..3b536860ca 100644 --- a/src/npair_skip_size_trim.h +++ b/src/npair_skip_trim_size.h @@ -13,23 +13,23 @@ #ifdef NPAIR_CLASS // clang-format off -NPairStyle(skip/half/size/trim, - NPairSkipSizeTrim, +NPairStyle(skip/trim/half/size, + NPairSkipTrimSize, NP_SKIP | NP_SIZE | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM); // clang-format on #else -#ifndef LMP_NPAIR_SKIP_SIZE_TRIM_H -#define LMP_NPAIR_SKIP_SIZE_TRIM_H +#ifndef LMP_NPAIR_SKIP_TRIM_SIZE_H +#define LMP_NPAIR_SKIP_TRIM_SIZE_H #include "npair.h" namespace LAMMPS_NS { -class NPairSkipSizeTrim : public NPair { +class NPairSkipTrimSize : public NPair { public: - NPairSkipSizeTrim(class LAMMPS *); + NPairSkipTrimSize(class LAMMPS *); void build(class NeighList *) override; }; diff --git a/src/npair_skip_size_off2on_trim.cpp b/src/npair_skip_trim_size_off2on.cpp similarity index 95% rename from 
src/npair_skip_size_off2on_trim.cpp rename to src/npair_skip_trim_size_off2on.cpp index 9591bbc4eb..3e9a1e5f63 100644 --- a/src/npair_skip_size_off2on_trim.cpp +++ b/src/npair_skip_trim_size_off2on.cpp @@ -11,7 +11,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ -#include "npair_skip_size_off2on_trim.h" +#include "npair_skip_trim_size_off2on.h" #include "atom.h" #include "error.h" @@ -22,7 +22,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NPairSkipSizeOff2onTrim::NPairSkipSizeOff2onTrim(LAMMPS *lmp) : NPair(lmp) {} +NPairSkipTrimSizeOff2on::NPairSkipTrimSizeOff2on(LAMMPS *lmp) : NPair(lmp) {} /* ---------------------------------------------------------------------- build skip list for subset of types from parent list @@ -30,7 +30,7 @@ NPairSkipSizeOff2onTrim::NPairSkipSizeOff2onTrim(LAMMPS *lmp) : NPair(lmp) {} parent non-skip list used newton off, this skip list is newton on ------------------------------------------------------------------------- */ -void NPairSkipSizeOff2onTrim::build(NeighList *list) +void NPairSkipTrimSizeOff2on::build(NeighList *list) { int i, j, ii, jj, n, itype, jnum, joriginal; tagint itag, jtag; diff --git a/src/npair_skip_size_off2on_trim.h b/src/npair_skip_trim_size_off2on.h similarity index 80% rename from src/npair_skip_size_off2on_trim.h rename to src/npair_skip_trim_size_off2on.h index e471ddd2cc..6e52082329 100644 --- a/src/npair_skip_size_off2on_trim.h +++ b/src/npair_skip_trim_size_off2on.h @@ -13,24 +13,24 @@ #ifdef NPAIR_CLASS // clang-format off -NPairStyle(skip/size/off2on/trim, - NPairSkipSizeOff2onTrim, +NPairStyle(skip/trim/size/off2on, + NPairSkipTrimSizeOff2on, NP_SKIP | NP_SIZE | NP_OFF2ON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM); // clang-format on #else -#ifndef LMP_NPAIR_SKIP_SIZE_OFF2ON_TRIM_H 
-#define LMP_NPAIR_SKIP_SIZE_OFF2ON_TRIM_H +#ifndef LMP_NPAIR_SKIP_TRIM_SIZE_OFF2ON_H +#define LMP_NPAIR_SKIP_TRIM_SIZE_OFF2ON_H #include "npair.h" namespace LAMMPS_NS { -class NPairSkipSizeOff2onTrim : public NPair { +class NPairSkipTrimSizeOff2on : public NPair { public: - NPairSkipSizeOff2onTrim(class LAMMPS *); + NPairSkipTrimSizeOff2on(class LAMMPS *); void build(class NeighList *) override; }; diff --git a/src/npair_skip_size_off2on_oneside_trim.cpp b/src/npair_skip_trim_size_off2on_oneside.cpp similarity index 96% rename from src/npair_skip_size_off2on_oneside_trim.cpp rename to src/npair_skip_trim_size_off2on_oneside.cpp index 91940d3135..9d43ac8087 100644 --- a/src/npair_skip_size_off2on_oneside_trim.cpp +++ b/src/npair_skip_trim_size_off2on_oneside.cpp @@ -12,7 +12,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ -#include "npair_skip_size_off2on_oneside_trim.h" +#include "npair_skip_trim_size_off2on_oneside.h" #include "atom.h" #include "domain.h" @@ -24,7 +24,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NPairSkipSizeOff2onOnesideTrim::NPairSkipSizeOff2onOnesideTrim(LAMMPS *lmp) : +NPairSkipTrimSizeOff2onOneside::NPairSkipTrimSizeOff2onOneside(LAMMPS *lmp) : NPair(lmp) {} /* ---------------------------------------------------------------------- @@ -34,7 +34,7 @@ NPairSkipSizeOff2onOnesideTrim::NPairSkipSizeOff2onOnesideTrim(LAMMPS *lmp) : this skip list is newton on and onesided ------------------------------------------------------------------------- */ -void NPairSkipSizeOff2onOnesideTrim::build(NeighList *list) +void NPairSkipTrimSizeOff2onOneside::build(NeighList *list) { int i,j,ii,jj,itype,jnum,joriginal,flip,tmp; int *surf,*jlist; diff --git a/src/npair_skip_size_off2on_oneside_trim.h b/src/npair_skip_trim_size_off2on_oneside.h similarity index 78% rename from 
src/npair_skip_size_off2on_oneside_trim.h rename to src/npair_skip_trim_size_off2on_oneside.h index 236b886fe4..27861123dd 100644 --- a/src/npair_skip_size_off2on_oneside_trim.h +++ b/src/npair_skip_trim_size_off2on_oneside.h @@ -13,24 +13,24 @@ #ifdef NPAIR_CLASS // clang-format off -NPairStyle(skip/size/off2on/oneside/trim, - NPairSkipSizeOff2onOnesideTrim, +NPairStyle(skip/trim/size/off2on/oneside, + NPairSkipTrimSizeOff2onOneside, NP_SKIP | NP_SIZE | NP_OFF2ON | NP_ONESIDE | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM); // clang-format on #else -#ifndef LMP_NPAIR_SKIP_SIZE_OFF2ON_ONESIDE_TRIM_H -#define LMP_NPAIR_SKIP_SIZE_OFF2ON_ONESIDE_TRIM_H +#ifndef LMP_NPAIR_SKIP_TRIM_SIZE_OFF2ON_ONESIDE_H +#define LMP_NPAIR_SKIP_TRIM_SIZE_OFF2ON_ONESIDE_H #include "npair.h" namespace LAMMPS_NS { -class NPairSkipSizeOff2onOnesideTrim : public NPair { +class NPairSkipTrimSizeOff2onOneside : public NPair { public: - NPairSkipSizeOff2onOnesideTrim(class LAMMPS *); + NPairSkipTrimSizeOff2onOneside(class LAMMPS *); void build(class NeighList *) override; }; From be286d2c7d58302cb3de19caa614a90018f25d07 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 18 Nov 2023 06:21:28 -0500 Subject: [PATCH 043/116] lower the C++ standard to 14 for some files when compiling with intel classic compiler --- cmake/CMakeLists.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 95d738d279..961bcef551 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -428,6 +428,18 @@ if(BUILD_OMP) target_link_libraries(lmp PRIVATE OpenMP::OpenMP_CXX) endif() +# lower C++ standard for fmtlib sources when using Intel classic compiler +if((CMAKE_CXX_COMPILER_ID STREQUAL "Intel") AND (CMAKE_CXX_STANDARD GREATER_EQUAL 17) + AND (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 2021.10)) + message(STATUS "Lowering C++ standard for compiling fmtlib sources with Intel Classic 
compiler") + get_filename_component(LMP_UTILS_SRC "${LAMMPS_SOURCE_DIR}/utils.cpp" ABSOLUTE) + get_filename_component(LMP_VARIABLE_SRC "${LAMMPS_SOURCE_DIR}/variable.cpp" ABSOLUTE) + get_filename_component(FMT_FORMAT_SRC "${LAMMPS_SOURCE_DIR}/fmtlib_format.cpp" ABSOLUTE) + get_filename_component(FMT_OS_SRC "${LAMMPS_SOURCE_DIR}/fmtlib_os.cpp" ABSOLUTE) + set_source_files_properties("${FMT_FORMAT_SRC}" "${FMT_OS_SRC}" "${LMP_VARIABLE_SRC}" "${LMP_UTILS_SRC}" + PROPERTIES COMPILE_OPTIONS "-std=c++14") +endif() + if(PKG_ATC OR PKG_AWPMD OR PKG_ML-QUIP OR PKG_ML-POD OR PKG_ELECTRODE OR BUILD_TOOLS) enable_language(C) if (NOT USE_INTERNAL_LINALG) From c66710934dd895558d4b37ffeb574cda3b89971d Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 18 Nov 2023 06:22:16 -0500 Subject: [PATCH 044/116] reduce warnings when compiling with intel classic compilers --- cmake/CMakeLists.txt | 2 +- src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi | 2 +- src/MAKE/OPTIONS/Makefile.intel_cpu_mpich | 2 +- src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 961bcef551..3b0b069556 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -106,7 +106,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.3 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.4) set(CMAKE_TUNE_DEFAULT "-xCOMMON-AVX512") else() - set(CMAKE_TUNE_DEFAULT "-xHost -fp-model fast=2 -no-prec-div -qoverride-limits -diag-disable=10441 -diag-disable=2196") + set(CMAKE_TUNE_DEFAULT "-xHost -fp-model fast=2 -no-prec-div -qoverride-limits -diag-disable=10441 -diag-disable=11074 -diag-disable=11076 -diag-disable=2196") endif() endif() endif() diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi index 3439244b09..8b3bd754cc 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi +++ 
b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi @@ -6,7 +6,7 @@ SHELL = /bin/sh # compiler/linker settings # specify flags and libraries needed for your compiler -CC = mpiicpc -std=c++11 -diag-disable=10441 -diag-disable=2196 +CC = mpiicpc -std=c++11 -diag-disable=10441 -diag-disable=2196 -diag-disable=11074 -diag-disable=11076 OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \ -qopt-zmm-usage=high CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \ diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich b/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich index 042b207c91..21387fe5af 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich +++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich @@ -6,7 +6,7 @@ SHELL = /bin/sh # compiler/linker settings # specify flags and libraries needed for your compiler -CC = mpicxx -cxx=icc -std=c++11 -diag-disable=10441 -diag-disable=2196 +CC = mpicxx -cxx=icc -std=c++11 -diag-disable=10441 -diag-disable=2196 -diag-disable=11074 -diag-disable=11076 OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \ -qopt-zmm-usage=high CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \ diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi b/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi index 9f6de64987..537ad5a8dc 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi +++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi @@ -7,7 +7,7 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler export OMPI_CXX = icc -CC = mpicxx -std=c++11 -diag-disable=10441 -diag-disable=2196 +CC = mpicxx -std=c++11 -diag-disable=10441 -diag-disable=2196 -diag-disable=11074 -diag-disable=11076 OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \ -qopt-zmm-usage=high CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \ From baa00d22e918001ad46292e55b2a9d9a0a960c21 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 18 Nov 2023 06:22:25 -0500 Subject: [PATCH 045/116] simplify --- 
src/fmt/compile.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/fmt/compile.h b/src/fmt/compile.h index a1fb7d2935..af76507f07 100644 --- a/src/fmt/compile.h +++ b/src/fmt/compile.h @@ -61,7 +61,8 @@ const T& first(const T& value, const Tail&...) { return value; } -#if defined(__cpp_if_constexpr) && defined(__cpp_return_type_deduction) && (FMT_CPLUSPLUS >= 201703L) && !FMT_ICC_VERSION +// LAMMPS customization: only use 'if constexpr' with C++17 +#if defined(__cpp_if_constexpr) && defined(__cpp_return_type_deduction) && (FMT_CPLUSPLUS >= 201703L) template struct type_list {}; // Returns a reference to the argument at index N from [first, rest...]. From 2d4527d59a90f0bf13cf5e57565745dcedb1ef1c Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 18 Nov 2023 07:14:38 -0500 Subject: [PATCH 046/116] tweak intel compiler makefile for traditional build --- src/MAKE/OPTIONS/Makefile.intel_coprocessor | 13 +++++++++++++ src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi | 13 +++++++++++++ src/MAKE/OPTIONS/Makefile.intel_cpu_mpich | 13 +++++++++++++ src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi | 13 +++++++++++++ 4 files changed, 52 insertions(+) diff --git a/src/MAKE/OPTIONS/Makefile.intel_coprocessor b/src/MAKE/OPTIONS/Makefile.intel_coprocessor index 99e8d22d82..d8a67428de 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_coprocessor +++ b/src/MAKE/OPTIONS/Makefile.intel_coprocessor @@ -14,6 +14,7 @@ CCFLAGS = -qopenmp -qoffload -ansi-alias -restrict \ -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG -DLMP_INTEL_OFFLOAD \ $(OPTFLAGS) -I$(MKLROOT)/include SHFLAGS = -fPIC +FMTFLAGS = -std=c++11 DEPFLAGS = -M LINK = mpiicpc -std=c++11 @@ -118,6 +119,18 @@ $(SHLIB): $(OBJ) $(EXTRA_LINK_DEPENDS) %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< +variable.o : ../variable.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +utils.o : ../utils.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +fmtlib_format.o : ../fmtlib_format.cpp + 
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +fmtlib_os.o : ../fmtlib_os.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + # Individual dependencies depend : fastdep.exe $(SRC) diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi index 8b3bd754cc..681bbaabe1 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi +++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi @@ -13,6 +13,7 @@ CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \ -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \ -I$(MKLROOT)/include SHFLAGS = -fPIC +FMTFLAGS = -std=c++11 DEPFLAGS = -M LINK = mpiicpc -std=c++11 -diag-disable=10441 -diag-disable=2196 @@ -117,6 +118,18 @@ $(SHLIB): $(OBJ) $(EXTRA_LINK_DEPENDS) %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< +variable.o : ../variable.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +utils.o : ../utils.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +fmtlib_format.o : ../fmtlib_format.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +fmtlib_os.o : ../fmtlib_os.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + # Individual dependencies depend : fastdep.exe $(SRC) diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich b/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich index 21387fe5af..f1a8c97fc6 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich +++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich @@ -13,6 +13,7 @@ CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \ -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \ -I$(MKLROOT)/include SHFLAGS = -fPIC +FMTFLAGS = -std=c++11 DEPFLAGS = -M LINK = mpicxx -cxx=icc -std=c++11 -diag-disable=10441 -diag-disable=2196 @@ -117,6 +118,18 @@ $(SHLIB): $(OBJ) $(EXTRA_LINK_DEPENDS) %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< +variable.o : ../variable.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +utils.o : 
../utils.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +fmtlib_format.o : ../fmtlib_format.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +fmtlib_os.o : ../fmtlib_os.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + # Individual dependencies depend : fastdep.exe $(SRC) diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi b/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi index 537ad5a8dc..c3e4451c7c 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi +++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi @@ -14,6 +14,7 @@ CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \ -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \ -I$(MKLROOT)/include SHFLAGS = -fPIC +FMTFLAGS = -std=c++11 DEPFLAGS = -M LINK = mpicxx -std=c++11 -diag-disable=10441 -diag-disable=2196 @@ -118,6 +119,18 @@ $(SHLIB): $(OBJ) $(EXTRA_LINK_DEPENDS) %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< +variable.o : ../variable.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +utils.o : ../utils.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +fmtlib_format.o : ../fmtlib_format.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +fmtlib_os.o : ../fmtlib_os.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + # Individual dependencies depend : fastdep.exe $(SRC) From 0487dc5331504ddef7168f0cc3495d6410d86874 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 18 Nov 2023 11:15:53 -0500 Subject: [PATCH 047/116] copy intel C++17 compiler hack to Kokkos makefiles --- src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi | 16 +++++++++++++++- src/MAKE/OPTIONS/Makefile.kokkos_mpi_only | 14 ++++++++++++++ src/MAKE/OPTIONS/Makefile.kokkos_omp | 14 ++++++++++++++ src/MAKE/OPTIONS/Makefile.kokkos_phi | 14 ++++++++++++++ 4 files changed, 57 insertions(+), 1 deletion(-) diff --git a/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi b/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi index 
b73c441c71..e78be1acdc 100644 --- a/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi +++ b/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi @@ -12,6 +12,8 @@ export OMPI_CXX = $(KOKKOS_ABSOLUTE_PATH)/bin/nvcc_wrapper CC = mpicxx CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma SHFLAGS = -fPIC +# uncomment when compiling with Intel 21.5 or older +FMTFLAGS = # -std=c++11 DEPFLAGS = -M LINK = mpicxx @@ -36,7 +38,7 @@ KOKKOS_ARCH = Volta70 LMP_INC = -DLAMMPS_GZIP # MPI library -# see discussion in Section 2.2 (step 5) of manual +# see discussion in Section 3.4 of the manual # MPI wrapper compiler/linker can provide this info # can point to dummy MPI library in src/STUBS as in Makefile.serial # use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts @@ -118,6 +120,18 @@ $(SHLIB): $(OBJ) $(EXTRA_LINK_DEPENDS) %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< +variable.o : ../variable.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +utils.o : ../utils.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +fmtlib_format.o : ../fmtlib_format.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +fmtlib_os.o : ../fmtlib_os.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + # Individual dependencies depend : fastdep.exe $(SRC) diff --git a/src/MAKE/OPTIONS/Makefile.kokkos_mpi_only b/src/MAKE/OPTIONS/Makefile.kokkos_mpi_only index 0adb53eef0..e1f7005617 100644 --- a/src/MAKE/OPTIONS/Makefile.kokkos_mpi_only +++ b/src/MAKE/OPTIONS/Makefile.kokkos_mpi_only @@ -9,6 +9,8 @@ SHELL = /bin/sh CC = mpicxx CCFLAGS = -g -O3 -DNDEBUG SHFLAGS = -fPIC +# uncomment when compiling with Intel 21.5 or older +FMTFLAGS = # -std=c++11 DEPFLAGS = -M LINK = mpicxx @@ -114,6 +116,18 @@ $(SHLIB): $(OBJ) $(EXTRA_LINK_DEPENDS) %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< +variable.o : ../variable.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +utils.o : ../utils.cpp + $(CC) $(CCFLAGS) 
$(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +fmtlib_format.o : ../fmtlib_format.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +fmtlib_os.o : ../fmtlib_os.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + # Individual dependencies depend : fastdep.exe $(SRC) diff --git a/src/MAKE/OPTIONS/Makefile.kokkos_omp b/src/MAKE/OPTIONS/Makefile.kokkos_omp index 82144652dd..5f91af7a1e 100644 --- a/src/MAKE/OPTIONS/Makefile.kokkos_omp +++ b/src/MAKE/OPTIONS/Makefile.kokkos_omp @@ -9,6 +9,8 @@ SHELL = /bin/sh CC = mpicxx CCFLAGS = -g -O3 -DNDEBUG SHFLAGS = -fPIC +# uncomment when compiling with Intel 21.5 or older +FMTFLAGS = # -std=c++11 DEPFLAGS = -M LINK = mpicxx @@ -114,6 +116,18 @@ $(SHLIB): $(OBJ) $(EXTRA_LINK_DEPENDS) %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< +variable.o : ../variable.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +utils.o : ../utils.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +fmtlib_format.o : ../fmtlib_format.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +fmtlib_os.o : ../fmtlib_os.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + # Individual dependencies depend : fastdep.exe $(SRC) diff --git a/src/MAKE/OPTIONS/Makefile.kokkos_phi b/src/MAKE/OPTIONS/Makefile.kokkos_phi index 9d5691251c..05b24f8721 100644 --- a/src/MAKE/OPTIONS/Makefile.kokkos_phi +++ b/src/MAKE/OPTIONS/Makefile.kokkos_phi @@ -9,6 +9,8 @@ SHELL = /bin/sh CC = mpicxx CCFLAGS = -g -O3 -DNDEBUG SHFLAGS = -fPIC +# uncomment when compiling with Intel 21.5 or older +FMTFLAGS = # -std=c++11 DEPFLAGS = -M LINK = mpicxx @@ -115,6 +117,18 @@ $(SHLIB): $(OBJ) $(EXTRA_LINK_DEPENDS) %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< +variable.o : ../variable.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +utils.o : ../utils.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +fmtlib_format.o : ../fmtlib_format.cpp + $(CC) 
$(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + +fmtlib_os.o : ../fmtlib_os.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) $(FMTFLAGS) -c $< + # Individual dependencies depend : fastdep.exe $(SRC) From 11305107d95be6c395a785c6e7b5946ed17d2441 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 19 Nov 2023 05:40:05 -0500 Subject: [PATCH 048/116] update release date to 21 November 2023 --- doc/lammps.1 | 4 ++-- doc/src/Commands_removed.rst | 4 ++-- doc/src/compute_composition_atom.rst | 2 +- doc/src/compute_property_grid.rst | 2 +- doc/src/compute_reduce.rst | 2 +- doc/src/compute_voronoi_atom.rst | 2 +- doc/src/dump.rst | 2 +- doc/src/dump_image.rst | 2 +- doc/src/fix_deposit.rst | 2 +- doc/src/fix_pimd.rst | 2 +- src/library.cpp | 8 ++++---- src/version.h | 2 +- 12 files changed, 17 insertions(+), 17 deletions(-) diff --git a/doc/lammps.1 b/doc/lammps.1 index 766522d4aa..100ea9b663 100644 --- a/doc/lammps.1 +++ b/doc/lammps.1 @@ -1,7 +1,7 @@ -.TH LAMMPS "1" "17 November 2023" "2023-11-17" +.TH LAMMPS "1" "21 November 2023" "2023-11-21" .SH NAME .B LAMMPS -\- Molecular Dynamics Simulator. Version 17 November 2023 +\- Molecular Dynamics Simulator. Version 21 November 2023 .SH SYNOPSIS .B lmp diff --git a/doc/src/Commands_removed.rst b/doc/src/Commands_removed.rst index d0e723aabe..8a00c63734 100644 --- a/doc/src/Commands_removed.rst +++ b/doc/src/Commands_removed.rst @@ -88,7 +88,7 @@ The same functionality is available through MPIIO package ------------- -.. deprecated:: 17Nov2023 +.. deprecated:: 21Nov2023 The MPIIO package has been removed from LAMMPS since it was unmaintained for many years and thus not updated to incorporate required changes that @@ -107,7 +107,7 @@ see :doc:`restart `, :doc:`read_restart `, MSCG package ------------ -.. deprecated:: 17Nov2023 +.. 
deprecated:: 21Nov2023 The MSCG package has been removed from LAMMPS since it was unmaintained for many years and instead superseded by the `OpenMSCG software diff --git a/doc/src/compute_composition_atom.rst b/doc/src/compute_composition_atom.rst index e65a3e9c95..e973eaa234 100644 --- a/doc/src/compute_composition_atom.rst +++ b/doc/src/compute_composition_atom.rst @@ -36,7 +36,7 @@ Examples Description """"""""""" -.. versionadded:: 17Nov2023 +.. versionadded:: 21Nov2023 Define a computation that calculates a local composition vector for each atom. For a central atom with :math:`M` neighbors within the neighbor cutoff sphere, diff --git a/doc/src/compute_property_grid.rst b/doc/src/compute_property_grid.rst index a0b9aba7dc..dfdce220c6 100644 --- a/doc/src/compute_property_grid.rst +++ b/doc/src/compute_property_grid.rst @@ -61,7 +61,7 @@ varying fastest, then Y, then Z slowest. For 2d grids (in 2d simulations), the grid IDs range from 1 to Nx*Ny, with X varying fastest and Y slowest. -.. versionadded:: 17Nov2023 +.. versionadded:: 21Nov2023 The *proc* attribute is the ID of the processor which owns the grid cell. Processor IDs range from 0 to Nprocs - 1, where Nprocs is the diff --git a/doc/src/compute_reduce.rst b/doc/src/compute_reduce.rst index ba60b52563..604b1c1571 100644 --- a/doc/src/compute_reduce.rst +++ b/doc/src/compute_reduce.rst @@ -201,7 +201,7 @@ information in this context, the *replace* keywords will extract the atom IDs for the two atoms in the bond of maximum stretch. These atom IDs and the bond stretch will be printed with thermodynamic output. -.. versionadded:: 17Nov2023 +.. versionadded:: 21Nov2023 The *inputs* keyword allows selection of whether all the inputs are per-atom or local quantities. 
As noted above, all the inputs must be diff --git a/doc/src/compute_voronoi_atom.rst b/doc/src/compute_voronoi_atom.rst index 5f00a2abab..3bada09518 100644 --- a/doc/src/compute_voronoi_atom.rst +++ b/doc/src/compute_voronoi_atom.rst @@ -190,7 +190,7 @@ Voro++ software in the src/VORONOI/README file. Output info """"""""""" -.. deprecated:: 17Nov2023 +.. deprecated:: 21Nov2023 The *peratom* keyword was removed as it is no longer required. diff --git a/doc/src/dump.rst b/doc/src/dump.rst index bdaefb769e..6d13b43200 100644 --- a/doc/src/dump.rst +++ b/doc/src/dump.rst @@ -613,7 +613,7 @@ when running on large numbers of processors. Note that using the "\*" and "%" characters together can produce a large number of small dump files! -.. deprecated:: 17Nov2023 +.. deprecated:: 21Nov2023 The MPIIO package and the the corresponding "/mpiio" dump styles, except for the unrelated "netcdf/mpiio" style were removed from LAMMPS. diff --git a/doc/src/dump_image.rst b/doc/src/dump_image.rst index 43f182b889..3102caaa97 100644 --- a/doc/src/dump_image.rst +++ b/doc/src/dump_image.rst @@ -599,7 +599,7 @@ image will appear. The *sfactor* value must be a value 0.0 <= *sfactor* <= 1.0, where *sfactor* = 1 is a highly reflective surface and *sfactor* = 0 is a rough non-shiny surface. -.. versionadded:: 17Nov2023 +.. versionadded:: 21Nov2023 The *fsaa* keyword can be used with the dump image command to improve the image quality by enabling full scene anti-aliasing. Internally the diff --git a/doc/src/fix_deposit.rst b/doc/src/fix_deposit.rst index ff5afc2241..5264999839 100644 --- a/doc/src/fix_deposit.rst +++ b/doc/src/fix_deposit.rst @@ -220,7 +220,7 @@ rotated configuration of the molecule. existing particle. LAMMPS will issue a warning if R is smaller than this value, based on the radii of existing and inserted particles. -.. versionadded:: 17Nov2023 +.. 
versionadded:: 21Nov2023 The *var* and *set* keywords can be used together to provide a criterion for accepting or rejecting the addition of an individual atom, based on its diff --git a/doc/src/fix_pimd.rst b/doc/src/fix_pimd.rst index 7468ffea13..a2e137da25 100644 --- a/doc/src/fix_pimd.rst +++ b/doc/src/fix_pimd.rst @@ -149,7 +149,7 @@ normal-mode PIMD. A value of *cmd* is for centroid molecular dynamics only the k > 0 modes are thermostatted, not the centroid degrees of freedom. -.. versionadded:: 17Nov2023 +.. versionadded:: 21Nov2023 Mode *pimd* added to fix pimd/langevin. diff --git a/src/library.cpp b/src/library.cpp index 7fc79333ad..363110e5f5 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -617,7 +617,7 @@ combined by removing the '&' and the following newline character. After this processing the string is handed to LAMMPS for parsing and executing. -.. versionadded:: 17Nov2023 +.. versionadded:: 21Nov2023 The command is now able to process long strings with triple quotes and loops using :doc:`jump SELF \ `. @@ -2484,7 +2484,7 @@ int lammps_set_variable(void *handle, char *name, char *str) * \verbatim embed:rst -.. versionadded:: 17Nov2023 +.. versionadded:: 21Nov2023 This function copies a string with human readable information about a defined variable: name, style, current value(s) into the provided @@ -5581,7 +5581,7 @@ int lammps_config_has_ffmpeg_support() { * \verbatim embed:rst -.. deprecated:: 17Nov2023 +.. deprecated:: 21Nov2023 LAMMPS has now exceptions always enabled, so this function will now always return 1 and can be removed from applications @@ -6658,7 +6658,7 @@ the failing MPI ranks to send messages. instance, but instead would check the global error buffer of the library interface. - .. versionchanged: 17Nov2023 + .. versionchanged: 21Nov2023 The *buffer* pointer may be ``NULL``. This will clear any error status without copying the error message. 
diff --git a/src/version.h b/src/version.h index a5844f0973..e2f596d1aa 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define LAMMPS_VERSION "17 Nov 2023" +#define LAMMPS_VERSION "21 Nov 2023" From 3c73882a681c2cdfca785235818824a28dba76c7 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 19 Nov 2023 08:59:27 -0500 Subject: [PATCH 049/116] update Purge.list and avoid redundant checks --- cmake/Modules/LAMMPSUtils.cmake | 14 +++++++------- src/Purge.list | 2 ++ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/cmake/Modules/LAMMPSUtils.cmake b/cmake/Modules/LAMMPSUtils.cmake index bb5ea07609..2ec9d1b706 100644 --- a/cmake/Modules/LAMMPSUtils.cmake +++ b/cmake/Modules/LAMMPSUtils.cmake @@ -83,17 +83,17 @@ function(check_for_autogen_files source_dir) file(GLOB SRC_AUTOGEN_FILES CONFIGURE_DEPENDS ${source_dir}/style_*.h) file(GLOB SRC_AUTOGEN_PACKAGES CONFIGURE_DEPENDS ${source_dir}/packages_*.h) list(APPEND SRC_AUTOGEN_FILES ${SRC_AUTOGEN_PACKAGES} ${source_dir}/lmpinstalledpkgs.h ${source_dir}/lmpgitversion.h) - list(APPEND SRC_AUTOGEN_FILES ${SRC_AUTOGEN_PACKAGES} ${source_dir}/mliap_model_python_couple.h ${source_dir}/mliap_model_python_couple.cpp) + list(APPEND SRC_AUTOGEN_FILES ${source_dir}/mliap_model_python_couple.h ${source_dir}/mliap_model_python_couple.cpp) foreach(_SRC ${SRC_AUTOGEN_FILES}) get_filename_component(FILENAME "${_SRC}" NAME) if(EXISTS ${source_dir}/${FILENAME}) message(FATAL_ERROR "\n########################################################################\n" - "Found header file(s) generated by the make-based build system\n" - "\n" - "Please run\n" - "make -C ${source_dir} purge\n" - "to remove\n" - "########################################################################") + "Found header file ${source_dir}/${FILENAME} generated by the make-based build system\n" + "\n" + "Please run\n" + "make -C ${source_dir} purge\n" + "to remove\n" + "########################################################################") 
endif() endforeach() endfunction() diff --git a/src/Purge.list b/src/Purge.list index 3052c3af1d..14708a111e 100644 --- a/src/Purge.list +++ b/src/Purge.list @@ -8,6 +8,7 @@ style_compute.h style_dihedral.h style_dump.h style_fix.h +style_gran_sub_mod.h style_improper.h style_integrate.h style_kspace.h @@ -32,6 +33,7 @@ packages_compute.h packages_dihedral.h packages_dump.h packages_fix.h +packages_gran_sub_mod.h packages_improper.h packages_integrate.h packages_kspace.h From bc692dce791ec90aa9c2cb25ca2e24585a8ddd42 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 19 Nov 2023 09:18:17 -0500 Subject: [PATCH 050/116] add missing entries --- src/.gitignore | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/.gitignore b/src/.gitignore index 60cfe7eea3..3ee771e139 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -46,6 +46,33 @@ /mdi_plugin.cpp /mdi_plugin.h +/amoeba_charge_transfer.cpp +/amoeba_convolution.cpp +/amoeba_convolution.h +/amoeba_dispersion.cpp +/amoeba_file.cpp +/amoeba_hal.cpp +/amoeba_induce.cpp +/amoeba_kspace.cpp +/amoeba_multipole.cpp +/amoeba_polar.cpp +/amoeba_repulsion.cpp +/amoeba_utils.cpp +/angle_amoeba.cpp +/angle_amoeba.h +/atom_vec_amoeba.cpp +/atom_vec_amoeba.h +/fix_amoeba_bitorsion.cpp +/fix_amoeba_bitorsion.h +/fix_amoeba_pitorsion.cpp +/fix_amoeba_pitorsion.h +/improper_amoeba.cpp +/improper_amoeba.h +/pair_amoeba.cpp +/pair_amoeba.h +/pair_hippo.cpp +/pair_hippo.h + /fix_brownian*.cpp /fix_brownian*.h /fix_propel_self.cpp From 6cd2055084823c897d81fbbc6147c3a4cf7e6c93 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 19 Nov 2023 09:18:40 -0500 Subject: [PATCH 051/116] delete obsolete dependencies --- src/INTEL/Install.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/INTEL/Install.sh b/src/INTEL/Install.sh index 2d6d82fae5..94d967b194 100755 --- a/src/INTEL/Install.sh +++ b/src/INTEL/Install.sh @@ -51,8 +51,6 @@ action npair_intel.cpp action intel_simd.h action intel_intrinsics.h 
pair_tersoff_intel.cpp action intel_intrinsics_airebo.h pair_airebo_intel.cpp -action electrode_accel_intel.h fix_electrode_conp.cpp -action electrode_accel_intel.cpp fix_electrode_conp.cpp if (test $mode = 1) then From ac90a4ac9c2c8b7d9fc5302a7af44d29ca58944c Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 19 Nov 2023 09:19:03 -0500 Subject: [PATCH 052/116] reorder for cleaner dependency handling --- src/Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Makefile b/src/Makefile index 5f905ee145..196641d67d 100644 --- a/src/Makefile +++ b/src/Makefile @@ -49,6 +49,8 @@ endif # PACKEXT = subset that require an external (downloaded) library PACKAGE = \ + molecule \ + kspace \ adios \ amoeba \ asphere \ @@ -85,7 +87,6 @@ PACKAGE = \ interlayer \ kim \ kokkos \ - kspace \ latboltz \ lepton \ machdyn \ @@ -104,7 +105,6 @@ PACKAGE = \ ml-rann \ ml-snap \ mofff \ - molecule \ molfile \ netcdf \ openmp \ @@ -143,9 +143,11 @@ PACKAGE = \ # NOTE: the last four packages must remain at the end since # they depend on other packages to be installed first. 
-PACKBASIC = kspace manybody molecule rigid +PACKBASIC = molecule kspace manybody rigid PACKMOST = \ + kspace \ + molecule \ amoeba \ asphere \ bocs \ @@ -173,7 +175,6 @@ PACKMOST = \ fep \ granular \ interlayer \ - kspace \ manybody \ mc \ meam \ @@ -181,7 +182,6 @@ PACKMOST = \ misc \ ml-snap \ mofff \ - molecule \ openmp \ opt \ orient \ From f25075db6c6232985afb87db0a50f0e9aa7315e1 Mon Sep 17 00:00:00 2001 From: Connor Allen Date: Tue, 21 Nov 2023 13:57:34 +0000 Subject: [PATCH 053/116] adding the scale feature of quip calculator, allows fix adapt functionallity --- src/ML-QUIP/pair_quip.cpp | 44 ++++++++++++++++++++++++--------------- src/ML-QUIP/pair_quip.h | 4 +++- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/src/ML-QUIP/pair_quip.cpp b/src/ML-QUIP/pair_quip.cpp index aae008f1b2..3e3de8e098 100644 --- a/src/ML-QUIP/pair_quip.cpp +++ b/src/ML-QUIP/pair_quip.cpp @@ -56,6 +56,7 @@ PairQUIP::~PairQUIP() if (allocated) { memory->destroy(setflag); memory->destroy(cutsq); + memory->destroy(scale); delete[] map; } delete[] quip_potential; @@ -157,7 +158,7 @@ void PairQUIP::compute(int eflag, int vflag) iquip = 0; for (ii = 0; ii < ntotal; ii++) { for (jj = 0; jj < 3; jj++) { - f[ii][jj] += quip_force[iquip]; + f[ii][jj] += scale[1][1]*quip_force[iquip]; iquip++; } } @@ -165,27 +166,27 @@ void PairQUIP::compute(int eflag, int vflag) if (eflag_global) { eng_vdwl = quip_energy; } if (eflag_atom) { - for (ii = 0; ii < ntotal; ii++) { eatom[ii] = quip_local_e[ii]; } + for (ii = 0; ii < ntotal; ii++) { eatom[ii] = scale[1][1]*quip_local_e[ii]; } } if (vflag_global) { - virial[0] = quip_virial[0]; - virial[1] = quip_virial[4]; - virial[2] = quip_virial[8]; - virial[3] = (quip_virial[3] + quip_virial[1]) * 0.5; - virial[4] = (quip_virial[2] + quip_virial[6]) * 0.5; - virial[5] = (quip_virial[5] + quip_virial[7]) * 0.5; + virial[0] = scale[1][1]*quip_virial[0]; + virial[1] = scale[1][1]*quip_virial[4]; + virial[2] = scale[1][1]*quip_virial[8]; + virial[3] 
= scale[1][1]*(quip_virial[3] + quip_virial[1]) * 0.5; + virial[4] = scale[1][1]*(quip_virial[2] + quip_virial[6]) * 0.5; + virial[5] = scale[1][1]*(quip_virial[5] + quip_virial[7]) * 0.5; } if (vflag_atom) { int iatom = 0; for (ii = 0; ii < ntotal; ii++) { - vatom[ii][0] += quip_local_virial[iatom + 0]; - vatom[ii][1] += quip_local_virial[iatom + 4]; - vatom[ii][2] += quip_local_virial[iatom + 8]; - vatom[ii][3] += (quip_local_virial[iatom + 3] + quip_local_virial[iatom + 1]) * 0.5; - vatom[ii][4] += (quip_local_virial[iatom + 2] + quip_local_virial[iatom + 6]) * 0.5; - vatom[ii][5] += (quip_local_virial[iatom + 5] + quip_local_virial[iatom + 7]) * 0.5; + vatom[ii][0] += scale[1][1]*quip_local_virial[iatom + 0]; + vatom[ii][1] += scale[1][1]*quip_local_virial[iatom + 4]; + vatom[ii][2] += scale[1][1]*quip_local_virial[iatom + 8]; + vatom[ii][3] += scale[1][1]*(quip_local_virial[iatom + 3] + quip_local_virial[iatom + 1]) * 0.5; + vatom[ii][4] += scale[1][1]*(quip_local_virial[iatom + 2] + quip_local_virial[iatom + 6]) * 0.5; + vatom[ii][5] += scale[1][1]*(quip_local_virial[iatom + 5] + quip_local_virial[iatom + 7]) * 0.5; iatom += 9; } } @@ -314,8 +315,17 @@ void PairQUIP::init_style() /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ - -double PairQUIP::init_one(int /*i*/, int /*j*/) -{ +double PairQUIP::init_one(int i, int j) +{ scale[j][i] = scale[i][j]; return cutoff; } + + +/* for fix adapt scaling */ +void *PairQUIP::extract(const char *str, int &dim) +{ + dim = 2; + if (strcmp(str,"scale") == 0) return (void *) scale; + return nullptr; +} + diff --git a/src/ML-QUIP/pair_quip.h b/src/ML-QUIP/pair_quip.h index 3c0cdabf3b..bfabfca05d 100644 --- a/src/ML-QUIP/pair_quip.h +++ b/src/ML-QUIP/pair_quip.h @@ -42,7 +42,9 @@ class PairQUIP : public Pair { void init_style() override; double init_one(int, int) override; 
void allocate(); - + void *extract(const char *, int &); + protected: + double **scale; private: double cutoff; int *quip_potential; From fc28fc318beede342a2f013f6f673c35fba15920 Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Tue, 21 Nov 2023 10:57:33 -0700 Subject: [PATCH 054/116] Update CODEOWNERS for cmake --- .github/CODEOWNERS | 6 +++--- cmake/CMakeLists.txt | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index b6892aa4ee..1b4cae3aaa 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -153,12 +153,12 @@ tools/vim/* @hammondkd unittest/* @akohlmey # cmake -cmake/* @rbberger +cmake/* @akohlmey cmake/Modules/LAMMPSInterfacePlugin.cmake @akohlmey cmake/Modules/MPI4WIN.cmake @akohlmey cmake/Modules/OpenCLLoader.cmake @akohlmey -cmake/Modules/Packages/COLVARS.cmake @rbberger @giacomofiorin -cmake/Modules/Packages/KIM.cmake @rbberger @ellio167 +cmake/Modules/Packages/COLVARS.cmake @giacomofiorin +cmake/Modules/Packages/KIM.cmake @ellio167 cmake/presets/*.cmake @akohlmey # python diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 3b0b069556..28e02bbee7 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -2,7 +2,6 @@ ######################################## # CMake build system # This file is part of LAMMPS -# Created by Christoph Junghans and Richard Berger cmake_minimum_required(VERSION 3.16) ######################################## # set policy to silence warnings about ignoring _ROOT but use it From 3306b95589108fb0e73e8e423e34bb62bcea91d0 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Tue, 21 Nov 2023 15:02:12 -0700 Subject: [PATCH 055/116] Update Kokkos library in LAMMPS to v4.2 --- lib/kokkos/CHANGELOG.md | 93 +- lib/kokkos/CMakeLists.txt | 5 +- lib/kokkos/Makefile.kokkos | 70 +- lib/kokkos/Makefile.targets | 2 + lib/kokkos/algorithms/CMakeLists.txt | 2 +- .../algorithms/src/Kokkos_NestedSort.hpp | 184 +- lib/kokkos/algorithms/src/Kokkos_Random.hpp | 6 +- 
lib/kokkos/algorithms/src/Kokkos_Sort.hpp | 759 +------- .../src/sorting/Kokkos_BinOpsPublicAPI.hpp | 129 ++ .../src/sorting/Kokkos_BinSortPublicAPI.hpp | 410 ++++ .../sorting/Kokkos_NestedSortPublicAPI.hpp | 100 + .../src/sorting/Kokkos_SortPublicAPI.hpp | 194 ++ .../impl/Kokkos_CopyOpsForBinSortImpl.hpp | 61 + .../sorting/impl/Kokkos_NestedSortImpl.hpp | 115 ++ .../src/sorting/impl/Kokkos_SortImpl.hpp | 369 ++++ .../Kokkos_AdjacentDifference.hpp | 203 +- .../std_algorithms/Kokkos_AdjacentFind.hpp | 117 +- .../src/std_algorithms/Kokkos_AllOf.hpp | 64 +- .../src/std_algorithms/Kokkos_AnyOf.hpp | 64 +- .../src/std_algorithms/Kokkos_Copy.hpp | 65 +- .../std_algorithms/Kokkos_CopyBackward.hpp | 65 +- .../src/std_algorithms/Kokkos_CopyIf.hpp | 71 +- .../src/std_algorithms/Kokkos_CopyN.hpp | 68 +- .../src/std_algorithms/Kokkos_Count.hpp | 60 +- .../src/std_algorithms/Kokkos_CountIf.hpp | 64 +- .../src/std_algorithms/Kokkos_Equal.hpp | 289 ++- .../std_algorithms/Kokkos_ExclusiveScan.hpp | 243 ++- .../src/std_algorithms/Kokkos_Fill.hpp | 54 +- .../src/std_algorithms/Kokkos_FillN.hpp | 58 +- .../src/std_algorithms/Kokkos_Find.hpp | 60 +- .../src/std_algorithms/Kokkos_FindEnd.hpp | 144 +- .../src/std_algorithms/Kokkos_FindFirstOf.hpp | 155 +- .../src/std_algorithms/Kokkos_FindIf.hpp | 70 +- .../src/std_algorithms/Kokkos_FindIfNot.hpp | 63 +- .../src/std_algorithms/Kokkos_ForEach.hpp | 67 +- .../src/std_algorithms/Kokkos_ForEachN.hpp | 72 +- .../src/std_algorithms/Kokkos_Generate.hpp | 54 +- .../src/std_algorithms/Kokkos_GenerateN.hpp | 63 +- .../std_algorithms/Kokkos_InclusiveScan.hpp | 311 +++- .../std_algorithms/Kokkos_IsPartitioned.hpp | 61 +- .../src/std_algorithms/Kokkos_IsSorted.hpp | 116 +- .../std_algorithms/Kokkos_IsSortedUntil.hpp | 115 +- .../Kokkos_LexicographicalCompare.hpp | 133 +- .../src/std_algorithms/Kokkos_MaxElement.hpp | 105 +- .../src/std_algorithms/Kokkos_MinElement.hpp | 105 +- .../std_algorithms/Kokkos_MinMaxElement.hpp | 107 +- 
.../src/std_algorithms/Kokkos_Mismatch.hpp | 145 +- .../src/std_algorithms/Kokkos_Move.hpp | 64 +- .../std_algorithms/Kokkos_MoveBackward.hpp | 67 +- .../src/std_algorithms/Kokkos_NoneOf.hpp | 65 +- .../std_algorithms/Kokkos_PartitionCopy.hpp | 90 +- .../std_algorithms/Kokkos_PartitionPoint.hpp | 62 +- .../src/std_algorithms/Kokkos_Reduce.hpp | 203 +- .../src/std_algorithms/Kokkos_Remove.hpp | 64 +- .../src/std_algorithms/Kokkos_RemoveCopy.hpp | 82 +- .../std_algorithms/Kokkos_RemoveCopyIf.hpp | 71 +- .../src/std_algorithms/Kokkos_RemoveIf.hpp | 66 +- .../src/std_algorithms/Kokkos_Replace.hpp | 63 +- .../src/std_algorithms/Kokkos_ReplaceCopy.hpp | 83 +- .../std_algorithms/Kokkos_ReplaceCopyIf.hpp | 88 +- .../src/std_algorithms/Kokkos_ReplaceIf.hpp | 69 +- .../src/std_algorithms/Kokkos_Reverse.hpp | 53 +- .../src/std_algorithms/Kokkos_ReverseCopy.hpp | 67 +- .../src/std_algorithms/Kokkos_Rotate.hpp | 57 +- .../src/std_algorithms/Kokkos_RotateCopy.hpp | 75 +- .../src/std_algorithms/Kokkos_Search.hpp | 143 +- .../src/std_algorithms/Kokkos_SearchN.hpp | 131 +- .../src/std_algorithms/Kokkos_ShiftLeft.hpp | 54 +- .../src/std_algorithms/Kokkos_ShiftRight.hpp | 54 +- .../src/std_algorithms/Kokkos_SwapRanges.hpp | 66 +- .../src/std_algorithms/Kokkos_Transform.hpp | 208 ++- .../Kokkos_TransformExclusiveScan.hpp | 130 +- .../Kokkos_TransformInclusiveScan.hpp | 244 ++- .../std_algorithms/Kokkos_TransformReduce.hpp | 276 ++- .../src/std_algorithms/Kokkos_Unique.hpp | 125 +- .../src/std_algorithms/Kokkos_UniqueCopy.hpp | 163 +- .../impl/Kokkos_AdjacentDifference.hpp | 58 +- .../impl/Kokkos_AdjacentFind.hpp | 81 +- .../impl/Kokkos_AllOfAnyOfNoneOf.hpp | 55 +- .../impl/Kokkos_Constraints.hpp | 47 +- .../impl/Kokkos_CopyBackward.hpp | 49 +- .../std_algorithms/impl/Kokkos_CopyCopyN.hpp | 75 +- .../src/std_algorithms/impl/Kokkos_CopyIf.hpp | 73 +- .../impl/Kokkos_CountCountIf.hpp | 52 +- .../src/std_algorithms/impl/Kokkos_Equal.hpp | 116 +- .../impl/Kokkos_ExclusiveScan.hpp | 240 ++- 
.../std_algorithms/impl/Kokkos_FillFillN.hpp | 49 +- .../std_algorithms/impl/Kokkos_FindEnd.hpp | 110 +- .../impl/Kokkos_FindFirstOf.hpp | 80 +- .../impl/Kokkos_FindIfOrNot.hpp | 72 +- .../impl/Kokkos_ForEachForEachN.hpp | 64 +- .../impl/Kokkos_FunctorsForExclusiveScan.hpp | 220 +++ .../impl/Kokkos_GenerateGenerateN.hpp | 51 +- .../impl/Kokkos_InclusiveScan.hpp | 144 +- .../impl/Kokkos_IsPartitioned.hpp | 71 +- .../std_algorithms/impl/Kokkos_IsSorted.hpp | 53 +- .../impl/Kokkos_IsSortedUntil.hpp | 74 +- .../impl/Kokkos_LexicographicalCompare.hpp | 92 +- .../impl/Kokkos_MinMaxMinmaxElement.hpp | 83 +- .../std_algorithms/impl/Kokkos_Mismatch.hpp | 95 +- .../src/std_algorithms/impl/Kokkos_Move.hpp | 33 +- .../impl/Kokkos_MoveBackward.hpp | 42 +- .../impl/Kokkos_MustUseKokkosSingleInTeam.hpp | 47 + .../impl/Kokkos_PartitionCopy.hpp | 76 +- .../impl/Kokkos_PartitionPoint.hpp | 45 +- .../src/std_algorithms/impl/Kokkos_Reduce.hpp | 115 +- .../impl/Kokkos_RemoveAllVariants.hpp | 123 +- .../std_algorithms/impl/Kokkos_Replace.hpp | 28 +- .../impl/Kokkos_ReplaceCopy.hpp | 49 +- .../impl/Kokkos_ReplaceCopyIf.hpp | 65 +- .../std_algorithms/impl/Kokkos_ReplaceIf.hpp | 34 +- .../std_algorithms/impl/Kokkos_Reverse.hpp | 28 +- .../impl/Kokkos_ReverseCopy.hpp | 42 +- .../src/std_algorithms/impl/Kokkos_Rotate.hpp | 34 +- .../std_algorithms/impl/Kokkos_RotateCopy.hpp | 49 +- .../src/std_algorithms/impl/Kokkos_Search.hpp | 110 +- .../std_algorithms/impl/Kokkos_SearchN.hpp | 120 +- .../std_algorithms/impl/Kokkos_ShiftLeft.hpp | 40 +- .../std_algorithms/impl/Kokkos_ShiftRight.hpp | 61 +- .../std_algorithms/impl/Kokkos_SwapRanges.hpp | 39 +- .../std_algorithms/impl/Kokkos_Transform.hpp | 103 +- .../impl/Kokkos_TransformExclusiveScan.hpp | 122 +- .../impl/Kokkos_TransformInclusiveScan.hpp | 212 ++- .../impl/Kokkos_TransformReduce.hpp | 116 +- .../src/std_algorithms/impl/Kokkos_Unique.hpp | 89 +- .../std_algorithms/impl/Kokkos_UniqueCopy.hpp | 130 +- .../algorithms/unit_tests/CMakeLists.txt 
| 262 ++- lib/kokkos/algorithms/unit_tests/Makefile | 18 +- .../algorithms/unit_tests/TestBinSortA.hpp | 14 +- .../algorithms/unit_tests/TestBinSortB.hpp | 70 +- .../algorithms/unit_tests/TestRandom.hpp | 92 + .../unit_tests/TestRandomAccessIterator.cpp | 11 + .../unit_tests/TestSortCustomComp.hpp | 133 ++ .../unit_tests/TestStdAlgorithmsCommon.cpp | 14 + .../unit_tests/TestStdAlgorithmsCommon.hpp | 499 ++++- .../TestStdAlgorithmsExclusiveScan.cpp | 5 +- .../TestStdAlgorithmsHelperFunctors.hpp | 20 +- .../TestStdAlgorithmsPartitioningOps.cpp | 2 +- ...estStdAlgorithmsTeamAdjacentDifference.cpp | 220 +++ .../TestStdAlgorithmsTeamAdjacentFind.cpp | 256 +++ .../unit_tests/TestStdAlgorithmsTeamAllOf.cpp | 165 ++ .../unit_tests/TestStdAlgorithmsTeamAnyOf.cpp | 165 ++ .../unit_tests/TestStdAlgorithmsTeamCopy.cpp | 157 ++ .../TestStdAlgorithmsTeamCopyBackward.cpp | 168 ++ .../TestStdAlgorithmsTeamCopyIf.cpp | 176 ++ .../TestStdAlgorithmsTeamCopy_n.cpp | 176 ++ .../unit_tests/TestStdAlgorithmsTeamCount.cpp | 201 ++ .../TestStdAlgorithmsTeamCountIf.cpp | 162 ++ .../unit_tests/TestStdAlgorithmsTeamEqual.cpp | 278 +++ .../TestStdAlgorithmsTeamExclusiveScan.cpp | 253 +++ .../unit_tests/TestStdAlgorithmsTeamFill.cpp | 106 ++ .../TestStdAlgorithmsTeamFill_n.cpp | 176 ++ .../unit_tests/TestStdAlgorithmsTeamFind.cpp | 212 +++ .../TestStdAlgorithmsTeamFindEnd.cpp | 271 +++ .../TestStdAlgorithmsTeamFindFirstOf.cpp | 280 +++ .../TestStdAlgorithmsTeamFindIf.cpp | 241 +++ .../TestStdAlgorithmsTeamFindIfNot.cpp | 236 +++ .../TestStdAlgorithmsTeamForEach.cpp | 126 ++ .../TestStdAlgorithmsTeamForEachN.cpp | 144 ++ .../TestStdAlgorithmsTeamGenerate.cpp | 116 ++ .../TestStdAlgorithmsTeamGenerate_n.cpp | 179 ++ .../TestStdAlgorithmsTeamInclusiveScan.cpp | 277 +++ .../TestStdAlgorithmsTeamIsPartitioned.cpp | 255 +++ .../TestStdAlgorithmsTeamIsSorted.cpp | 209 +++ .../TestStdAlgorithmsTeamIsSortedUntil.cpp | 275 +++ ...tdAlgorithmsTeamLexicographicalCompare.cpp | 286 +++ 
.../TestStdAlgorithmsTeamMaxElement.cpp | 182 ++ .../TestStdAlgorithmsTeamMinElement.cpp | 181 ++ .../TestStdAlgorithmsTeamMinMaxElement.cpp | 200 ++ .../TestStdAlgorithmsTeamMismatch.cpp | 283 +++ .../unit_tests/TestStdAlgorithmsTeamMove.cpp | 161 ++ .../TestStdAlgorithmsTeamMoveBackward.cpp | 170 ++ .../TestStdAlgorithmsTeamNoneOf.cpp | 165 ++ .../TestStdAlgorithmsTeamPartitionCopy.cpp | 313 ++++ .../TestStdAlgorithmsTeamPartitionPoint.cpp | 260 +++ .../TestStdAlgorithmsTeamReduce.cpp | 272 +++ .../TestStdAlgorithmsTeamRemove.cpp | 182 ++ .../TestStdAlgorithmsTeamRemoveCopy.cpp | 222 +++ .../TestStdAlgorithmsTeamRemoveCopyIf.cpp | 178 ++ .../TestStdAlgorithmsTeamRemoveIf.cpp | 166 ++ .../TestStdAlgorithmsTeamReplace.cpp | 135 ++ .../TestStdAlgorithmsTeamReplaceCopy.cpp | 204 ++ .../TestStdAlgorithmsTeamReplaceCopyIf.cpp | 183 ++ .../TestStdAlgorithmsTeamReplaceIf.cpp | 138 ++ .../TestStdAlgorithmsTeamReverse.cpp | 105 ++ .../TestStdAlgorithmsTeamReverseCopy.cpp | 153 ++ .../TestStdAlgorithmsTeamRotate.cpp | 173 ++ .../TestStdAlgorithmsTeamRotateCopy.cpp | 188 ++ .../TestStdAlgorithmsTeamSearch.cpp | 279 +++ .../TestStdAlgorithmsTeamSearchN.cpp | 295 +++ .../TestStdAlgorithmsTeamShiftLeft.cpp | 189 ++ .../TestStdAlgorithmsTeamShiftRight.cpp | 187 ++ .../TestStdAlgorithmsTeamSwapRanges.cpp | 151 ++ ...TestStdAlgorithmsTeamTransformBinaryOp.cpp | 185 ++ ...tdAlgorithmsTeamTransformExclusiveScan.cpp | 228 +++ ...tdAlgorithmsTeamTransformInclusiveScan.cpp | 264 +++ .../TestStdAlgorithmsTeamTransformReduce.cpp | 323 ++++ .../TestStdAlgorithmsTeamTransformUnaryOp.cpp | 176 ++ .../TestStdAlgorithmsTeamUnique.cpp | 171 ++ .../TestStdAlgorithmsTeamUniqueCopy.cpp | 196 ++ ...estStdAlgorithmsTransformExclusiveScan.cpp | 2 +- ...estStdAlgorithmsTransformInclusiveScan.cpp | 14 +- lib/kokkos/appveyor.yml | 2 +- lib/kokkos/benchmarks/CMakeLists.txt | 1 + lib/kokkos/benchmarks/gups/CMakeLists.txt | 4 + lib/kokkos/benchmarks/gups/Makefile | 51 - 
lib/kokkos/benchmarks/gups/gups-kokkos.cpp | 175 -- lib/kokkos/benchmarks/gups/gups.cpp | 195 ++ lib/kokkos/bin/kokkos_launch_compiler | 4 +- lib/kokkos/bin/nvcc_wrapper | 2 +- lib/kokkos/cmake/KokkosConfigCommon.cmake.in | 1 + lib/kokkos/cmake/KokkosCore_config.h.in | 23 +- lib/kokkos/cmake/Modules/FindTPLROCM.cmake | 26 +- lib/kokkos/cmake/fake_tribits.cmake | 9 - lib/kokkos/cmake/kokkos_arch.cmake | 156 +- lib/kokkos/cmake/kokkos_compiler_id.cmake | 9 +- lib/kokkos/cmake/kokkos_enable_devices.cmake | 10 + lib/kokkos/cmake/kokkos_enable_options.cmake | 9 +- lib/kokkos/cmake/kokkos_test_cxx_std.cmake | 8 +- lib/kokkos/cmake/kokkos_tpls.cmake | 7 +- lib/kokkos/cmake/kokkos_tribits.cmake | 9 + .../containers/performance_tests/TestCuda.cpp | 4 - lib/kokkos/containers/src/Kokkos_Bitset.hpp | 54 +- lib/kokkos/containers/src/Kokkos_DualView.hpp | 9 + .../containers/src/Kokkos_DynRankView.hpp | 2 +- .../containers/src/Kokkos_UnorderedMap.hpp | 88 +- lib/kokkos/containers/src/Kokkos_Vector.hpp | 21 +- .../src/impl/Kokkos_Bitset_impl.hpp | 1 - .../src/impl/Kokkos_UnorderedMap_impl.hpp | 5 +- .../containers/unit_tests/CMakeLists.txt | 8 + lib/kokkos/containers/unit_tests/Makefile | 7 +- .../containers/unit_tests/TestBitset.hpp | 11 - .../containers/unit_tests/TestDualView.hpp | 50 + .../unit_tests/TestUnorderedMap.hpp | 99 +- lib/kokkos/core/perf_test/CMakeLists.txt | 7 +- .../core/perf_test/PerfTest_MallocFree.cpp | 100 + .../core/perf_test/PerfTest_ViewAllocate.cpp | 34 - lib/kokkos/core/perf_test/test_mempool.cpp | 1 + lib/kokkos/core/perf_test/test_taskdag.cpp | 2 + lib/kokkos/core/src/Cuda/Kokkos_Cuda.hpp | 8 +- lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp | 103 +- .../Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp | 8 +- .../src/Cuda/Kokkos_Cuda_GraphNodeKernel.hpp | 8 - .../src/Cuda/Kokkos_Cuda_GraphNode_Impl.hpp | 1 - .../core/src/Cuda/Kokkos_Cuda_Graph_Impl.hpp | 43 +- .../core/src/Cuda/Kokkos_Cuda_Instance.cpp | 441 ++--- .../core/src/Cuda/Kokkos_Cuda_Instance.hpp | 
351 +++- .../src/Cuda/Kokkos_Cuda_KernelLaunch.hpp | 81 +- .../src/Cuda/Kokkos_Cuda_Parallel_Team.hpp | 7 +- .../core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp | 2 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp | 32 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp | 67 +- .../core/src/Cuda/Kokkos_Cuda_ZeroMemset.hpp | 14 +- lib/kokkos/core/src/HIP/Kokkos_HIP.cpp | 10 +- lib/kokkos/core/src/HIP/Kokkos_HIP.hpp | 7 +- .../src/HIP/Kokkos_HIP_GraphNodeKernel.hpp | 161 ++ .../src/HIP/Kokkos_HIP_GraphNode_Impl.hpp | 54 + .../core/src/HIP/Kokkos_HIP_Graph_Impl.hpp | 187 ++ .../core/src/HIP/Kokkos_HIP_Instance.cpp | 106 +- .../core/src/HIP/Kokkos_HIP_Instance.hpp | 44 +- .../core/src/HIP/Kokkos_HIP_KernelLaunch.hpp | 157 +- .../src/HIP/Kokkos_HIP_Parallel_MDRange.hpp | 8 +- .../src/HIP/Kokkos_HIP_Parallel_Range.hpp | 102 +- .../core/src/HIP/Kokkos_HIP_Parallel_Team.hpp | 52 +- .../core/src/HIP/Kokkos_HIP_ReduceScan.hpp | 48 +- .../src/HIP/Kokkos_HIP_Shuffle_Reduce.hpp | 9 +- lib/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp | 23 +- lib/kokkos/core/src/HIP/Kokkos_HIP_Space.hpp | 3 + lib/kokkos/core/src/HIP/Kokkos_HIP_Team.hpp | 69 +- .../core/src/HIP/Kokkos_HIP_Vectorization.hpp | 2 +- lib/kokkos/core/src/HPX/Kokkos_HPX.cpp | 27 +- lib/kokkos/core/src/HPX/Kokkos_HPX.hpp | 57 +- lib/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp | 9 +- .../src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp | 3 +- lib/kokkos/core/src/Kokkos_Abort.hpp | 105 ++ lib/kokkos/core/src/Kokkos_Array.hpp | 20 +- lib/kokkos/core/src/Kokkos_Assert.hpp | 70 + .../core/src/Kokkos_BitManipulation.hpp | 37 +- lib/kokkos/core/src/Kokkos_Complex.hpp | 5 + lib/kokkos/core/src/Kokkos_Concepts.hpp | 4 +- lib/kokkos/core/src/Kokkos_CopyViews.hpp | 57 +- lib/kokkos/core/src/Kokkos_Core_fwd.hpp | 1 + lib/kokkos/core/src/Kokkos_Graph.hpp | 6 + lib/kokkos/core/src/Kokkos_Half.hpp | 1 + lib/kokkos/core/src/Kokkos_HostSpace.hpp | 11 +- lib/kokkos/core/src/Kokkos_Macros.hpp | 2 +- .../core/src/Kokkos_MathematicalFunctions.hpp | 10 +- 
.../Kokkos_MathematicalSpecialFunctions.hpp | 81 +- lib/kokkos/core/src/Kokkos_MemoryPool.hpp | 2 - lib/kokkos/core/src/Kokkos_Pair.hpp | 6 +- .../core/src/Kokkos_Parallel_Reduce.hpp | 1 - lib/kokkos/core/src/Kokkos_Printf.hpp | 54 + lib/kokkos/core/src/Kokkos_ScratchSpace.hpp | 2 +- .../core/src/OpenACC/Kokkos_OpenACC.hpp | 3 + .../OpenACC/Kokkos_OpenACC_FunctorAdapter.hpp | 2 +- .../Kokkos_OpenACC_ParallelFor_Team.hpp | 3 +- .../Kokkos_OpenACC_ParallelScan_Range.hpp | 98 +- .../src/OpenACC/Kokkos_OpenACC_Traits.hpp | 3 + lib/kokkos/core/src/OpenMP/Kokkos_OpenMP.cpp | 2 + lib/kokkos/core/src/OpenMP/Kokkos_OpenMP.hpp | 1 - .../src/OpenMP/Kokkos_OpenMP_Instance.cpp | 2 - .../src/OpenMP/Kokkos_OpenMP_Instance.hpp | 14 +- .../src/OpenMP/Kokkos_OpenMP_Parallel.hpp | 1267 ------------- .../src/OpenMP/Kokkos_OpenMP_Parallel_For.hpp | 433 +++++ .../OpenMP/Kokkos_OpenMP_Parallel_Reduce.hpp | 567 ++++++ .../OpenMP/Kokkos_OpenMP_Parallel_Scan.hpp | 312 ++++ .../core/src/OpenMP/Kokkos_OpenMP_Task.hpp | 5 +- .../OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp | 9 +- .../Kokkos_OpenMPTarget_Instance.cpp | 10 +- ...Kokkos_OpenMPTarget_ParallelScan_Range.hpp | 3 +- .../Kokkos_OpenMPTarget_ParallelScan_Team.hpp | 65 +- .../Kokkos_OpenMPTarget_Parallel_Common.hpp | 117 +- lib/kokkos/core/src/SYCL/Kokkos_SYCL.cpp | 14 + lib/kokkos/core/src/SYCL/Kokkos_SYCL.hpp | 6 +- .../core/src/SYCL/Kokkos_SYCL_Abort.hpp | 4 +- .../src/SYCL/Kokkos_SYCL_Half_Conversion.hpp | 128 +- .../src/SYCL/Kokkos_SYCL_Half_Impl_Type.hpp | 37 +- .../core/src/SYCL/Kokkos_SYCL_Instance.cpp | 107 +- ...pp => Kokkos_SYCL_ParallelFor_MDRange.hpp} | 133 +- .../SYCL/Kokkos_SYCL_ParallelFor_Range.hpp | 148 ++ .../src/SYCL/Kokkos_SYCL_ParallelFor_Team.hpp | 188 ++ .../Kokkos_SYCL_ParallelReduce_MDRange.hpp | 343 ++++ .../SYCL/Kokkos_SYCL_ParallelReduce_Range.hpp | 362 ++++ .../SYCL/Kokkos_SYCL_ParallelReduce_Team.hpp | 458 +++++ ...hpp => Kokkos_SYCL_ParallelScan_Range.hpp} | 34 +- .../src/SYCL/Kokkos_SYCL_Parallel_Reduce.hpp 
| 788 -------- .../src/SYCL/Kokkos_SYCL_Parallel_Team.hpp | 914 --------- .../core/src/SYCL/Kokkos_SYCL_Space.cpp | 2 + lib/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp | 75 +- .../core/src/SYCL/Kokkos_SYCL_TeamPolicy.hpp | 357 ++++ .../SYCL/Kokkos_SYCL_WorkgroupReduction.hpp | 177 ++ .../core/src/SYCL/Kokkos_SYCL_ZeroMemset.hpp | 2 + lib/kokkos/core/src/Serial/Kokkos_Serial.cpp | 6 + lib/kokkos/core/src/Serial/Kokkos_Serial.hpp | 38 + .../core/src/Threads/Kokkos_Threads.hpp | 16 - .../core/src/Threads/Kokkos_ThreadsExec.hpp | 5 +- .../core/src/Threads/Kokkos_ThreadsTeam.hpp | 63 +- .../Kokkos_Threads_ParallelFor_MDRange.hpp | 115 ++ .../Kokkos_Threads_ParallelFor_Range.hpp | 122 ++ .../Kokkos_Threads_ParallelFor_Team.hpp | 118 ++ ...Kokkos_Threads_ParallelReduce_MDRange.hpp} | 91 +- .../Kokkos_Threads_ParallelReduce_Range.hpp | 171 ++ ...=> Kokkos_Threads_ParallelReduce_Team.hpp} | 94 +- .../Kokkos_Threads_ParallelScan_Range.hpp | 198 ++ .../Threads/Kokkos_Threads_Parallel_Range.hpp | 435 ----- .../Kokkos_Threads_WorkGraphPolicy.hpp | 2 +- .../src/View/MDSpan/Kokkos_MDSpan_Extents.hpp | 10 +- .../src/View/MDSpan/Kokkos_MDSpan_Header.hpp | 24 +- .../core/src/decl/Kokkos_Declare_OPENMP.hpp | 3 + .../core/src/decl/Kokkos_Declare_SYCL.hpp | 11 +- .../core/src/decl/Kokkos_Declare_THREADS.hpp | 10 + lib/kokkos/core/src/impl/Kokkos_Abort.cpp | 44 + .../src/impl/Kokkos_CheckedIntegerOps.hpp | 66 + lib/kokkos/core/src/impl/Kokkos_Core.cpp | 25 +- .../src/impl/Kokkos_Default_Graph_Impl.hpp | 2 - lib/kokkos/core/src/impl/Kokkos_Error.cpp | 24 +- lib/kokkos/core/src/impl/Kokkos_Error.hpp | 148 +- .../core/src/impl/Kokkos_FunctorAnalysis.hpp | 4 - .../impl/Kokkos_Half_FloatingPointWrapper.hpp | 161 +- .../Kokkos_Half_MathematicalFunctions.hpp | 259 +++ .../src/impl/Kokkos_Half_NumericTraits.hpp | 60 +- lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp | 98 +- .../src/impl/Kokkos_HostSpace_ZeroMemset.hpp | 2 - .../src/impl/Kokkos_HostSpace_deepcopy.cpp | 2 +- 
.../core/src/impl/Kokkos_HostThreadTeam.hpp | 73 +- lib/kokkos/core/src/impl/Kokkos_Profiling.cpp | 16 +- .../core/src/impl/Kokkos_SharedAlloc.cpp | 50 +- .../core/src/impl/Kokkos_SharedAlloc.hpp | 4 +- .../src/impl/Kokkos_SharedAlloc_timpl.hpp | 6 +- .../core/src/impl/Kokkos_TaskQueueCommon.hpp | 8 +- .../src/impl/Kokkos_TaskQueueMultiple.hpp | 4 +- lib/kokkos/core/src/impl/Kokkos_Utilities.hpp | 27 + .../core/src/impl/Kokkos_ViewMapping.hpp | 4 +- .../core/src/setup/Kokkos_Setup_SYCL.hpp | 5 + lib/kokkos/core/unit_test/CMakeLists.txt | 151 +- lib/kokkos/core/unit_test/TestAbort.hpp | 10 +- lib/kokkos/core/unit_test/TestArrayOps.hpp | 393 ++++ .../core/unit_test/TestAtomicOperations.hpp | 1421 ++++---------- .../TestAtomicOperations_complexdouble.hpp | 26 +- .../TestAtomicOperations_complexfloat.hpp | 26 +- .../unit_test/TestAtomicOperations_double.hpp | 19 +- .../unit_test/TestAtomicOperations_float.hpp | 19 +- .../unit_test/TestAtomicOperations_int.hpp | 29 +- .../TestAtomicOperations_longint.hpp | 29 +- .../TestAtomicOperations_longlongint.hpp | 29 +- .../TestAtomicOperations_unsignedint.hpp | 33 +- .../TestAtomicOperations_unsignedlongint.hpp | 33 +- ...stAtomicOperations_unsignedlonglongint.hpp | 36 + lib/kokkos/core/unit_test/TestAtomics.hpp | 14 +- .../core/unit_test/TestBitManipulation.cpp | 4 +- .../unit_test/TestBitManipulationBuiltins.hpp | 19 +- .../core/unit_test/TestCheckedIntegerOps.hpp | 51 + .../unit_test/TestExecSpacePartitioning.hpp | 69 +- .../core/unit_test/TestHalfOperators.hpp | 107 +- .../TestHostSharedPtrAccessOnDevice.hpp | 6 - .../TestJoinBackwardCompatibility.hpp | 29 +- .../core/unit_test/TestLocalDeepCopy.hpp | 29 +- .../TestMDRangePolicyConstructors.hpp | 2 - lib/kokkos/core/unit_test/TestMDSpan.hpp | 4 +- .../unit_test/TestMathematicalConstants.hpp | 3 +- .../unit_test/TestMathematicalFunctions.hpp | 806 +++++++- .../TestMathematicalSpecialFunctions.hpp | 67 +- lib/kokkos/core/unit_test/TestMinMaxClamp.hpp | 5 - 
.../core/unit_test/TestNumericTraits.hpp | 122 +- lib/kokkos/core/unit_test/TestOther.hpp | 5 - lib/kokkos/core/unit_test/TestPrintf.hpp | 37 + .../core/unit_test/TestQuadPrecisionMath.hpp | 2 +- lib/kokkos/core/unit_test/TestRange.hpp | 10 +- .../core/unit_test/TestRangePolicyRequire.hpp | 13 +- lib/kokkos/core/unit_test/TestReducers.hpp | 7 +- lib/kokkos/core/unit_test/TestReducers_b.hpp | 4 + lib/kokkos/core/unit_test/TestSharedSpace.cpp | 8 +- .../core/unit_test/TestTaskScheduler.hpp | 2 +- lib/kokkos/core/unit_test/TestTeam.hpp | 25 +- lib/kokkos/core/unit_test/TestTeamScan.hpp | 129 ++ lib/kokkos/core/unit_test/TestTeamScratch.hpp | 18 + lib/kokkos/core/unit_test/TestTeamVector.hpp | 182 +- .../core/unit_test/TestTeamVectorRange.hpp | 22 +- lib/kokkos/core/unit_test/TestUtilities.hpp | 90 + lib/kokkos/core/unit_test/TestViewAPI.hpp | 4 +- .../core/unit_test/TestViewCtorDimMatch.hpp | 4 +- .../TestViewLayoutStrideAssignment.hpp | 2 - .../TestViewMemoryAccessViolation.hpp | 8 +- .../core/unit_test/cuda/TestCuda_Spaces.cpp | 10 - .../headers_self_contained/CMakeLists.txt | 4 + .../hpx/TestHPX_IndependentInstances.cpp | 2 +- ...X_IndependentInstancesDelayedExecution.cpp | 2 +- ...estHPX_IndependentInstancesInstanceIds.cpp | 2 +- .../unit_test/sycl/TestSYCL_InterOp_Init.cpp | 3 +- .../sycl/TestSYCL_InterOp_Init_Context.cpp | 6 +- .../sycl/TestSYCL_InterOp_Streams.cpp | 3 +- .../sycl/TestSYCL_TeamScratchStreams.cpp | 6 +- .../view/TestExtentsDatatypeConversion.cpp | 53 +- .../build_cmake_in_tree/cmake_example.cpp | 2 + .../build_cmake_installed/cmake_example.cpp | 2 + .../foo.cpp | 2 +- .../cmake_example.cpp | 2 + .../03_vectorization/vectorization.cpp | 7 +- lib/kokkos/generate_makefile.bash | 11 +- lib/kokkos/master_history.txt | 1 + lib/kokkos/simd/src/Kokkos_SIMD.hpp | 61 +- lib/kokkos/simd/src/Kokkos_SIMD_AVX2.hpp | 1216 +++++++++--- lib/kokkos/simd/src/Kokkos_SIMD_AVX512.hpp | 1647 ++++++++++++----- lib/kokkos/simd/src/Kokkos_SIMD_Common.hpp | 167 +- 
.../simd/src/Kokkos_SIMD_Common_Math.hpp | 260 +++ lib/kokkos/simd/src/Kokkos_SIMD_NEON.hpp | 999 ++++++++-- lib/kokkos/simd/src/Kokkos_SIMD_Scalar.hpp | 229 ++- lib/kokkos/simd/unit_tests/CMakeLists.txt | 2 + lib/kokkos/simd/unit_tests/TestSIMD.cpp | 548 +----- .../unit_tests/include/SIMDTesting_Ops.hpp | 212 +++ .../include/SIMDTesting_Utilities.hpp | 167 ++ .../unit_tests/include/TestSIMD_Condition.hpp | 105 ++ .../include/TestSIMD_Conversions.hpp | 131 ++ .../include/TestSIMD_GeneratorCtors.hpp | 140 ++ .../unit_tests/include/TestSIMD_MaskOps.hpp | 116 ++ .../unit_tests/include/TestSIMD_MathOps.hpp | 289 +++ .../unit_tests/include/TestSIMD_ShiftOps.hpp | 280 +++ .../include/TestSIMD_WhereExpressions.hpp | 195 ++ ...da_cc7_asm_atomic_fetch_op.inc_forceglobal | 153 -- .../cuda_cc7_asm_atomic_fetch_op.inc_generic | 151 -- .../cuda_cc7_asm_atomic_fetch_op.inc_isglobal | 57 +- ...cuda_cc7_asm_atomic_fetch_op.inc_predicate | 54 +- .../cuda_cc7_asm_atomic_op.inc_forceglobal | 64 - .../cuda/cuda_cc7_asm_atomic_op.inc_generic | 64 - .../cuda/cuda_cc7_asm_atomic_op.inc_isglobal | 57 +- .../cuda/cuda_cc7_asm_atomic_op.inc_predicate | 57 +- .../__p0009_bits/compressed_pair.hpp | 54 +- .../experimental/__p0009_bits/config.hpp | 68 +- .../__p0009_bits/default_accessor.hpp | 50 +- .../__p0009_bits/dynamic_extent.hpp | 72 +- .../experimental/__p0009_bits/extents.hpp | 953 +++++----- .../__p0009_bits/full_extent_t.hpp | 48 +- .../experimental/__p0009_bits/layout_left.hpp | 96 +- .../__p0009_bits/layout_right.hpp | 97 +- .../__p0009_bits/layout_stride.hpp | 228 +-- .../experimental/__p0009_bits/macros.hpp | 76 +- .../__p0009_bits/maybe_static_value.hpp | 152 -- .../experimental/__p0009_bits/mdspan.hpp | 197 +- .../__p0009_bits/no_unique_address.hpp | 54 +- .../standard_layout_static_array.hpp | 685 ------- .../__p0009_bits/static_array.hpp | 286 --- .../experimental/__p0009_bits/submdspan.hpp | 586 ------ .../__p0009_bits/trait_backports.hpp | 62 +- 
.../experimental/__p0009_bits/type_list.hpp | 52 +- .../experimental/__p1684_bits/mdarray.hpp | 221 ++- .../__p2630_bits/strided_slice.hpp | 49 + .../experimental/__p2630_bits/submdspan.hpp | 42 + .../__p2630_bits/submdspan_extents.hpp | 323 ++++ .../__p2630_bits/submdspan_mapping.hpp | 299 +++ .../tpls/mdspan/include/experimental/mdarray | 48 - .../tpls/mdspan/include/experimental/mdspan | 56 - .../tpls/mdspan/include/mdspan/mdarray.hpp | 31 + .../tpls/mdspan/include/mdspan/mdspan.hpp | 41 + 493 files changed, 41548 insertions(+), 15629 deletions(-) create mode 100644 lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp create mode 100644 lib/kokkos/algorithms/src/sorting/Kokkos_BinSortPublicAPI.hpp create mode 100644 lib/kokkos/algorithms/src/sorting/Kokkos_NestedSortPublicAPI.hpp create mode 100644 lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp create mode 100644 lib/kokkos/algorithms/src/sorting/impl/Kokkos_CopyOpsForBinSortImpl.hpp create mode 100644 lib/kokkos/algorithms/src/sorting/impl/Kokkos_NestedSortImpl.hpp create mode 100644 lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FunctorsForExclusiveScan.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MustUseKokkosSingleInTeam.hpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestSortCustomComp.hpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentDifference.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentFind.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAllOf.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAnyOf.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopy.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopyBackward.cpp create mode 100644 
lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopyIf.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopy_n.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCount.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCountIf.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamEqual.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamExclusiveScan.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFill.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFill_n.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFind.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindEnd.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindFirstOf.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIf.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIfNot.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamForEach.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamForEachN.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamGenerate.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamGenerate_n.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamInclusiveScan.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsPartitioned.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSorted.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSortedUntil.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamLexicographicalCompare.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMaxElement.cpp create mode 100644 
lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinElement.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinMaxElement.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMismatch.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMove.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMoveBackward.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamNoneOf.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamPartitionCopy.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamPartitionPoint.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReduce.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemove.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveCopy.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveCopyIf.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveIf.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplace.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceCopy.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceCopyIf.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceIf.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReverse.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReverseCopy.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRotate.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRotateCopy.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearch.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearchN.cpp create mode 
100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamShiftLeft.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamShiftRight.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSwapRanges.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformBinaryOp.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformExclusiveScan.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformInclusiveScan.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformReduce.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformUnaryOp.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamUnique.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamUniqueCopy.cpp create mode 100644 lib/kokkos/benchmarks/CMakeLists.txt create mode 100644 lib/kokkos/benchmarks/gups/CMakeLists.txt delete mode 100644 lib/kokkos/benchmarks/gups/Makefile delete mode 100644 lib/kokkos/benchmarks/gups/gups-kokkos.cpp create mode 100644 lib/kokkos/benchmarks/gups/gups.cpp create mode 100644 lib/kokkos/core/perf_test/PerfTest_MallocFree.cpp create mode 100644 lib/kokkos/core/src/HIP/Kokkos_HIP_GraphNodeKernel.hpp create mode 100644 lib/kokkos/core/src/HIP/Kokkos_HIP_GraphNode_Impl.hpp create mode 100644 lib/kokkos/core/src/HIP/Kokkos_HIP_Graph_Impl.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Abort.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Assert.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Printf.hpp delete mode 100644 lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp create mode 100644 lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_For.hpp create mode 100644 lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_Reduce.hpp create mode 100644 lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_Scan.hpp rename 
lib/kokkos/core/src/SYCL/{Kokkos_SYCL_Parallel_Range.hpp => Kokkos_SYCL_ParallelFor_MDRange.hpp} (61%) create mode 100644 lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_Range.hpp create mode 100644 lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_Team.hpp create mode 100644 lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_MDRange.hpp create mode 100644 lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Range.hpp create mode 100644 lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Team.hpp rename lib/kokkos/core/src/SYCL/{Kokkos_SYCL_Parallel_Scan.hpp => Kokkos_SYCL_ParallelScan_Range.hpp} (95%) delete mode 100644 lib/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Reduce.hpp delete mode 100644 lib/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp create mode 100644 lib/kokkos/core/src/SYCL/Kokkos_SYCL_TeamPolicy.hpp create mode 100644 lib/kokkos/core/src/SYCL/Kokkos_SYCL_WorkgroupReduction.hpp create mode 100644 lib/kokkos/core/src/Threads/Kokkos_Threads_ParallelFor_MDRange.hpp create mode 100644 lib/kokkos/core/src/Threads/Kokkos_Threads_ParallelFor_Range.hpp create mode 100644 lib/kokkos/core/src/Threads/Kokkos_Threads_ParallelFor_Team.hpp rename lib/kokkos/core/src/Threads/{Kokkos_Threads_Parallel_MDRange.hpp => Kokkos_Threads_ParallelReduce_MDRange.hpp} (65%) create mode 100644 lib/kokkos/core/src/Threads/Kokkos_Threads_ParallelReduce_Range.hpp rename lib/kokkos/core/src/Threads/{Kokkos_Threads_Parallel_Team.hpp => Kokkos_Threads_ParallelReduce_Team.hpp} (59%) create mode 100644 lib/kokkos/core/src/Threads/Kokkos_Threads_ParallelScan_Range.hpp delete mode 100644 lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel_Range.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Abort.cpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_CheckedIntegerOps.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Half_MathematicalFunctions.hpp create mode 100644 lib/kokkos/core/unit_test/TestArrayOps.hpp create mode 100644 
lib/kokkos/core/unit_test/TestAtomicOperations_unsignedlonglongint.hpp create mode 100644 lib/kokkos/core/unit_test/TestCheckedIntegerOps.hpp create mode 100644 lib/kokkos/core/unit_test/TestPrintf.hpp create mode 100644 lib/kokkos/simd/src/Kokkos_SIMD_Common_Math.hpp create mode 100644 lib/kokkos/simd/unit_tests/include/SIMDTesting_Ops.hpp create mode 100644 lib/kokkos/simd/unit_tests/include/SIMDTesting_Utilities.hpp create mode 100644 lib/kokkos/simd/unit_tests/include/TestSIMD_Condition.hpp create mode 100644 lib/kokkos/simd/unit_tests/include/TestSIMD_Conversions.hpp create mode 100644 lib/kokkos/simd/unit_tests/include/TestSIMD_GeneratorCtors.hpp create mode 100644 lib/kokkos/simd/unit_tests/include/TestSIMD_MaskOps.hpp create mode 100644 lib/kokkos/simd/unit_tests/include/TestSIMD_MathOps.hpp create mode 100644 lib/kokkos/simd/unit_tests/include/TestSIMD_ShiftOps.hpp create mode 100644 lib/kokkos/simd/unit_tests/include/TestSIMD_WhereExpressions.hpp delete mode 100644 lib/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_forceglobal delete mode 100644 lib/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_generic delete mode 100644 lib/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc_forceglobal delete mode 100644 lib/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc_generic delete mode 100644 lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/maybe_static_value.hpp delete mode 100644 lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/standard_layout_static_array.hpp delete mode 100644 lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/static_array.hpp delete mode 100644 lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/submdspan.hpp create mode 100644 lib/kokkos/tpls/mdspan/include/experimental/__p2630_bits/strided_slice.hpp create mode 100644 lib/kokkos/tpls/mdspan/include/experimental/__p2630_bits/submdspan.hpp create mode 100644 
lib/kokkos/tpls/mdspan/include/experimental/__p2630_bits/submdspan_extents.hpp create mode 100644 lib/kokkos/tpls/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp delete mode 100644 lib/kokkos/tpls/mdspan/include/experimental/mdarray delete mode 100644 lib/kokkos/tpls/mdspan/include/experimental/mdspan create mode 100644 lib/kokkos/tpls/mdspan/include/mdspan/mdarray.hpp create mode 100644 lib/kokkos/tpls/mdspan/include/mdspan/mdspan.hpp diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index 4c145c44b3..c6115f4b3d 100644 --- a/lib/kokkos/CHANGELOG.md +++ b/lib/kokkos/CHANGELOG.md @@ -1,6 +1,97 @@ # CHANGELOG -## [4.1.00](https://github.com/kokkos/kokkos/tree/4.0.01) (2023-06-16) +## [4.2.00](https://github.com/kokkos/kokkos/tree/4.2.00) (2023-11-06) +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.1.00...4.2.00) + +### Features: +- SIMD: significant improvements to SIMD support and alignment with C++26 SIMD + - add `Kokkos::abs` overload for SIMD types [\#6069](https://github.com/kokkos/kokkos/pull/6069) + - add generator constructors [\#6347](https://github.com/kokkos/kokkos/pull/6347) + - convert binary operators to hidden friends [\#6320](https://github.com/kokkos/kokkos/pull/6320) + - add shift operators [\#6109](https://github.com/kokkos/kokkos/pull/6109) + - add `float` support [\#6177](https://github.com/kokkos/kokkos/pull/6177) + - add remaining `gather_from` and `scatter_to` overloads [\#6220](https://github.com/kokkos/kokkos/pull/6220) + - define simd math function overloads in the Kokkos namespace [\#6465](https://github.com/kokkos/kokkos/pull/6465), [\#6487](https://github.com/kokkos/kokkos/pull/6487) + - `Kokkos_ENABLE_NATIVE=ON` autodetects SIMD types supported [\#6188](https://github.com/kokkos/kokkos/pull/6188) + - fix AVX2 SIMD support for ZEN2 AMD CPU [\#6238](https://github.com/kokkos/kokkos/pull/6238) +- `Kokkos::printf` [\#6083](https://github.com/kokkos/kokkos/pull/6083) +- `Kokkos::sort`: support custom 
comparator [\#6253](https://github.com/kokkos/kokkos/pull/6253) +- `half_t` and `bhalf_t` numeric traits [\#5778](https://github.com/kokkos/kokkos/pull/5778) +- `half_t` and `bhalf_t` mixed comparisons [\#6407](https://github.com/kokkos/kokkos/pull/6407) +- `half_t` and `bhalf_t` mathematical functions [\#6124](https://github.com/kokkos/kokkos/pull/6124) +- `TeamThreadRange` `parallel_scan` with return value [\#6090](https://github.com/kokkos/kokkos/pull/6090), [\#6301](https://github.com/kokkos/kokkos/pull/6301), [\#6302](https://github.com/kokkos/kokkos/pull/6302), [\#6303](https://github.com/kokkos/kokkos/pull/6303), [\#6307](https://github.com/kokkos/kokkos/pull/6307) +- `ThreadVectorRange` `parallel_scan` with return value [\#6235](https://github.com/kokkos/kokkos/pull/6235), [\#6242](https://github.com/kokkos/kokkos/pull/6242), [\#6308](https://github.com/kokkos/kokkos/pull/6308), [\#6305](https://github.com/kokkos/kokkos/pull/6305), [\#6292](https://github.com/kokkos/kokkos/pull/6292) +- Add team-level std algorithms [\#6200](https://github.com/kokkos/kokkos/pull/6200), [\#6205](https://github.com/kokkos/kokkos/pull/6205), [\#6207](https://github.com/kokkos/kokkos/pull/6207), [\#6208](https://github.com/kokkos/kokkos/pull/6208), [\#6209](https://github.com/kokkos/kokkos/pull/6209), [\#6210](https://github.com/kokkos/kokkos/pull/6210), [\#6211](https://github.com/kokkos/kokkos/pull/6211), [\#6212](https://github.com/kokkos/kokkos/pull/6212), [\#6213](https://github.com/kokkos/kokkos/pull/6213), [\#6256](https://github.com/kokkos/kokkos/pull/6256), [\#6258](https://github.com/kokkos/kokkos/pull/6258), [\#6350](https://github.com/kokkos/kokkos/pull/6350), [\#6351](https://github.com/kokkos/kokkos/pull/6351) +- Serial: Allow for distinct execution space instances [\#6441](https://github.com/kokkos/kokkos/pull/6441) + +### Backend and Architecture Enhancements: + +#### CUDA: +- Fixed potential data race in Cuda `parallel_reduce` 
[\#6236](https://github.com/kokkos/kokkos/pull/6236) +- Use `cudaMallocAsync` by default [\#6402](https://github.com/kokkos/kokkos/pull/6402) +- Bugfix for using Kokkos from a thread of execution [\#6299](https://github.com/kokkos/kokkos/pull/6299) + +#### HIP: +- New naming convention for AMD GPU: VEGA906, VEGA908, VEGA90A, NAVI1030 to AMD_GFX906, AMD_GFX908, AMD_GFX90A, AMD_GFX1030 [\#6266](https://github.com/kokkos/kokkos/pull/6266) +- Add initial support for gfx942: [\#6358](https://github.com/kokkos/kokkos/pull/6358) +- Improve reduction performance [\#6229](https://github.com/kokkos/kokkos/pull/6229) +- Deprecate `HIP(hipStream_t,bool)` constructor [\#6401](https://github.com/kokkos/kokkos/pull/6401) +- Add support for Graph [\#6370](https://github.com/kokkos/kokkos/pull/6370) +- Improve reduction performance when using Teams [\#6284](https://github.com/kokkos/kokkos/pull/6284) +- Fix concurrency calculation [\#6479](https://github.com/kokkos/kokkos/pull/6479) +- Fix potential data race in HIP `parallel_reduce` [\#6429](https://github.com/kokkos/kokkos/pull/6429) + +#### SYCL: +- Enforce external `sycl::queues` to be in-order [\#6246](https://github.com/kokkos/kokkos/pull/6246) +- Improve reduction performance: [\#6272](https://github.com/kokkos/kokkos/pull/6272) [\#6271](https://github.com/kokkos/kokkos/pull/6271) [\#6270](https://github.com/kokkos/kokkos/pull/6270) [\#6264](https://github.com/kokkos/kokkos/pull/6264) +- Allow using the SYCL execution space on AMD GPUs [\#6321](https://github.com/kokkos/kokkos/pull/6321) +- Allow sorting via native oneDPL to support Views with stride=1 [\#6322](https://github.com/kokkos/kokkos/pull/6322) +- Make in-order queues the default via macro [\#6189](https://github.com/kokkos/kokkos/pull/6189) + +#### OpenACC: +- Support Clacc compiler [\#6250](https://github.com/kokkos/kokkos/pull/6250) + +### General Enhancements +- Add missing `is_*_view` traits and `is_*_view_v` helper variable templates for `DynRankView`, 
`DynamicView`, `OffsetView`, `ScatterView` containers [\#6195](https://github.com/kokkos/kokkos/pull/6195) +- Make `nvcc_wrapper` and `compiler_launcher` scripts more portable by switching to a `#!/usr/bin/env` shebang [\#6357](https://github.com/kokkos/kokkos/pull/6357) +- Add an improved `Kokkos::malloc` / `Kokkos::free` performance test [\#6377](https://github.com/kokkos/kokkos/pull/6377) +- Ensure `Views` with `size==0` can be used with `deep_copy` [\#6273](https://github.com/kokkos/kokkos/pull/6273) +- `Kokkos::abort` is moved to header `Kokkos_Abort.hpp` [\#6445](https://github.com/kokkos/kokkos/pull/6445) +- `KOKKOS_ASSERT`, `KOKKOS_EXPECTS`, `KOKKOS_ENSURES` are moved to header `Kokkos_Assert.hpp` [\#6445](https://github.com/kokkos/kokkos/pull/6445) +- Add a permuted-index mode to the gups benchmark [\#6378](https://github.com/kokkos/kokkos/pull/6378) +- Check for overflow during backend initialization [\#6159](https://github.com/kokkos/kokkos/pull/6159) +- Make constraints on `Kokkos::sort` more visible [\#6234](https://github.com/kokkos/kokkos/pull/6234) and cleanup API [\#6239](https://github.com/kokkos/kokkos/pull/6239) +- Add converting assignment to `DualView`: [\#6474](https://github.com/kokkos/kokkos/pull/6474) + + +### Build System Changes + +- Export `Kokkos_CXX_COMPILER_VERSION` [\#6282](https://github.com/kokkos/kokkos/pull/6282) +- Disable default oneDPL support in Trilinos [\#6342](https://github.com/kokkos/kokkos/pull/6342) + +### Incompatibilities (i.e. 
breaking changes) + - Ensure that `Kokkos::complex` only gets instantiated for cv-unqualified floating-point types [\#6251](https://github.com/kokkos/kokkos/pull/6251) + - Removed (deprecated-3) support for volatile join operators in reductions [\#6385](https://github.com/kokkos/kokkos/pull/6385) + - Enforce `ViewCtorArgs` restrictions for `create_mirror_view` [\#6304](https://github.com/kokkos/kokkos/pull/6304) + - SIMD types for ARM NEON are not autodetected anymore but need `Kokkos_ARCH_ARM_NEON` or `Kokkos_ARCH_NATIVE=ON` [\#6394](https://github.com/kokkos/kokkos/pull/6394) + - Remove `#include <iostream>` from headers where possible [\#6482](https://github.com/kokkos/kokkos/pull/6482) + +### Deprecations +- Deprecated `Kokkos::vector` [\#6252](https://github.com/kokkos/kokkos/pull/6252) +- All host allocation mechanisms except for `STD_MALLOC` have been deprecated [\#6341](https://github.com/kokkos/kokkos/pull/6341) + +### Bug Fixes + - Missing memory fence in `RandomPool::free_state` functions [\#6290](https://github.com/kokkos/kokkos/pull/6290) + - Fix for corner case in `Kokkos::Experimental::is_partitioned` algorithm [\#6257](https://github.com/kokkos/kokkos/pull/6257) + - Fix initialization of scratch lock variables in the `Cuda` backend [\#6433](https://github.com/kokkos/kokkos/pull/6433) + - Fixes for `Kokkos::Array` [\#6372](https://github.com/kokkos/kokkos/pull/6372) + - Fixed symlink configure issue for Windows [\#6241](https://github.com/kokkos/kokkos/pull/6241) + - OpenMPTarget init-join fix [\#6444](https://github.com/kokkos/kokkos/pull/6444) + - Fix atomic operations bug for Min and Max [\#6435](https://github.com/kokkos/kokkos/pull/6435) + - Fix implementation for `cyl_bessel_i0` [\#6484](https://github.com/kokkos/kokkos/pull/6484) + - Fix various NVCC warnings in `BinSort`, `Array`, and bit manipulation function templates [\#6483](https://github.com/kokkos/kokkos/pull/6483) + +## [4.1.00](https://github.com/kokkos/kokkos/tree/4.1.00) (2023-06-16) [Full
Changelog](https://github.com/kokkos/kokkos/compare/4.0.01...4.1.00) ### Features: diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt index 895cee6a08..f6bd81058e 100644 --- a/lib/kokkos/CMakeLists.txt +++ b/lib/kokkos/CMakeLists.txt @@ -150,8 +150,8 @@ ENDIF() set(Kokkos_VERSION_MAJOR 4) -set(Kokkos_VERSION_MINOR 1) -set(Kokkos_VERSION_PATCH 00) +set(Kokkos_VERSION_MINOR 2) +set(Kokkos_VERSION_PATCH 0) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") message(STATUS "Kokkos version: ${Kokkos_VERSION}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") @@ -314,7 +314,6 @@ KOKKOS_PROCESS_SUBPACKAGES() # E) If Kokkos itself is enabled, process the Kokkos package # -KOKKOS_EXCLUDE_AUTOTOOLS_FILES() KOKKOS_PACKAGE_POSTPROCESS() KOKKOS_CONFIGURE_CORE() diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index 46998091fe..c970f72755 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -11,8 +11,8 @@ CXXFLAGS += $(SHFLAGS) endif KOKKOS_VERSION_MAJOR = 4 -KOKKOS_VERSION_MINOR = 1 -KOKKOS_VERSION_PATCH = 00 +KOKKOS_VERSION_MINOR = 2 +KOKKOS_VERSION_PATCH = 0 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial @@ -23,7 +23,7 @@ KOKKOS_DEVICES ?= "OpenMP" # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90 # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX # IBM: BGQ,Power7,Power8,Power9 -# AMD-GPUS: Vega906,Vega908,Vega90A,Navi1030 +# AMD-GPUS: GFX906,GFX908,GFX90A,GFX942,GFX1030,GFX1100 # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3 # Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC KOKKOS_ARCH ?= "" @@ -40,7 +40,7 @@ KOKKOS_TRIBITS ?= "no" KOKKOS_STANDALONE_CMAKE ?= "no" # Default settings 
specific options. -# Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr +# Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr,disable_malloc_async KOKKOS_CUDA_OPTIONS ?= "enable_lambda" # Options: rdc @@ -92,6 +92,7 @@ KOKKOS_INTERNAL_CUDA_USE_UVM := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS), KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),rdc) KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda) KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_constexpr) +KOKKOS_INTERNAL_CUDA_DISABLE_MALLOC_ASYNC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),disable_malloc_async) KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch) # deprecated KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_desul_atomics) @@ -412,10 +413,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 0) KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen) endif endif -KOKKOS_INTERNAL_USE_ARCH_VEGA906 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega906) -KOKKOS_INTERNAL_USE_ARCH_VEGA908 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega908) -KOKKOS_INTERNAL_USE_ARCH_VEGA90A := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega90A) -KOKKOS_INTERNAL_USE_ARCH_NAVI1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),Navi1030) +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA906),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906)) +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA908),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX908)) +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX90A)) +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(or $(call 
kokkos_has_string,$(KOKKOS_ARCH),NAVI1030),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030)) +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1100)) # Any AVX? KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM)) @@ -698,6 +700,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND") endif + + ifeq ($(KOKKOS_INTERNAL_CUDA_DISABLE_MALLOC_ASYNC), 0) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC") + else + tmp := $(call kokkos_append_header,"/* $H""undef KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC */") + endif endif ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) @@ -710,6 +718,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) KOKKOS_CXXFLAGS += @@ -722,6 +731,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) KOKKOS_CXXFLAGS += @@ -734,6 +744,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_A64FX), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_A64FX") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") KOKKOS_CXXFLAGS += -march=armv8.2-a+sve KOKKOS_LDFLAGS += -march=armv8.2-a+sve @@ -749,7 +760,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2") ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) KOKKOS_CXXFLAGS 
+= -mavx2 @@ -762,7 +773,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN2") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2") ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) KOKKOS_CXXFLAGS += -mavx2 @@ -775,7 +786,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN3") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2") ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) KOKKOS_CXXFLAGS += -mavx2 @@ -789,6 +800,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) KOKKOS_CXXFLAGS += @@ -802,6 +814,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX2") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) KOKKOS_CXXFLAGS += @@ -1085,29 +1098,34 @@ endif # Figure out the architecture flag for ROCm. 
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA906") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX906") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx906 endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA908), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA908") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX908") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx908 endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA90A), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA90A") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX90A") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx90a endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NAVI1030), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI1030") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI") +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX942") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx942 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1030") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG 
:= --offload-arch=gfx1030 endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NAVI1100), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI1100") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI") +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1100") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100 endif diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets index 4e08a46c69..ec8770dd7d 100644 --- a/lib/kokkos/Makefile.targets +++ b/lib/kokkos/Makefile.targets @@ -36,6 +36,8 @@ Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/ $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp Kokkos_NumericTraits.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp +Kokkos_Abort.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Abort.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Abort.cpp ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial.cpp diff --git a/lib/kokkos/algorithms/CMakeLists.txt b/lib/kokkos/algorithms/CMakeLists.txt index ab557ab66a..368984647e 100644 --- a/lib/kokkos/algorithms/CMakeLists.txt +++ b/lib/kokkos/algorithms/CMakeLists.txt @@ -2,6 +2,6 @@ IF (NOT Kokkos_INSTALL_TESTING) ADD_SUBDIRECTORY(src) ENDIF() # FIXME_OPENACC: temporarily disabled due to unimplemented features -IF(NOT ((KOKKOS_ENABLE_OPENMPTARGET OR KOKKOS_ENABLE_OPENACC) AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)) +IF(NOT ((KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) OR KOKKOS_ENABLE_OPENACC)) 
KOKKOS_ADD_TEST_DIRECTORIES(unit_tests) ENDIF() diff --git a/lib/kokkos/algorithms/src/Kokkos_NestedSort.hpp b/lib/kokkos/algorithms/src/Kokkos_NestedSort.hpp index 4c8be792d8..18e0674efe 100644 --- a/lib/kokkos/algorithms/src/Kokkos_NestedSort.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_NestedSort.hpp @@ -14,175 +14,17 @@ // //@HEADER -#ifndef KOKKOS_NESTEDSORT_HPP_ -#define KOKKOS_NESTEDSORT_HPP_ - -#include -#include -#include - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -// true for TeamVectorRange, false for ThreadVectorRange -template -struct NestedRange {}; - -// Specialization for team-level -template <> -struct NestedRange { - template - KOKKOS_FUNCTION static auto create(const TeamMember& t, SizeType len) { - return Kokkos::TeamVectorRange(t, len); - } - template - KOKKOS_FUNCTION static void barrier(const TeamMember& t) { - t.team_barrier(); - } -}; - -// Specialization for thread-level -template <> -struct NestedRange { - template - KOKKOS_FUNCTION static auto create(const TeamMember& t, SizeType len) { - return Kokkos::ThreadVectorRange(t, len); - } - // Barrier is no-op, as vector lanes of a thread are implicitly synchronized - // after parallel region - template - KOKKOS_FUNCTION static void barrier(const TeamMember&) {} -}; - -// When just doing sort (not sort_by_key), use nullptr_t for ValueViewType. -// This only takes the NestedRange instance for template arg deduction. 
-template -KOKKOS_INLINE_FUNCTION void sort_nested_impl( - const TeamMember& t, const KeyViewType& keyView, - [[maybe_unused]] const ValueViewType& valueView, const Comparator& comp, - const NestedRange) { - using SizeType = typename KeyViewType::size_type; - using KeyType = typename KeyViewType::non_const_value_type; - using Range = NestedRange; - SizeType n = keyView.extent(0); - SizeType npot = 1; - SizeType levels = 0; - // FIXME: ceiling power-of-two is a common thing to need - make it a utility - while (npot < n) { - levels++; - npot <<= 1; - } - for (SizeType i = 0; i < levels; i++) { - for (SizeType j = 0; j <= i; j++) { - // n/2 pairs of items are compared in parallel - Kokkos::parallel_for(Range::create(t, npot / 2), [=](const SizeType k) { - // How big are the brown/pink boxes? - // (Terminology comes from Wikipedia diagram) - // https://commons.wikimedia.org/wiki/File:BitonicSort.svg#/media/File:BitonicSort.svg - SizeType boxSize = SizeType(2) << (i - j); - // Which box contains this thread? - SizeType boxID = k >> (i - j); // k * 2 / boxSize; - SizeType boxStart = boxID << (1 + i - j); // boxID * boxSize - SizeType boxOffset = k - (boxStart >> 1); // k - boxID * boxSize / 2; - SizeType elem1 = boxStart + boxOffset; - // In first phase (j == 0, brown box): within a box, compare with the - // opposite value in the box. - // In later phases (j > 0, pink box): within a box, compare with fixed - // distance (boxSize / 2) apart. - SizeType elem2 = (j == 0) ? 
(boxStart + boxSize - 1 - boxOffset) - : (elem1 + boxSize / 2); - if (elem2 < n) { - KeyType key1 = keyView(elem1); - KeyType key2 = keyView(elem2); - if (comp(key2, key1)) { - keyView(elem1) = key2; - keyView(elem2) = key1; - if constexpr (!std::is_same_v) { - Kokkos::Experimental::swap(valueView(elem1), valueView(elem2)); - } - } - } - }); - Range::barrier(t); - } - } -} - -} // namespace Impl - -template -KOKKOS_INLINE_FUNCTION void sort_team(const TeamMember& t, - const ViewType& view) { - Impl::sort_nested_impl(t, view, nullptr, - Experimental::Impl::StdAlgoLessThanBinaryPredicate< - typename ViewType::non_const_value_type>(), - Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_team(const TeamMember& t, const ViewType& view, - const Comparator& comp) { - Impl::sort_nested_impl(t, view, nullptr, comp, Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_by_key_team(const TeamMember& t, - const KeyViewType& keyView, - const ValueViewType& valueView) { - Impl::sort_nested_impl(t, keyView, valueView, - Experimental::Impl::StdAlgoLessThanBinaryPredicate< - typename KeyViewType::non_const_value_type>(), - Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_by_key_team(const TeamMember& t, - const KeyViewType& keyView, - const ValueViewType& valueView, - const Comparator& comp) { - Impl::sort_nested_impl(t, keyView, valueView, comp, - Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_thread(const TeamMember& t, - const ViewType& view) { - Impl::sort_nested_impl(t, view, nullptr, - Experimental::Impl::StdAlgoLessThanBinaryPredicate< - typename ViewType::non_const_value_type>(), - Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_thread(const TeamMember& t, - const ViewType& view, - const Comparator& comp) { - Impl::sort_nested_impl(t, view, nullptr, comp, Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_by_key_thread(const TeamMember& t, - const 
KeyViewType& keyView, - const ValueViewType& valueView) { - Impl::sort_nested_impl(t, keyView, valueView, - Experimental::Impl::StdAlgoLessThanBinaryPredicate< - typename KeyViewType::non_const_value_type>(), - Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_by_key_thread(const TeamMember& t, - const KeyViewType& keyView, - const ValueViewType& valueView, - const Comparator& comp) { - Impl::sort_nested_impl(t, keyView, valueView, comp, - Impl::NestedRange()); -} - -} // namespace Experimental -} // namespace Kokkos +#ifndef KOKKOS_NESTED_SORT_HPP_ +#define KOKKOS_NESTED_SORT_HPP_ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_NESTED_SORT +#endif + +#include "sorting/Kokkos_NestedSortPublicAPI.hpp" + +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_NESTED_SORT +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_NESTED_SORT +#endif #endif diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp index abb028d28e..2d7d236d2f 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -956,6 +956,8 @@ class Random_XorShift64_Pool { KOKKOS_INLINE_FUNCTION void free_state(const Random_XorShift64& state) const { state_(state.state_idx_, 0) = state.state_; + // Release the lock only after the state has been updated in memory + Kokkos::memory_fence(); locks_(state.state_idx_, 0) = 0; } }; @@ -1208,7 +1210,9 @@ class Random_XorShift1024_Pool { KOKKOS_INLINE_FUNCTION void free_state(const Random_XorShift1024& state) const { for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i]; - p_(state.state_idx_, 0) = state.p_; + p_(state.state_idx_, 0) = state.p_; + // Release the lock only after the state has been updated in memory + Kokkos::memory_fence(); locks_(state.state_idx_, 0) = 0; } }; diff --git a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp 
b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp index 10f9ad6462..f77484cc55 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp @@ -21,762 +21,9 @@ #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT #endif -#include -#include -#include -#include - -#if defined(KOKKOS_ENABLE_CUDA) - -// Workaround for `Instruction 'shfl' without '.sync' is not supported on -// .target sm_70 and higher from PTX ISA version 6.4`. -// Also see https://github.com/NVIDIA/cub/pull/170. -#if !defined(CUB_USE_COOPERATIVE_GROUPS) -#define CUB_USE_COOPERATIVE_GROUPS -#endif - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wshadow" - -#if defined(KOKKOS_COMPILER_CLANG) -// Some versions of Clang fail to compile Thrust, failing with errors like -// this: -// /thrust/system/cuda/detail/core/agent_launcher.h:557:11: -// error: use of undeclared identifier 'va_printf' -// The exact combination of versions for Clang and Thrust (or CUDA) for this -// failure was not investigated, however even very recent version combination -// (Clang 10.0.0 and Cuda 10.0) demonstrated failure. 
-// -// Defining _CubLog here locally allows us to avoid that code path, however -// disabling some debugging diagnostics -#pragma push_macro("_CubLog") -#ifdef _CubLog -#undef _CubLog -#endif -#define _CubLog -#include -#include -#pragma pop_macro("_CubLog") -#else -#include -#include -#endif - -#pragma GCC diagnostic pop - -#endif - -#if defined(KOKKOS_ENABLE_ONEDPL) -#include -#include -#endif - -namespace Kokkos { - -namespace Impl { - -template -struct CopyOp; - -template -struct CopyOp { - KOKKOS_INLINE_FUNCTION - static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, - size_t i_src) { - dst(i_dst) = src(i_src); - } -}; - -template -struct CopyOp { - KOKKOS_INLINE_FUNCTION - static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, - size_t i_src) { - for (int j = 0; j < (int)dst.extent(1); j++) dst(i_dst, j) = src(i_src, j); - } -}; - -template -struct CopyOp { - KOKKOS_INLINE_FUNCTION - static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, - size_t i_src) { - for (int j = 0; j < dst.extent(1); j++) - for (int k = 0; k < dst.extent(2); k++) - dst(i_dst, j, k) = src(i_src, j, k); - } -}; -} // namespace Impl - -//---------------------------------------------------------------------------- - -template -class BinSort { - public: - template - struct copy_functor { - using src_view_type = typename SrcViewType::const_type; - - using copy_op = Impl::CopyOp; - - DstViewType dst_values; - src_view_type src_values; - int dst_offset; - - copy_functor(DstViewType const& dst_values_, int const& dst_offset_, - SrcViewType const& src_values_) - : dst_values(dst_values_), - src_values(src_values_), - dst_offset(dst_offset_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(const int& i) const { - copy_op::copy(dst_values, i + dst_offset, src_values, i); - } - }; - - template - struct copy_permute_functor { - // If a Kokkos::View then can generate constant random access - // otherwise can only use the 
constant type. - - using src_view_type = std::conditional_t< - Kokkos::is_view::value, - Kokkos::View -#endif - >, - typename SrcViewType::const_type>; - - using perm_view_type = typename PermuteViewType::const_type; - - using copy_op = Impl::CopyOp; - - DstViewType dst_values; - perm_view_type sort_order; - src_view_type src_values; - int src_offset; - - copy_permute_functor(DstViewType const& dst_values_, - PermuteViewType const& sort_order_, - SrcViewType const& src_values_, int const& src_offset_) - : dst_values(dst_values_), - sort_order(sort_order_), - src_values(src_values_), - src_offset(src_offset_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(const int& i) const { - copy_op::copy(dst_values, i, src_values, src_offset + sort_order(i)); - } - }; - - // Naming this alias "execution_space" would be problematic since it would be - // considered as execution space for the various functors which might use - // another execution space through sort() or create_permute_vector(). - using exec_space = typename Space::execution_space; - using bin_op_type = BinSortOp; - - struct bin_count_tag {}; - struct bin_offset_tag {}; - struct bin_binning_tag {}; - struct bin_sort_bins_tag {}; - - public: - using size_type = SizeType; - using value_type = size_type; - - using offset_type = Kokkos::View; - using bin_count_type = Kokkos::View; - - using const_key_view_type = typename KeyViewType::const_type; - - // If a Kokkos::View then can generate constant random access - // otherwise can only use the constant type. 
- - using const_rnd_key_view_type = std::conditional_t< - Kokkos::is_view::value, - Kokkos::View >, - const_key_view_type>; - - using non_const_key_scalar = typename KeyViewType::non_const_value_type; - using const_key_scalar = typename KeyViewType::const_value_type; - - using bin_count_atomic_type = - Kokkos::View >; - - private: - const_key_view_type keys; - const_rnd_key_view_type keys_rnd; - - public: - BinSortOp bin_op; - offset_type bin_offsets; - bin_count_atomic_type bin_count_atomic; - bin_count_type bin_count_const; - offset_type sort_order; - - int range_begin; - int range_end; - bool sort_within_bins; - - public: -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 - KOKKOS_DEPRECATED BinSort() = default; -#else - BinSort() = delete; -#endif - - //---------------------------------------- - // Constructor: takes the keys, the binning_operator and optionally whether to - // sort within bins (default false) - template - BinSort(const ExecutionSpace& exec, const_key_view_type keys_, - int range_begin_, int range_end_, BinSortOp bin_op_, - bool sort_within_bins_ = false) - : keys(keys_), - keys_rnd(keys_), - bin_op(bin_op_), - bin_offsets(), - bin_count_atomic(), - bin_count_const(), - sort_order(), - range_begin(range_begin_), - range_end(range_end_), - sort_within_bins(sort_within_bins_) { - static_assert( - Kokkos::SpaceAccessibility::accessible, - "The provided execution space must be able to access the memory space " - "BinSort was initialized with!"); - if (bin_op.max_bins() <= 0) - Kokkos::abort( - "The number of bins in the BinSortOp object must be greater than 0!"); - bin_count_atomic = Kokkos::View( - "Kokkos::SortImpl::BinSortFunctor::bin_count", bin_op.max_bins()); - bin_count_const = bin_count_atomic; - bin_offsets = - offset_type(view_alloc(exec, WithoutInitializing, - "Kokkos::SortImpl::BinSortFunctor::bin_offsets"), - bin_op.max_bins()); - sort_order = - offset_type(view_alloc(exec, WithoutInitializing, - "Kokkos::SortImpl::BinSortFunctor::sort_order"), - 
range_end - range_begin); - } - - BinSort(const_key_view_type keys_, int range_begin_, int range_end_, - BinSortOp bin_op_, bool sort_within_bins_ = false) - : BinSort(exec_space{}, keys_, range_begin_, range_end_, bin_op_, - sort_within_bins_) {} - - template - BinSort(const ExecutionSpace& exec, const_key_view_type keys_, - BinSortOp bin_op_, bool sort_within_bins_ = false) - : BinSort(exec, keys_, 0, keys_.extent(0), bin_op_, sort_within_bins_) {} - - BinSort(const_key_view_type keys_, BinSortOp bin_op_, - bool sort_within_bins_ = false) - : BinSort(exec_space{}, keys_, bin_op_, sort_within_bins_) {} - - //---------------------------------------- - // Create the permutation vector, the bin_offset array and the bin_count - // array. Can be called again if keys changed - template - void create_permute_vector(const ExecutionSpace& exec) { - static_assert( - Kokkos::SpaceAccessibility::accessible, - "The provided execution space must be able to access the memory space " - "BinSort was initialized with!"); - - const size_t len = range_end - range_begin; - Kokkos::parallel_for( - "Kokkos::Sort::BinCount", - Kokkos::RangePolicy(exec, 0, len), - *this); - Kokkos::parallel_scan("Kokkos::Sort::BinOffset", - Kokkos::RangePolicy( - exec, 0, bin_op.max_bins()), - *this); - - Kokkos::deep_copy(exec, bin_count_atomic, 0); - Kokkos::parallel_for( - "Kokkos::Sort::BinBinning", - Kokkos::RangePolicy(exec, 0, len), - *this); - - if (sort_within_bins) - Kokkos::parallel_for( - "Kokkos::Sort::BinSort", - Kokkos::RangePolicy( - exec, 0, bin_op.max_bins()), - *this); - } - - // Create the permutation vector, the bin_offset array and the bin_count - // array. 
Can be called again if keys changed - void create_permute_vector() { - Kokkos::fence("Kokkos::Binsort::create_permute_vector: before"); - exec_space e{}; - create_permute_vector(e); - e.fence("Kokkos::Binsort::create_permute_vector: after"); - } - - // Sort a subset of a view with respect to the first dimension using the - // permutation array - template - void sort(const ExecutionSpace& exec, ValuesViewType const& values, - int values_range_begin, int values_range_end) const { - if (values.extent(0) == 0) { - return; - } - - static_assert( - Kokkos::SpaceAccessibility::accessible, - "The provided execution space must be able to access the memory space " - "BinSort was initialized with!"); - static_assert( - Kokkos::SpaceAccessibility< - ExecutionSpace, typename ValuesViewType::memory_space>::accessible, - "The provided execution space must be able to access the memory space " - "of the View argument!"); - - const size_t len = range_end - range_begin; - const size_t values_len = values_range_end - values_range_begin; - if (len != values_len) { - Kokkos::abort( - "BinSort::sort: values range length != permutation vector length"); - } - - using scratch_view_type = - Kokkos::View; - scratch_view_type sorted_values( - view_alloc(exec, WithoutInitializing, - "Kokkos::SortImpl::BinSortFunctor::sorted_values"), - values.rank_dynamic > 0 ? len : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 1 ? values.extent(1) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 2 ? values.extent(2) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 3 ? values.extent(3) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 4 ? values.extent(4) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 5 ? values.extent(5) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 6 ? values.extent(6) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 7 ? 
values.extent(7) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG); - - { - copy_permute_functor - functor(sorted_values, sort_order, values, - values_range_begin - range_begin); - - parallel_for("Kokkos::Sort::CopyPermute", - Kokkos::RangePolicy(exec, 0, len), functor); - } - - { - copy_functor functor( - values, range_begin, sorted_values); - - parallel_for("Kokkos::Sort::Copy", - Kokkos::RangePolicy(exec, 0, len), functor); - } - } - - // Sort a subset of a view with respect to the first dimension using the - // permutation array - template - void sort(ValuesViewType const& values, int values_range_begin, - int values_range_end) const { - Kokkos::fence("Kokkos::Binsort::sort: before"); - exec_space exec; - sort(exec, values, values_range_begin, values_range_end); - exec.fence("Kokkos::BinSort:sort: after"); - } - - template - void sort(ExecutionSpace const& exec, ValuesViewType const& values) const { - this->sort(exec, values, 0, /*values.extent(0)*/ range_end - range_begin); - } - - template - void sort(ValuesViewType const& values) const { - this->sort(values, 0, /*values.extent(0)*/ range_end - range_begin); - } - - // Get the permutation vector - KOKKOS_INLINE_FUNCTION - offset_type get_permute_vector() const { return sort_order; } - - // Get the start offsets for each bin - KOKKOS_INLINE_FUNCTION - offset_type get_bin_offsets() const { return bin_offsets; } - - // Get the count for each bin - KOKKOS_INLINE_FUNCTION - bin_count_type get_bin_count() const { return bin_count_const; } - - public: - KOKKOS_INLINE_FUNCTION - void operator()(const bin_count_tag& /*tag*/, const int i) const { - const int j = range_begin + i; - bin_count_atomic(bin_op.bin(keys, j))++; - } - - KOKKOS_INLINE_FUNCTION - void operator()(const bin_offset_tag& /*tag*/, const int i, - value_type& offset, const bool& final) const { - if (final) { - bin_offsets(i) = offset; - } - offset += bin_count_const(i); - } - - KOKKOS_INLINE_FUNCTION - void operator()(const bin_binning_tag& /*tag*/, const int i) const 
{ - const int j = range_begin + i; - const int bin = bin_op.bin(keys, j); - const int count = bin_count_atomic(bin)++; - - sort_order(bin_offsets(bin) + count) = j; - } - - KOKKOS_INLINE_FUNCTION - void operator()(const bin_sort_bins_tag& /*tag*/, const int i) const { - auto bin_size = bin_count_const(i); - if (bin_size <= 1) return; - constexpr bool use_std_sort = - std::is_same_v; - int lower_bound = bin_offsets(i); - int upper_bound = lower_bound + bin_size; - // Switching to std::sort for more than 10 elements has been found - // reasonable experimentally. - if (use_std_sort && bin_size > 10) { - if constexpr (use_std_sort) { - std::sort(&sort_order(lower_bound), &sort_order(upper_bound), - [this](int p, int q) { return bin_op(keys_rnd, p, q); }); - } - } else { - for (int k = lower_bound + 1; k < upper_bound; ++k) { - int old_idx = sort_order(k); - int j = k - 1; - while (j >= lower_bound) { - int new_idx = sort_order(j); - if (!bin_op(keys_rnd, old_idx, new_idx)) break; - sort_order(j + 1) = new_idx; - --j; - } - sort_order(j + 1) = old_idx; - } - } - } -}; - -//---------------------------------------------------------------------------- - -template -struct BinOp1D { - int max_bins_ = {}; - double mul_ = {}; - double min_ = {}; - -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 - KOKKOS_DEPRECATED BinOp1D() = default; -#else - BinOp1D() = delete; -#endif - - // Construct BinOp with number of bins, minimum value and maximum value - BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, - typename KeyViewType::const_value_type max) - : max_bins_(max_bins__ + 1), - // Cast to double to avoid possible overflow when using integer - mul_(static_cast(max_bins__) / - (static_cast(max) - static_cast(min))), - min_(static_cast(min)) { - // For integral types the number of bins may be larger than the range - // in which case we can exactly have one unique value per bin - // and then don't need to sort bins. 
- if (std::is_integral::value && - (static_cast(max) - static_cast(min)) <= - static_cast(max_bins__)) { - mul_ = 1.; - } - } - - // Determine bin index from key value - template - KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { - return static_cast(mul_ * (static_cast(keys(i)) - min_)); - } - - // Return maximum bin index + 1 - KOKKOS_INLINE_FUNCTION - int max_bins() const { return max_bins_; } - - // Compare to keys within a bin if true new_val will be put before old_val - template - KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, - iType2& i2) const { - return keys(i1) < keys(i2); - } -}; - -template -struct BinOp3D { - int max_bins_[3] = {}; - double mul_[3] = {}; - double min_[3] = {}; - -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 - KOKKOS_DEPRECATED BinOp3D() = default; -#else - BinOp3D() = delete; -#endif - - BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], - typename KeyViewType::const_value_type max[]) { - max_bins_[0] = max_bins__[0]; - max_bins_[1] = max_bins__[1]; - max_bins_[2] = max_bins__[2]; - mul_[0] = static_cast(max_bins__[0]) / - (static_cast(max[0]) - static_cast(min[0])); - mul_[1] = static_cast(max_bins__[1]) / - (static_cast(max[1]) - static_cast(min[1])); - mul_[2] = static_cast(max_bins__[2]) / - (static_cast(max[2]) - static_cast(min[2])); - min_[0] = static_cast(min[0]); - min_[1] = static_cast(min[1]); - min_[2] = static_cast(min[2]); - } - - template - KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { - return int((((int(mul_[0] * (keys(i, 0) - min_[0])) * max_bins_[1]) + - int(mul_[1] * (keys(i, 1) - min_[1]))) * - max_bins_[2]) + - int(mul_[2] * (keys(i, 2) - min_[2]))); - } - - KOKKOS_INLINE_FUNCTION - int max_bins() const { return max_bins_[0] * max_bins_[1] * max_bins_[2]; } - - template - KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, - iType2& i2) const { - if (keys(i1, 0) > keys(i2, 0)) - return true; - else if (keys(i1, 0) == 
keys(i2, 0)) { - if (keys(i1, 1) > keys(i2, 1)) - return true; - else if (keys(i1, 1) == keys(i2, 1)) { - if (keys(i1, 2) > keys(i2, 2)) return true; - } - } - return false; - } -}; - -namespace Impl { - -template -struct min_max_functor { - using minmax_scalar = - Kokkos::MinMaxScalar; - - ViewType view; - min_max_functor(const ViewType& view_) : view(view_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(const size_t& i, minmax_scalar& minmax) const { - if (view(i) < minmax.min_val) minmax.min_val = view(i); - if (view(i) > minmax.max_val) minmax.max_val = view(i); - } -}; - -} // namespace Impl - -template -std::enable_if_t<(Kokkos::is_execution_space::value) && - (!SpaceAccessibility< - HostSpace, typename Kokkos::View:: - memory_space>::accessible)> -sort(const ExecutionSpace& exec, - const Kokkos::View& view) { - if (view.extent(0) == 0) { - return; - } - - using ViewType = Kokkos::View; - using CompType = BinOp1D; - - Kokkos::MinMaxScalar result; - Kokkos::MinMax reducer(result); - parallel_reduce("Kokkos::Sort::FindExtent", - Kokkos::RangePolicy( - exec, 0, view.extent(0)), - Impl::min_max_functor(view), reducer); - if (result.min_val == result.max_val) return; - // For integral types the number of bins may be larger than the range - // in which case we can exactly have one unique value per bin - // and then don't need to sort bins. - bool sort_in_bins = true; - // TODO: figure out better max_bins then this ... 
- int64_t max_bins = view.extent(0) / 2; - if (std::is_integral::value) { - // Cast to double to avoid possible overflow when using integer - auto const max_val = static_cast(result.max_val); - auto const min_val = static_cast(result.min_val); - // using 10M as the cutoff for special behavior (roughly 40MB for the count - // array) - if ((max_val - min_val) < 10000000) { - max_bins = max_val - min_val + 1; - sort_in_bins = false; - } - } - if (std::is_floating_point::value) { - KOKKOS_ASSERT(std::isfinite(static_cast(result.max_val) - - static_cast(result.min_val))); - } - - BinSort bin_sort( - view, CompType(max_bins, result.min_val, result.max_val), sort_in_bins); - bin_sort.create_permute_vector(exec); - bin_sort.sort(exec, view); -} - -#if defined(KOKKOS_ENABLE_ONEDPL) -template -void sort(const Experimental::SYCL& space, - const Kokkos::View& view) { - if (view.extent(0) == 0) { - return; - } - - using ViewType = Kokkos::View; - static_assert(SpaceAccessibility::accessible, - "SYCL execution space is not able to access the memory space " - "of the View argument!"); - - auto queue = space.sycl_queue(); - auto policy = oneapi::dpl::execution::make_device_policy(queue); - - // Can't use Experimental::begin/end here since the oneDPL then assumes that - // the data is on the host. 
- static_assert( - ViewType::rank == 1 && - (std::is_same::value || - std::is_same::value), - "SYCL sort only supports contiguous 1D Views."); - const int n = view.extent(0); - oneapi::dpl::sort(policy, view.data(), view.data() + n); -} -#endif - -template -std::enable_if_t<(Kokkos::is_execution_space::value) && - (SpaceAccessibility< - HostSpace, typename Kokkos::View:: - memory_space>::accessible)> -sort(const ExecutionSpace&, const Kokkos::View& view) { - if (view.extent(0) == 0) { - return; - } - auto first = Experimental::begin(view); - auto last = Experimental::end(view); - std::sort(first, last); -} - -#if defined(KOKKOS_ENABLE_CUDA) -template -void sort(const Cuda& space, - const Kokkos::View& view) { - if (view.extent(0) == 0) { - return; - } - const auto exec = thrust::cuda::par.on(space.cuda_stream()); - auto first = Experimental::begin(view); - auto last = Experimental::end(view); - thrust::sort(exec, first, last); -} -#endif - -template -void sort(ViewType const& view) { - Kokkos::fence("Kokkos::sort: before"); - - if (view.extent(0) == 0) { - return; - } - - typename ViewType::execution_space exec; - sort(exec, view); - exec.fence("Kokkos::sort: fence after sorting"); -} - -template -std::enable_if_t::value> sort( - const ExecutionSpace& exec, ViewType view, size_t const begin, - size_t const end) { - if (view.extent(0) == 0) { - return; - } - - using range_policy = Kokkos::RangePolicy; - using CompType = BinOp1D; - - Kokkos::MinMaxScalar result; - Kokkos::MinMax reducer(result); - - parallel_reduce("Kokkos::Sort::FindExtent", range_policy(exec, begin, end), - Impl::min_max_functor(view), reducer); - - if (result.min_val == result.max_val) return; - - BinSort bin_sort( - exec, view, begin, end, - CompType((end - begin) / 2, result.min_val, result.max_val), true); - - bin_sort.create_permute_vector(exec); - bin_sort.sort(exec, view, begin, end); -} - -template -void sort(ViewType view, size_t const begin, size_t const end) { - 
Kokkos::fence("Kokkos::sort: before"); - - if (view.extent(0) == 0) { - return; - } - - typename ViewType::execution_space exec; - sort(exec, view, begin, end); - exec.fence("Kokkos::Sort: fence after sorting"); -} - -} // namespace Kokkos +#include "sorting/Kokkos_BinSortPublicAPI.hpp" +#include "sorting/Kokkos_SortPublicAPI.hpp" +#include "sorting/Kokkos_NestedSortPublicAPI.hpp" #ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT #undef KOKKOS_IMPL_PUBLIC_INCLUDE diff --git a/lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp b/lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp new file mode 100644 index 0000000000..73e751f572 --- /dev/null +++ b/lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp @@ -0,0 +1,129 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_BIN_OPS_PUBLIC_API_HPP_ +#define KOKKOS_BIN_OPS_PUBLIC_API_HPP_ + +#include +#include + +namespace Kokkos { + +template +struct BinOp1D { + int max_bins_ = {}; + double mul_ = {}; + double min_ = {}; + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + KOKKOS_DEPRECATED BinOp1D() = default; +#else + BinOp1D() = delete; +#endif + + // Construct BinOp with number of bins, minimum value and maximum value + BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, + typename KeyViewType::const_value_type max) + : max_bins_(max_bins__ + 1), + // Cast to double to avoid possible overflow when using integer + mul_(static_cast(max_bins__) / + (static_cast(max) - static_cast(min))), + min_(static_cast(min)) { + // For integral types the number of bins may be larger than the range + // in which case we can exactly have one unique value per bin + // and then don't need to sort bins. + if (std::is_integral::value && + (static_cast(max) - static_cast(min)) <= + static_cast(max_bins__)) { + mul_ = 1.; + } + } + + // Determine bin index from key value + template + KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { + return static_cast(mul_ * (static_cast(keys(i)) - min_)); + } + + // Return maximum bin index + 1 + KOKKOS_INLINE_FUNCTION + int max_bins() const { return max_bins_; } + + // Compare to keys within a bin if true new_val will be put before old_val + template + KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, + iType2& i2) const { + return keys(i1) < keys(i2); + } +}; + +template +struct BinOp3D { + int max_bins_[3] = {}; + double mul_[3] = {}; + double min_[3] = {}; + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + KOKKOS_DEPRECATED BinOp3D() = default; +#else + BinOp3D() = delete; +#endif + + BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], + typename KeyViewType::const_value_type max[]) { + max_bins_[0] = max_bins__[0]; + 
max_bins_[1] = max_bins__[1]; + max_bins_[2] = max_bins__[2]; + mul_[0] = static_cast(max_bins__[0]) / + (static_cast(max[0]) - static_cast(min[0])); + mul_[1] = static_cast(max_bins__[1]) / + (static_cast(max[1]) - static_cast(min[1])); + mul_[2] = static_cast(max_bins__[2]) / + (static_cast(max[2]) - static_cast(min[2])); + min_[0] = static_cast(min[0]); + min_[1] = static_cast(min[1]); + min_[2] = static_cast(min[2]); + } + + template + KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { + return int((((int(mul_[0] * (keys(i, 0) - min_[0])) * max_bins_[1]) + + int(mul_[1] * (keys(i, 1) - min_[1]))) * + max_bins_[2]) + + int(mul_[2] * (keys(i, 2) - min_[2]))); + } + + KOKKOS_INLINE_FUNCTION + int max_bins() const { return max_bins_[0] * max_bins_[1] * max_bins_[2]; } + + template + KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, + iType2& i2) const { + if (keys(i1, 0) > keys(i2, 0)) + return true; + else if (keys(i1, 0) == keys(i2, 0)) { + if (keys(i1, 1) > keys(i2, 1)) + return true; + else if (keys(i1, 1) == keys(i2, 1)) { + if (keys(i1, 2) > keys(i2, 2)) return true; + } + } + return false; + } +}; + +} // namespace Kokkos +#endif diff --git a/lib/kokkos/algorithms/src/sorting/Kokkos_BinSortPublicAPI.hpp b/lib/kokkos/algorithms/src/sorting/Kokkos_BinSortPublicAPI.hpp new file mode 100644 index 0000000000..c399279fe4 --- /dev/null +++ b/lib/kokkos/algorithms/src/sorting/Kokkos_BinSortPublicAPI.hpp @@ -0,0 +1,410 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_BIN_SORT_PUBLIC_API_HPP_ +#define KOKKOS_BIN_SORT_PUBLIC_API_HPP_ + +#include "Kokkos_BinOpsPublicAPI.hpp" +#include "impl/Kokkos_CopyOpsForBinSortImpl.hpp" +#include +#include + +namespace Kokkos { + +template +class BinSort { + public: + template + struct copy_functor { + using src_view_type = typename SrcViewType::const_type; + + using copy_op = Impl::CopyOp; + + DstViewType dst_values; + src_view_type src_values; + int dst_offset; + + copy_functor(DstViewType const& dst_values_, int const& dst_offset_, + SrcViewType const& src_values_) + : dst_values(dst_values_), + src_values(src_values_), + dst_offset(dst_offset_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int& i) const { + copy_op::copy(dst_values, i + dst_offset, src_values, i); + } + }; + + template + struct copy_permute_functor { + // If a Kokkos::View then can generate constant random access + // otherwise can only use the constant type. 
+ + using src_view_type = std::conditional_t< + Kokkos::is_view::value, + Kokkos::View= 230700) + , + Kokkos::MemoryTraits +#endif + >, + typename SrcViewType::const_type>; + + using perm_view_type = typename PermuteViewType::const_type; + + using copy_op = Impl::CopyOp; + + DstViewType dst_values; + perm_view_type sort_order; + src_view_type src_values; + int src_offset; + + copy_permute_functor(DstViewType const& dst_values_, + PermuteViewType const& sort_order_, + SrcViewType const& src_values_, int const& src_offset_) + : dst_values(dst_values_), + sort_order(sort_order_), + src_values(src_values_), + src_offset(src_offset_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int& i) const { + copy_op::copy(dst_values, i, src_values, src_offset + sort_order(i)); + } + }; + + // Naming this alias "execution_space" would be problematic since it would be + // considered as execution space for the various functors which might use + // another execution space through sort() or create_permute_vector(). + using exec_space = typename Space::execution_space; + using bin_op_type = BinSortOp; + + struct bin_count_tag {}; + struct bin_offset_tag {}; + struct bin_binning_tag {}; + struct bin_sort_bins_tag {}; + + public: + using size_type = SizeType; + using value_type = size_type; + + using offset_type = Kokkos::View; + using bin_count_type = Kokkos::View; + + using const_key_view_type = typename KeyViewType::const_type; + + // If a Kokkos::View then can generate constant random access + // otherwise can only use the constant type. 
+ + using const_rnd_key_view_type = std::conditional_t< + Kokkos::is_view::value, + Kokkos::View >, + const_key_view_type>; + + using non_const_key_scalar = typename KeyViewType::non_const_value_type; + using const_key_scalar = typename KeyViewType::const_value_type; + + using bin_count_atomic_type = + Kokkos::View >; + + private: + const_key_view_type keys; + const_rnd_key_view_type keys_rnd; + + public: + BinSortOp bin_op; + offset_type bin_offsets; + bin_count_atomic_type bin_count_atomic; + bin_count_type bin_count_const; + offset_type sort_order; + + int range_begin; + int range_end; + bool sort_within_bins; + + public: +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + KOKKOS_DEPRECATED BinSort() = default; +#else + BinSort() = delete; +#endif + + //---------------------------------------- + // Constructor: takes the keys, the binning_operator and optionally whether to + // sort within bins (default false) + template + BinSort(const ExecutionSpace& exec, const_key_view_type keys_, + int range_begin_, int range_end_, BinSortOp bin_op_, + bool sort_within_bins_ = false) + : keys(keys_), + keys_rnd(keys_), + bin_op(bin_op_), + bin_offsets(), + bin_count_atomic(), + bin_count_const(), + sort_order(), + range_begin(range_begin_), + range_end(range_end_), + sort_within_bins(sort_within_bins_) { + static_assert( + Kokkos::SpaceAccessibility::accessible, + "The provided execution space must be able to access the memory space " + "BinSort was initialized with!"); + if (bin_op.max_bins() <= 0) + Kokkos::abort( + "The number of bins in the BinSortOp object must be greater than 0!"); + bin_count_atomic = Kokkos::View( + "Kokkos::SortImpl::BinSortFunctor::bin_count", bin_op.max_bins()); + bin_count_const = bin_count_atomic; + bin_offsets = + offset_type(view_alloc(exec, WithoutInitializing, + "Kokkos::SortImpl::BinSortFunctor::bin_offsets"), + bin_op.max_bins()); + sort_order = + offset_type(view_alloc(exec, WithoutInitializing, + "Kokkos::SortImpl::BinSortFunctor::sort_order"), + 
range_end - range_begin); + } + + BinSort(const_key_view_type keys_, int range_begin_, int range_end_, + BinSortOp bin_op_, bool sort_within_bins_ = false) + : BinSort(exec_space{}, keys_, range_begin_, range_end_, bin_op_, + sort_within_bins_) {} + + template + BinSort(const ExecutionSpace& exec, const_key_view_type keys_, + BinSortOp bin_op_, bool sort_within_bins_ = false) + : BinSort(exec, keys_, 0, keys_.extent(0), bin_op_, sort_within_bins_) {} + + BinSort(const_key_view_type keys_, BinSortOp bin_op_, + bool sort_within_bins_ = false) + : BinSort(exec_space{}, keys_, bin_op_, sort_within_bins_) {} + + //---------------------------------------- + // Create the permutation vector, the bin_offset array and the bin_count + // array. Can be called again if keys changed + template + void create_permute_vector(const ExecutionSpace& exec) { + static_assert( + Kokkos::SpaceAccessibility::accessible, + "The provided execution space must be able to access the memory space " + "BinSort was initialized with!"); + + const size_t len = range_end - range_begin; + Kokkos::parallel_for( + "Kokkos::Sort::BinCount", + Kokkos::RangePolicy(exec, 0, len), + *this); + Kokkos::parallel_scan("Kokkos::Sort::BinOffset", + Kokkos::RangePolicy( + exec, 0, bin_op.max_bins()), + *this); + + Kokkos::deep_copy(exec, bin_count_atomic, 0); + Kokkos::parallel_for( + "Kokkos::Sort::BinBinning", + Kokkos::RangePolicy(exec, 0, len), + *this); + + if (sort_within_bins) + Kokkos::parallel_for( + "Kokkos::Sort::BinSort", + Kokkos::RangePolicy( + exec, 0, bin_op.max_bins()), + *this); + } + + // Create the permutation vector, the bin_offset array and the bin_count + // array. 
Can be called again if keys changed + void create_permute_vector() { + Kokkos::fence("Kokkos::Binsort::create_permute_vector: before"); + exec_space e{}; + create_permute_vector(e); + e.fence("Kokkos::Binsort::create_permute_vector: after"); + } + + // Sort a subset of a view with respect to the first dimension using the + // permutation array + template + void sort(const ExecutionSpace& exec, ValuesViewType const& values, + int values_range_begin, int values_range_end) const { + if (values.extent(0) == 0) { + return; + } + + static_assert( + Kokkos::SpaceAccessibility::accessible, + "The provided execution space must be able to access the memory space " + "BinSort was initialized with!"); + static_assert( + Kokkos::SpaceAccessibility< + ExecutionSpace, typename ValuesViewType::memory_space>::accessible, + "The provided execution space must be able to access the memory space " + "of the View argument!"); + + const size_t len = range_end - range_begin; + const size_t values_len = values_range_end - values_range_begin; + if (len != values_len) { + Kokkos::abort( + "BinSort::sort: values range length != permutation vector length"); + } + + using scratch_view_type = + Kokkos::View; + scratch_view_type sorted_values( + view_alloc(exec, WithoutInitializing, + "Kokkos::SortImpl::BinSortFunctor::sorted_values"), + values.rank_dynamic > 0 ? len : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 1 ? values.extent(1) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 2 ? values.extent(2) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 3 ? values.extent(3) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 4 ? values.extent(4) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 5 ? values.extent(5) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 6 ? values.extent(6) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 7 ? 
values.extent(7) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG); + + { + copy_permute_functor + functor(sorted_values, sort_order, values, + values_range_begin - range_begin); + + parallel_for("Kokkos::Sort::CopyPermute", + Kokkos::RangePolicy(exec, 0, len), functor); + } + + { + copy_functor functor( + values, range_begin, sorted_values); + + parallel_for("Kokkos::Sort::Copy", + Kokkos::RangePolicy(exec, 0, len), functor); + } + } + + // Sort a subset of a view with respect to the first dimension using the + // permutation array + template + void sort(ValuesViewType const& values, int values_range_begin, + int values_range_end) const { + Kokkos::fence("Kokkos::Binsort::sort: before"); + exec_space exec; + sort(exec, values, values_range_begin, values_range_end); + exec.fence("Kokkos::BinSort:sort: after"); + } + + template + void sort(ExecutionSpace const& exec, ValuesViewType const& values) const { + this->sort(exec, values, 0, /*values.extent(0)*/ range_end - range_begin); + } + + template + void sort(ValuesViewType const& values) const { + this->sort(values, 0, /*values.extent(0)*/ range_end - range_begin); + } + + // Get the permutation vector + KOKKOS_INLINE_FUNCTION + offset_type get_permute_vector() const { return sort_order; } + + // Get the start offsets for each bin + KOKKOS_INLINE_FUNCTION + offset_type get_bin_offsets() const { return bin_offsets; } + + // Get the count for each bin + KOKKOS_INLINE_FUNCTION + bin_count_type get_bin_count() const { return bin_count_const; } + + public: + KOKKOS_INLINE_FUNCTION + void operator()(const bin_count_tag& /*tag*/, const int i) const { + const int j = range_begin + i; + bin_count_atomic(bin_op.bin(keys, j))++; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const bin_offset_tag& /*tag*/, const int i, + value_type& offset, const bool& final) const { + if (final) { + bin_offsets(i) = offset; + } + offset += bin_count_const(i); + } + + KOKKOS_INLINE_FUNCTION + void operator()(const bin_binning_tag& /*tag*/, const int i) const 
{ + const int j = range_begin + i; + const int bin = bin_op.bin(keys, j); + const int count = bin_count_atomic(bin)++; + + sort_order(bin_offsets(bin) + count) = j; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const bin_sort_bins_tag& /*tag*/, const int i) const { + auto bin_size = bin_count_const(i); + if (bin_size <= 1) return; + constexpr bool use_std_sort = + std::is_same_v; + int lower_bound = bin_offsets(i); + int upper_bound = lower_bound + bin_size; + // Switching to std::sort for more than 10 elements has been found + // reasonable experimentally. + if (use_std_sort && bin_size > 10) { + KOKKOS_IF_ON_HOST( + (std::sort(&sort_order(lower_bound), &sort_order(upper_bound), + [this](int p, int q) { return bin_op(keys_rnd, p, q); });)) + } else { + for (int k = lower_bound + 1; k < upper_bound; ++k) { + int old_idx = sort_order(k); + int j = k - 1; + while (j >= lower_bound) { + int new_idx = sort_order(j); + if (!bin_op(keys_rnd, old_idx, new_idx)) break; + sort_order(j + 1) = new_idx; + --j; + } + sort_order(j + 1) = old_idx; + } + } + } +}; + +} // namespace Kokkos +#endif diff --git a/lib/kokkos/algorithms/src/sorting/Kokkos_NestedSortPublicAPI.hpp b/lib/kokkos/algorithms/src/sorting/Kokkos_NestedSortPublicAPI.hpp new file mode 100644 index 0000000000..dd468e0734 --- /dev/null +++ b/lib/kokkos/algorithms/src/sorting/Kokkos_NestedSortPublicAPI.hpp @@ -0,0 +1,100 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_NESTED_SORT_PUBLIC_API_HPP_ +#define KOKKOS_NESTED_SORT_PUBLIC_API_HPP_ + +#include "impl/Kokkos_NestedSortImpl.hpp" +#include +#include + +namespace Kokkos { +namespace Experimental { + +template +KOKKOS_INLINE_FUNCTION void sort_team(const TeamMember& t, + const ViewType& view) { + Impl::sort_nested_impl(t, view, nullptr, + Experimental::Impl::StdAlgoLessThanBinaryPredicate< + typename ViewType::non_const_value_type>(), + Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_team(const TeamMember& t, const ViewType& view, + const Comparator& comp) { + Impl::sort_nested_impl(t, view, nullptr, comp, Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_by_key_team(const TeamMember& t, + const KeyViewType& keyView, + const ValueViewType& valueView) { + Impl::sort_nested_impl(t, keyView, valueView, + Experimental::Impl::StdAlgoLessThanBinaryPredicate< + typename KeyViewType::non_const_value_type>(), + Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_by_key_team(const TeamMember& t, + const KeyViewType& keyView, + const ValueViewType& valueView, + const Comparator& comp) { + Impl::sort_nested_impl(t, keyView, valueView, comp, + Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_thread(const TeamMember& t, + const ViewType& view) { + Impl::sort_nested_impl(t, view, nullptr, + Experimental::Impl::StdAlgoLessThanBinaryPredicate< + typename ViewType::non_const_value_type>(), + Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_thread(const TeamMember& t, + const ViewType& view, + const Comparator& comp) { + Impl::sort_nested_impl(t, view, nullptr, comp, Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_by_key_thread(const TeamMember& t, + const KeyViewType& keyView, + const ValueViewType& valueView) { + Impl::sort_nested_impl(t, keyView, valueView, + 
Experimental::Impl::StdAlgoLessThanBinaryPredicate< + typename KeyViewType::non_const_value_type>(), + Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_by_key_thread(const TeamMember& t, + const KeyViewType& keyView, + const ValueViewType& valueView, + const Comparator& comp) { + Impl::sort_nested_impl(t, keyView, valueView, comp, + Impl::NestedRange()); +} + +} // namespace Experimental +} // namespace Kokkos +#endif diff --git a/lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp b/lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp new file mode 100644 index 0000000000..a763c41e58 --- /dev/null +++ b/lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp @@ -0,0 +1,194 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_SORT_PUBLIC_API_HPP_ +#define KOKKOS_SORT_PUBLIC_API_HPP_ + +#include "./impl/Kokkos_SortImpl.hpp" +#include +#include +#include + +namespace Kokkos { + +// --------------------------------------------------------------- +// basic overloads +// --------------------------------------------------------------- + +template +void sort([[maybe_unused]] const ExecutionSpace& exec, + const Kokkos::View& view) { + // constraints + using ViewType = Kokkos::View; + using MemSpace = typename ViewType::memory_space; + static_assert( + ViewType::rank == 1 && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "Kokkos::sort without comparator: supports 1D Views with LayoutRight, " + "LayoutLeft or LayoutStride."); + + static_assert(SpaceAccessibility::accessible, + "Kokkos::sort: execution space instance is not able to access " + "the memory space of the " + "View argument!"); + + if (view.extent(0) <= 1) { + return; + } + + if constexpr (Impl::better_off_calling_std_sort_v) { + auto first = ::Kokkos::Experimental::begin(view); + auto last = ::Kokkos::Experimental::end(view); + std::sort(first, last); + } else { + Impl::sort_device_view_without_comparator(exec, view); + } +} + +template +void sort(const Kokkos::View& view) { + using ViewType = Kokkos::View; + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + Kokkos::fence("Kokkos::sort: before"); + + if (view.extent(0) <= 1) { + return; + } + + typename ViewType::execution_space exec; + sort(exec, view); + exec.fence("Kokkos::sort: fence after sorting"); +} + +// --------------------------------------------------------------- +// overloads supporting a custom comparator +// --------------------------------------------------------------- +template +void sort([[maybe_unused]] const ExecutionSpace& exec, + const Kokkos::View& view, + const ComparatorType& comparator) { + // constraints 
+ using ViewType = Kokkos::View; + using MemSpace = typename ViewType::memory_space; + static_assert( + ViewType::rank == 1 && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "Kokkos::sort with comparator: supports 1D Views with LayoutRight, " + "LayoutLeft or LayoutStride."); + + static_assert(SpaceAccessibility::accessible, + "Kokkos::sort: execution space instance is not able to access " + "the memory space of the View argument!"); + + if (view.extent(0) <= 1) { + return; + } + + if constexpr (Impl::better_off_calling_std_sort_v) { + auto first = ::Kokkos::Experimental::begin(view); + auto last = ::Kokkos::Experimental::end(view); + std::sort(first, last, comparator); + } else { + Impl::sort_device_view_with_comparator(exec, view, comparator); + } +} + +template +void sort(const Kokkos::View& view, + const ComparatorType& comparator) { + using ViewType = Kokkos::View; + static_assert( + ViewType::rank == 1 && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "Kokkos::sort with comparator: supports 1D Views with LayoutRight, " + "LayoutLeft or LayoutStride."); + + Kokkos::fence("Kokkos::sort with comparator: before"); + + if (view.extent(0) <= 1) { + return; + } + + typename ViewType::execution_space exec; + sort(exec, view, comparator); + exec.fence("Kokkos::sort with comparator: fence after sorting"); +} + +// --------------------------------------------------------------- +// overloads for sorting a view with a subrange +// specified via integers begin, end +// --------------------------------------------------------------- + +template +std::enable_if_t::value> sort( + const ExecutionSpace& exec, ViewType view, size_t const begin, + size_t const end) { + // view must be rank-1 because the Impl::min_max_functor + // used below only works for rank-1 views for now + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + if (view.extent(0) <= 1) { + return; + } + + using range_policy = 
Kokkos::RangePolicy; + using CompType = BinOp1D; + + Kokkos::MinMaxScalar result; + Kokkos::MinMax reducer(result); + + parallel_reduce("Kokkos::Sort::FindExtent", range_policy(exec, begin, end), + Impl::min_max_functor(view), reducer); + + if (result.min_val == result.max_val) return; + + BinSort bin_sort( + exec, view, begin, end, + CompType((end - begin) / 2, result.min_val, result.max_val), true); + + bin_sort.create_permute_vector(exec); + bin_sort.sort(exec, view, begin, end); +} + +template +void sort(ViewType view, size_t const begin, size_t const end) { + // same constraints as the overload above which this gets dispatched to + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + Kokkos::fence("Kokkos::sort: before"); + + if (view.extent(0) <= 1) { + return; + } + + typename ViewType::execution_space exec; + sort(exec, view, begin, end); + exec.fence("Kokkos::Sort: fence after sorting"); +} + +} // namespace Kokkos +#endif diff --git a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_CopyOpsForBinSortImpl.hpp b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_CopyOpsForBinSortImpl.hpp new file mode 100644 index 0000000000..07f5926d82 --- /dev/null +++ b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_CopyOpsForBinSortImpl.hpp @@ -0,0 +1,61 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_COPY_OPS_FOR_BINSORT_IMPL_HPP_ +#define KOKKOS_COPY_OPS_FOR_BINSORT_IMPL_HPP_ + +#include +#include + +namespace Kokkos { +namespace Impl { + +template +struct CopyOp; + +template +struct CopyOp { + KOKKOS_INLINE_FUNCTION + static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, + size_t i_src) { + dst(i_dst) = src(i_src); + } +}; + +template +struct CopyOp { + KOKKOS_INLINE_FUNCTION + static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, + size_t i_src) { + for (int j = 0; j < (int)dst.extent(1); j++) dst(i_dst, j) = src(i_src, j); + } +}; + +template +struct CopyOp { + KOKKOS_INLINE_FUNCTION + static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, + size_t i_src) { + for (int j = 0; j < dst.extent(1); j++) + for (int k = 0; k < dst.extent(2); k++) + dst(i_dst, j, k) = src(i_src, j, k); + } +}; + +} // namespace Impl +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_NestedSortImpl.hpp b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_NestedSortImpl.hpp new file mode 100644 index 0000000000..50ac823319 --- /dev/null +++ b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_NestedSortImpl.hpp @@ -0,0 +1,115 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_NESTED_SORT_IMPL_HPP_ +#define KOKKOS_NESTED_SORT_IMPL_HPP_ + +#include +#include + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +// true for TeamVectorRange, false for ThreadVectorRange +template +struct NestedRange {}; + +// Specialization for team-level +template <> +struct NestedRange { + template + KOKKOS_FUNCTION static auto create(const TeamMember& t, SizeType len) { + return Kokkos::TeamVectorRange(t, len); + } + template + KOKKOS_FUNCTION static void barrier(const TeamMember& t) { + t.team_barrier(); + } +}; + +// Specialization for thread-level +template <> +struct NestedRange { + template + KOKKOS_FUNCTION static auto create(const TeamMember& t, SizeType len) { + return Kokkos::ThreadVectorRange(t, len); + } + // Barrier is no-op, as vector lanes of a thread are implicitly synchronized + // after parallel region + template + KOKKOS_FUNCTION static void barrier(const TeamMember&) {} +}; + +// When just doing sort (not sort_by_key), use nullptr_t for ValueViewType. +// This only takes the NestedRange instance for template arg deduction. +template +KOKKOS_INLINE_FUNCTION void sort_nested_impl( + const TeamMember& t, const KeyViewType& keyView, + [[maybe_unused]] const ValueViewType& valueView, const Comparator& comp, + const NestedRange) { + using SizeType = typename KeyViewType::size_type; + using KeyType = typename KeyViewType::non_const_value_type; + using Range = NestedRange; + SizeType n = keyView.extent(0); + SizeType npot = 1; + SizeType levels = 0; + // FIXME: ceiling power-of-two is a common thing to need - make it a utility + while (npot < n) { + levels++; + npot <<= 1; + } + for (SizeType i = 0; i < levels; i++) { + for (SizeType j = 0; j <= i; j++) { + // n/2 pairs of items are compared in parallel + Kokkos::parallel_for(Range::create(t, npot / 2), [=](const SizeType k) { + // How big are the brown/pink boxes? 
+ // (Terminology comes from Wikipedia diagram) + // https://commons.wikimedia.org/wiki/File:BitonicSort.svg#/media/File:BitonicSort.svg + SizeType boxSize = SizeType(2) << (i - j); + // Which box contains this thread? + SizeType boxID = k >> (i - j); // k * 2 / boxSize; + SizeType boxStart = boxID << (1 + i - j); // boxID * boxSize + SizeType boxOffset = k - (boxStart >> 1); // k - boxID * boxSize / 2; + SizeType elem1 = boxStart + boxOffset; + // In first phase (j == 0, brown box): within a box, compare with the + // opposite value in the box. + // In later phases (j > 0, pink box): within a box, compare with fixed + // distance (boxSize / 2) apart. + SizeType elem2 = (j == 0) ? (boxStart + boxSize - 1 - boxOffset) + : (elem1 + boxSize / 2); + if (elem2 < n) { + KeyType key1 = keyView(elem1); + KeyType key2 = keyView(elem2); + if (comp(key2, key1)) { + keyView(elem1) = key2; + keyView(elem2) = key1; + if constexpr (!std::is_same_v) { + Kokkos::Experimental::swap(valueView(elem1), valueView(elem2)); + } + } + } + }); + Range::barrier(t); + } + } +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos +#endif diff --git a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp new file mode 100644 index 0000000000..d87ab09e77 --- /dev/null +++ b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp @@ -0,0 +1,369 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_SORT_FREE_FUNCS_IMPL_HPP_ +#define KOKKOS_SORT_FREE_FUNCS_IMPL_HPP_ + +#include "../Kokkos_BinOpsPublicAPI.hpp" +#include "../Kokkos_BinSortPublicAPI.hpp" +#include +#include +#include + +#if defined(KOKKOS_ENABLE_CUDA) + +// Workaround for `Instruction 'shfl' without '.sync' is not supported on +// .target sm_70 and higher from PTX ISA version 6.4`. +// Also see https://github.com/NVIDIA/cub/pull/170. +#if !defined(CUB_USE_COOPERATIVE_GROUPS) +#define CUB_USE_COOPERATIVE_GROUPS +#endif + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" + +#if defined(KOKKOS_COMPILER_CLANG) +// Some versions of Clang fail to compile Thrust, failing with errors like +// this: +// /thrust/system/cuda/detail/core/agent_launcher.h:557:11: +// error: use of undeclared identifier 'va_printf' +// The exact combination of versions for Clang and Thrust (or CUDA) for this +// failure was not investigated, however even very recent version combination +// (Clang 10.0.0 and Cuda 10.0) demonstrated failure. 
+// +// Defining _CubLog here locally allows us to avoid that code path, however +// disabling some debugging diagnostics +#pragma push_macro("_CubLog") +#ifdef _CubLog +#undef _CubLog +#endif +#define _CubLog +#include +#include +#pragma pop_macro("_CubLog") +#else +#include +#include +#endif + +#pragma GCC diagnostic pop + +#endif + +#if defined(KOKKOS_ENABLE_ONEDPL) +#include +#include +#endif + +namespace Kokkos { +namespace Impl { + +template +struct better_off_calling_std_sort : std::false_type {}; + +#if defined KOKKOS_ENABLE_SERIAL +template <> +struct better_off_calling_std_sort : std::true_type {}; +#endif + +#if defined KOKKOS_ENABLE_OPENMP +template <> +struct better_off_calling_std_sort : std::true_type {}; +#endif + +#if defined KOKKOS_ENABLE_THREADS +template <> +struct better_off_calling_std_sort : std::true_type {}; +#endif + +#if defined KOKKOS_ENABLE_HPX +template <> +struct better_off_calling_std_sort : std::true_type { +}; +#endif + +template +inline constexpr bool better_off_calling_std_sort_v = + better_off_calling_std_sort::value; + +template +struct min_max_functor { + using minmax_scalar = + Kokkos::MinMaxScalar; + + ViewType view; + min_max_functor(const ViewType& view_) : view(view_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t& i, minmax_scalar& minmax) const { + if (view(i) < minmax.min_val) minmax.min_val = view(i); + if (view(i) > minmax.max_val) minmax.max_val = view(i); + } +}; + +template +void sort_via_binsort(const ExecutionSpace& exec, + const Kokkos::View& view) { + // Although we are using BinSort below, which could work on rank-2 views, + // for now view must be rank-1 because the min_max_functor + // used below only works for rank-1 views + using ViewType = Kokkos::View; + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + if (view.extent(0) <= 1) { + return; + } + + Kokkos::MinMaxScalar result; + Kokkos::MinMax reducer(result); + 
parallel_reduce("Kokkos::Sort::FindExtent", + Kokkos::RangePolicy( + exec, 0, view.extent(0)), + min_max_functor(view), reducer); + if (result.min_val == result.max_val) return; + // For integral types the number of bins may be larger than the range + // in which case we can exactly have one unique value per bin + // and then don't need to sort bins. + bool sort_in_bins = true; + // TODO: figure out better max_bins then this ... + int64_t max_bins = view.extent(0) / 2; + if (std::is_integral::value) { + // Cast to double to avoid possible overflow when using integer + auto const max_val = static_cast(result.max_val); + auto const min_val = static_cast(result.min_val); + // using 10M as the cutoff for special behavior (roughly 40MB for the count + // array) + if ((max_val - min_val) < 10000000) { + max_bins = max_val - min_val + 1; + sort_in_bins = false; + } + } + if (std::is_floating_point::value) { + KOKKOS_ASSERT(std::isfinite(static_cast(result.max_val) - + static_cast(result.min_val))); + } + + using CompType = BinOp1D; + BinSort bin_sort( + view, CompType(max_bins, result.min_val, result.max_val), sort_in_bins); + bin_sort.create_permute_vector(exec); + bin_sort.sort(exec, view); +} + +#if defined(KOKKOS_ENABLE_CUDA) +template +void sort_cudathrust(const Cuda& space, + const Kokkos::View& view, + MaybeComparator&&... maybeComparator) { + using ViewType = Kokkos::View; + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + if (view.extent(0) <= 1) { + return; + } + const auto exec = thrust::cuda::par.on(space.cuda_stream()); + auto first = ::Kokkos::Experimental::begin(view); + auto last = ::Kokkos::Experimental::end(view); + thrust::sort(exec, first, last, + std::forward(maybeComparator)...); +} +#endif + +#if defined(KOKKOS_ENABLE_ONEDPL) +template +void sort_onedpl(const Kokkos::Experimental::SYCL& space, + const Kokkos::View& view, + MaybeComparator&&... 
maybeComparator) { + using ViewType = Kokkos::View; + static_assert(SpaceAccessibility::accessible, + "SYCL execution space is not able to access the memory space " + "of the View argument!"); + + static_assert( + (ViewType::rank == 1) && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "SYCL sort only supports contiguous rank-1 Views with LayoutLeft, " + "LayoutRight or LayoutStride" + "For the latter, this means the View must have stride(0) = 1, enforced " + "at runtime."); + + if (view.stride(0) != 1) { + Kokkos::abort("SYCL sort only supports rank-1 Views with stride(0) = 1."); + } + + if (view.extent(0) <= 1) { + return; + } + + // Can't use Experimental::begin/end here since the oneDPL then assumes that + // the data is on the host. + auto queue = space.sycl_queue(); + auto policy = oneapi::dpl::execution::make_device_policy(queue); + const int n = view.extent(0); + oneapi::dpl::sort(policy, view.data(), view.data() + n, + std::forward(maybeComparator)...); +} +#endif + +template +void copy_to_host_run_stdsort_copy_back( + const ExecutionSpace& exec, + const Kokkos::View& view, + MaybeComparator&&... 
maybeComparator) { + namespace KE = ::Kokkos::Experimental; + + using ViewType = Kokkos::View; + using layout = typename ViewType::array_layout; + if constexpr (std::is_same_v) { + // for strided views we cannot just deep_copy from device to host, + // so we need to do a few more jumps + using view_value_type = typename ViewType::non_const_value_type; + using view_exespace = typename ViewType::execution_space; + using view_deep_copyable_t = Kokkos::View; + view_deep_copyable_t view_dc("view_dc", view.extent(0)); + KE::copy(exec, view, view_dc); + + // run sort on the mirror of view_dc + auto mv_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc); + auto first = KE::begin(mv_h); + auto last = KE::end(mv_h); + std::sort(first, last, std::forward(maybeComparator)...); + Kokkos::deep_copy(exec, view_dc, mv_h); + + // copy back to argument view + KE::copy(exec, KE::cbegin(view_dc), KE::cend(view_dc), KE::begin(view)); + } else { + auto view_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view); + auto first = KE::begin(view_h); + auto last = KE::end(view_h); + std::sort(first, last, std::forward(maybeComparator)...); + Kokkos::deep_copy(exec, view, view_h); + } +} + +// -------------------------------------------------- +// +// specialize cases for sorting without comparator +// +// -------------------------------------------------- + +#if defined(KOKKOS_ENABLE_CUDA) +template +void sort_device_view_without_comparator( + const Cuda& exec, const Kokkos::View& view) { + sort_cudathrust(exec, view); +} +#endif + +#if defined(KOKKOS_ENABLE_ONEDPL) +template +void sort_device_view_without_comparator( + const Kokkos::Experimental::SYCL& exec, + const Kokkos::View& view) { + using ViewType = Kokkos::View; + static_assert( + (ViewType::rank == 1) && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "sort_device_view_without_comparator: supports rank-1 Views " + "with LayoutLeft, LayoutRight or LayoutStride"); + + if (view.stride(0) == 1) { + 
sort_onedpl(exec, view); + } else { + copy_to_host_run_stdsort_copy_back(exec, view); + } +} +#endif + +// fallback case +template +std::enable_if_t::value> +sort_device_view_without_comparator( + const ExecutionSpace& exec, + const Kokkos::View& view) { + sort_via_binsort(exec, view); +} + +// -------------------------------------------------- +// +// specialize cases for sorting with comparator +// +// -------------------------------------------------- + +#if defined(KOKKOS_ENABLE_CUDA) +template +void sort_device_view_with_comparator( + const Cuda& exec, const Kokkos::View& view, + const ComparatorType& comparator) { + sort_cudathrust(exec, view, comparator); +} +#endif + +#if defined(KOKKOS_ENABLE_ONEDPL) +template +void sort_device_view_with_comparator( + const Kokkos::Experimental::SYCL& exec, + const Kokkos::View& view, + const ComparatorType& comparator) { + using ViewType = Kokkos::View; + static_assert( + (ViewType::rank == 1) && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "sort_device_view_with_comparator: supports rank-1 Views " + "with LayoutLeft, LayoutRight or LayoutStride"); + + if (view.stride(0) == 1) { + sort_onedpl(exec, view, comparator); + } else { + copy_to_host_run_stdsort_copy_back(exec, view, comparator); + } +} +#endif + +template +std::enable_if_t::value> +sort_device_view_with_comparator( + const ExecutionSpace& exec, + const Kokkos::View& view, + const ComparatorType& comparator) { + // This is a fallback case if a more specialized overload does not exist: + // for now, this fallback copies data to host, runs std::sort + // and then copies data back. Potentially, this can later be changed + // with a better solution like our own quicksort on device or similar. 
+ + using ViewType = Kokkos::View; + using MemSpace = typename ViewType::memory_space; + static_assert(!SpaceAccessibility::accessible, + "Impl::sort_device_view_with_comparator: should not be called " + "on a view that is already accessible on the host"); + + copy_to_host_run_stdsort_copy_back(exec, view, comparator); +} + +} // namespace Impl +} // namespace Kokkos +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp index 38dcd1a674..f254686dba 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp @@ -23,64 +23,85 @@ namespace Kokkos { namespace Experimental { -template -std::enable_if_t::value, - OutputIteratorType> -adjacent_difference(const ExecutionSpace& ex, InputIteratorType first_from, - InputIteratorType last_from, - OutputIteratorType first_dest) { +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorType, + std::enable_if_t::value && + ::Kokkos::is_execution_space::value, + int> = 0> +OutputIteratorType adjacent_difference(const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest) { using value_type1 = typename InputIteratorType::value_type; using value_type2 = typename OutputIteratorType::value_type; using binary_op = Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; - return Impl::adjacent_difference_impl( + return Impl::adjacent_difference_exespace_impl( "Kokkos::adjacent_difference_iterator_api", ex, first_from, last_from, first_dest, binary_op()); } -template -std::enable_if_t::value, - OutputIteratorType> -adjacent_difference(const ExecutionSpace& ex, InputIteratorType first_from, - InputIteratorType last_from, OutputIteratorType first_dest, - BinaryOp bin_op) { - return 
Impl::adjacent_difference_impl( +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorType, typename BinaryOp, + std::enable_if_t::value && + ::Kokkos::is_execution_space::value, + int> = 0> +OutputIteratorType adjacent_difference(const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest, + BinaryOp bin_op) { + return Impl::adjacent_difference_exespace_impl( "Kokkos::adjacent_difference_iterator_api", ex, first_from, last_from, first_dest, bin_op); } -template -std::enable_if_t::value, - OutputIteratorType> -adjacent_difference(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first_from, InputIteratorType last_from, - OutputIteratorType first_dest) { +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorType, + std::enable_if_t::value && + ::Kokkos::is_execution_space::value, + int> = 0> +OutputIteratorType adjacent_difference(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest) { using value_type1 = typename InputIteratorType::value_type; using value_type2 = typename OutputIteratorType::value_type; using binary_op = Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; - return Impl::adjacent_difference_impl(label, ex, first_from, last_from, - first_dest, binary_op()); + return Impl::adjacent_difference_exespace_impl( + label, ex, first_from, last_from, first_dest, binary_op()); } -template -std::enable_if_t::value, - OutputIteratorType> -adjacent_difference(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first_from, InputIteratorType last_from, - OutputIteratorType first_dest, BinaryOp bin_op) { - return Impl::adjacent_difference_impl(label, ex, first_from, last_from, - first_dest, bin_op); +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorType, 
typename BinaryOp, + std::enable_if_t::value && + ::Kokkos::is_execution_space::value, + int> = 0> +OutputIteratorType adjacent_difference(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest, + BinaryOp bin_op) { + return Impl::adjacent_difference_exespace_impl(label, ex, first_from, + last_from, first_dest, bin_op); } -template +template ::value, + int> = 0> auto adjacent_difference( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -96,13 +117,15 @@ auto adjacent_difference( using binary_op = Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; - return Impl::adjacent_difference_impl( + return Impl::adjacent_difference_exespace_impl( "Kokkos::adjacent_difference_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op()); } -template +template ::value, + int> = 0> auto adjacent_difference( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -111,13 +134,15 @@ auto adjacent_difference( namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::adjacent_difference_impl( + return Impl::adjacent_difference_exespace_impl( "Kokkos::adjacent_difference_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), bin_op); } -template +template ::value, + int> = 0> auto adjacent_difference( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -134,13 +159,15 @@ auto adjacent_difference( Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; - return Impl::adjacent_difference_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), - KE::begin(view_dest), binary_op()); + return Impl::adjacent_difference_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op()); } -template +template 
::value, + int> = 0> auto adjacent_difference( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -149,9 +176,85 @@ auto adjacent_difference( namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::adjacent_difference_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), - KE::begin(view_dest), bin_op); + return Impl::adjacent_difference_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), bin_op); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template ::value && + ::Kokkos::is_team_handle::value, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType adjacent_difference( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest) { + using value_type1 = typename InputIteratorType::value_type; + using value_type2 = typename OutputIteratorType::value_type; + using binary_op = + Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; + return Impl::adjacent_difference_team_impl(teamHandle, first_from, last_from, + first_dest, binary_op()); +} + +template ::value && + ::Kokkos::is_team_handle::value, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType +adjacent_difference(const TeamHandleType& teamHandle, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest, BinaryOp bin_op) { + return Impl::adjacent_difference_team_impl(teamHandle, first_from, last_from, + first_dest, bin_op); +} + +template < + typename TeamHandleType, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto adjacent_difference( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest) { + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + + using view_type1 = ::Kokkos::View; + using view_type2 = ::Kokkos::View; + using value_type1 = typename view_type1::value_type; + using value_type2 = typename view_type2::value_type; + using binary_op = + Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; + return Impl::adjacent_difference_team_impl(teamHandle, KE::cbegin(view_from), + KE::cend(view_from), + KE::begin(view_dest), binary_op()); +} + +template < + typename TeamHandleType, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryOp, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto adjacent_difference( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + BinaryOp bin_op) { + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + return Impl::adjacent_difference_team_impl(teamHandle, KE::cbegin(view_from), + KE::cend(view_from), + KE::begin(view_dest), bin_op); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp index 43c2b66010..ac476ca5bf 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp @@ -23,71 +23,144 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // 
overload set1 -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t, int> = 0> IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::adjacent_find_impl("Kokkos::adjacent_find_iterator_api_default", - ex, first, last); + return Impl::adjacent_find_exespace_impl( + "Kokkos::adjacent_find_iterator_api_default", ex, first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t, int> = 0> IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::adjacent_find_impl(label, ex, first, last); + return Impl::adjacent_find_exespace_impl(label, ex, first, last); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t, int> = 0> auto adjacent_find(const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::adjacent_find_impl("Kokkos::adjacent_find_view_api_default", ex, - KE::begin(v), KE::end(v)); + return Impl::adjacent_find_exespace_impl( + "Kokkos::adjacent_find_view_api_default", ex, KE::begin(v), KE::end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + std::enable_if_t, int> = 0> auto adjacent_find(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::adjacent_find_impl(label, ex, KE::begin(v), KE::end(v)); + return Impl::adjacent_find_exespace_impl(label, ex, KE::begin(v), KE::end(v)); } // overload set2 -template +template < + typename ExecutionSpace, typename IteratorType, + typename BinaryPredicateType, + std::enable_if_t, int> = 0> IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first, IteratorType last, BinaryPredicateType pred) { - return Impl::adjacent_find_impl("Kokkos::adjacent_find_iterator_api_default", - ex, first, last, pred); + return Impl::adjacent_find_exespace_impl( + "Kokkos::adjacent_find_iterator_api_default", ex, first, last, pred); } -template +template < + typename ExecutionSpace, typename IteratorType, + typename BinaryPredicateType, + std::enable_if_t, int> = 0> IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, BinaryPredicateType pred) { - return Impl::adjacent_find_impl(label, ex, first, last, pred); + return Impl::adjacent_find_exespace_impl(label, ex, first, last, pred); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename BinaryPredicateType, + std::enable_if_t, int> = 0> auto adjacent_find(const ExecutionSpace& ex, const ::Kokkos::View& v, BinaryPredicateType pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::adjacent_find_impl("Kokkos::adjacent_find_view_api_default", ex, - KE::begin(v), KE::end(v), pred); + return Impl::adjacent_find_exespace_impl( + "Kokkos::adjacent_find_view_api_default", ex, KE::begin(v), KE::end(v), + pred); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename BinaryPredicateType, + std::enable_if_t, int> = 0> auto adjacent_find(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, BinaryPredicateType pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::adjacent_find_impl(label, ex, KE::begin(v), KE::end(v), pred); + return Impl::adjacent_find_exespace_impl(label, ex, KE::begin(v), KE::end(v), + pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// overload set1 +template , int> = 0> +KOKKOS_FUNCTION IteratorType adjacent_find(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last) { + return Impl::adjacent_find_team_impl(teamHandle, first, last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto adjacent_find( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::adjacent_find_team_impl(teamHandle, KE::begin(v), KE::end(v)); +} + +// overload set2 +template , int> = 0> +KOKKOS_FUNCTION IteratorType adjacent_find(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last, + BinaryPredicateType pred) { + return Impl::adjacent_find_team_impl(teamHandle, first, last, pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto adjacent_find( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + BinaryPredicateType pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::adjacent_find_team_impl(teamHandle, KE::begin(v), KE::end(v), + pred); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp index 2ffec7e144..d6ed4c4a7e 
100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp @@ -23,41 +23,79 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool all_of(const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { - return Impl::all_of_impl("Kokkos::all_of_iterator_api_default", ex, first, - last, predicate); + return Impl::all_of_exespace_impl("Kokkos::all_of_iterator_api_default", ex, + first, last, predicate); } -template +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool all_of(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { - return Impl::all_of_impl(label, ex, first, last, predicate); + return Impl::all_of_exespace_impl(label, ex, first, last, predicate); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool all_of(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::all_of_impl("Kokkos::all_of_view_api_default", ex, KE::cbegin(v), - KE::cend(v), std::move(predicate)); + return Impl::all_of_exespace_impl("Kokkos::all_of_view_api_default", ex, + KE::cbegin(v), KE::cend(v), + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool all_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::all_of_impl(label, ex, KE::cbegin(v), KE::cend(v), - std::move(predicate)); + return Impl::all_of_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION bool all_of(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + Predicate predicate) { + return Impl::all_of_team_impl(teamHandle, first, last, predicate); +} + +template , int> = 0> +KOKKOS_FUNCTION bool all_of(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::all_of_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), + std::move(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp index 019c466c6d..82356e6598 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp @@ -23,41 +23,79 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool any_of(const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { - return 
Impl::any_of_impl("Kokkos::any_of_view_api_default", ex, first, last, - predicate); + return Impl::any_of_exespace_impl("Kokkos::any_of_view_api_default", ex, + first, last, predicate); } -template +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool any_of(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { - return Impl::any_of_impl(label, ex, first, last, predicate); + return Impl::any_of_exespace_impl(label, ex, first, last, predicate); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool any_of(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::any_of_impl("Kokkos::any_of_view_api_default", ex, KE::cbegin(v), - KE::cend(v), std::move(predicate)); + return Impl::any_of_exespace_impl("Kokkos::any_of_view_api_default", ex, + KE::cbegin(v), KE::cend(v), + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool any_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::any_of_impl(label, ex, KE::cbegin(v), KE::cend(v), - std::move(predicate)); + return Impl::any_of_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION bool any_of(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + Predicate predicate) { + return Impl::any_of_team_impl(teamHandle, first, last, predicate); +} + +template , int> = 0> +KOKKOS_FUNCTION bool any_of(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::any_of_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), + std::move(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp index 028f3b66b2..b7ce1ba5ed 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp @@ -23,22 +23,31 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::copy_impl("Kokkos::copy_iterator_api_default", ex, first, last, - d_first); + return Impl::copy_exespace_impl("Kokkos::copy_iterator_api_default", ex, + first, last, d_first); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::copy_impl(label, ex, first, last, d_first); + return Impl::copy_exespace_impl(label, ex, first, last, d_first); } -template +template < + typename ExecutionSpace, typename DataType1, typename... 
Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { @@ -46,12 +55,15 @@ auto copy(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; - return Impl::copy_impl("Kokkos::copy_view_api_default", ex, - KE::cbegin(source), KE::cend(source), KE::begin(dest)); + return Impl::copy_exespace_impl("Kokkos::copy_view_api_default", ex, + KE::cbegin(source), KE::cend(source), + KE::begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { @@ -59,8 +71,35 @@ auto copy(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; - return Impl::copy_impl(label, ex, KE::cbegin(source), KE::cend(source), - KE::begin(dest)); + return Impl::copy_exespace_impl(label, ex, KE::cbegin(source), + KE::cend(source), KE::begin(dest)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator copy(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + OutputIterator d_first) { + return Impl::copy_team_impl(teamHandle, first, last, d_first); +} + +template , int> = 0> +KOKKOS_FUNCTION auto copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + namespace KE = ::Kokkos::Experimental; + return Impl::copy_team_impl(teamHandle, KE::cbegin(source), 
KE::cend(source), + KE::begin(dest)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp index deff6baf9a..8f9e0f19b8 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp @@ -23,42 +23,81 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType2 copy_backward(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 d_last) { - return Impl::copy_backward_impl("Kokkos::copy_backward_iterator_api_default", - ex, first, last, d_last); + return Impl::copy_backward_exespace_impl( + "Kokkos::copy_backward_iterator_api_default", ex, first, last, d_last); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType2 copy_backward(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 d_last) { - return Impl::copy_backward_impl(label, ex, first, last, d_last); + return Impl::copy_backward_exespace_impl(label, ex, first, last, d_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_backward(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::copy_backward_impl("Kokkos::copy_backward_view_api_default", ex, - cbegin(source), cend(source), end(dest)); + return Impl::copy_backward_exespace_impl( + "Kokkos::copy_backward_view_api_default", ex, cbegin(source), + cend(source), end(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_backward(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::copy_backward_impl(label, ex, cbegin(source), cend(source), - end(dest)); + return Impl::copy_backward_exespace_impl(label, ex, cbegin(source), + cend(source), end(dest)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType2 copy_backward(const TeamHandleType& teamHandle, + IteratorType1 first, + IteratorType1 last, + IteratorType2 d_last) { + return Impl::copy_backward_team_impl(teamHandle, first, last, d_last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto copy_backward( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::copy_backward_team_impl(teamHandle, cbegin(source), cend(source), + end(dest)); } } // namespace Experimental diff --git 
a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp index 3db2fc074f..ba18bc76b9 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp @@ -23,46 +23,85 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy_if(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first, Predicate pred) { - return Impl::copy_if_impl("Kokkos::copy_if_iterator_api_default", ex, first, - last, d_first, std::move(pred)); + return Impl::copy_if_exespace_impl("Kokkos::copy_if_iterator_api_default", ex, + first, last, d_first, std::move(pred)); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy_if(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first, Predicate pred) { - return Impl::copy_if_impl(label, ex, first, last, d_first, std::move(pred)); + return Impl::copy_if_exespace_impl(label, ex, first, last, d_first, + std::move(pred)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_if(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest, Predicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::copy_if_impl("Kokkos::copy_if_view_api_default", ex, - cbegin(source), cend(source), begin(dest), - std::move(pred)); + return Impl::copy_if_exespace_impl("Kokkos::copy_if_view_api_default", ex, + cbegin(source), cend(source), begin(dest), + std::move(pred)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest, Predicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::copy_if_impl(label, ex, cbegin(source), cend(source), - begin(dest), std::move(pred)); + return Impl::copy_if_exespace_impl(label, ex, cbegin(source), cend(source), + begin(dest), std::move(pred)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator copy_if(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + OutputIterator d_first, Predicate pred) { + return Impl::copy_if_team_impl(teamHandle, first, last, d_first, + std::move(pred)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto copy_if( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest, Predicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::copy_if_team_impl(teamHandle, 
cbegin(source), cend(source), + begin(dest), std::move(pred)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp index a64f99b5c0..43c9120483 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp @@ -23,23 +23,32 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename Size, + typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy_n(const ExecutionSpace& ex, InputIterator first, Size count, OutputIterator result) { - return Impl::copy_n_impl("Kokkos::copy_n_iterator_api_default", ex, first, - count, result); + return Impl::copy_n_exespace_impl("Kokkos::copy_n_iterator_api_default", ex, + first, count, result); } -template +template < + typename ExecutionSpace, typename InputIterator, typename Size, + typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy_n(const std::string& label, const ExecutionSpace& ex, InputIterator first, Size count, OutputIterator result) { - return Impl::copy_n_impl(label, ex, first, count, result); + return Impl::copy_n_exespace_impl(label, ex, first, count, result); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename Size, typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_n(const ExecutionSpace& ex, const ::Kokkos::View& source, Size count, ::Kokkos::View& dest) { @@ -47,12 +56,14 @@ auto copy_n(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; - return Impl::copy_n_impl("Kokkos::copy_n_view_api_default", ex, - KE::cbegin(source), count, KE::begin(dest)); + return Impl::copy_n_exespace_impl("Kokkos::copy_n_view_api_default", ex, + KE::cbegin(source), count, KE::begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename Size, typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, Size count, ::Kokkos::View& dest) { @@ -60,8 +71,35 @@ auto copy_n(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; - return Impl::copy_n_impl(label, ex, KE::cbegin(source), count, - KE::begin(dest)); + return Impl::copy_n_exespace_impl(label, ex, KE::cbegin(source), count, + KE::begin(dest)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator copy_n(const TeamHandleType& teamHandle, + InputIterator first, Size count, + OutputIterator result) { + return Impl::copy_n_team_impl(teamHandle, first, count, result); +} + +template , int> = 0> +KOKKOS_FUNCTION auto copy_n( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, Size count, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + namespace KE = ::Kokkos::Experimental; + return Impl::copy_n_team_impl(teamHandle, KE::cbegin(source), count, + 
KE::begin(dest)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp index 3ac63467ec..f179e88bab 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp @@ -23,41 +23,81 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> typename IteratorType::difference_type count(const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { - return Impl::count_impl("Kokkos::count_iterator_api_default", ex, first, last, - value); + return Impl::count_exespace_impl("Kokkos::count_iterator_api_default", ex, + first, last, value); } -template +template < + typename ExecutionSpace, typename IteratorType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> typename IteratorType::difference_type count(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { - return Impl::count_impl(label, ex, first, last, value); + return Impl::count_exespace_impl(label, ex, first, last, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto count(const ExecutionSpace& ex, const ::Kokkos::View& v, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::count_impl("Kokkos::count_view_api_default", ex, KE::cbegin(v), - KE::cend(v), value); + return Impl::count_exespace_impl("Kokkos::count_view_api_default", ex, + KE::cbegin(v), KE::cend(v), value); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto count(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::count_impl(label, ex, KE::cbegin(v), KE::cend(v), value); + return Impl::count_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), + value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +template , int> = 0> +KOKKOS_FUNCTION typename IteratorType::difference_type count( + const TeamHandleType& teamHandle, IteratorType first, IteratorType last, + const T& value) { + return Impl::count_team_impl(teamHandle, first, last, value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto count(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::count_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp index b9731d378a..967cf75e7a 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp @@ -23,46 +23,84 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> typename IteratorType::difference_type count_if(const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { - return 
Impl::count_if_impl("Kokkos::count_if_iterator_api_default", ex, first, - last, std::move(predicate)); + return Impl::count_if_exespace_impl("Kokkos::count_if_iterator_api_default", + ex, first, last, std::move(predicate)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> typename IteratorType::difference_type count_if(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { - return Impl::count_if_impl(label, ex, first, last, std::move(predicate)); + return Impl::count_if_exespace_impl(label, ex, first, last, + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto count_if(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::count_if_impl("Kokkos::count_if_view_api_default", ex, - KE::cbegin(v), KE::cend(v), std::move(predicate)); + return Impl::count_if_exespace_impl("Kokkos::count_if_view_api_default", ex, + KE::cbegin(v), KE::cend(v), + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto count_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::count_if_impl(label, ex, KE::cbegin(v), KE::cend(v), - std::move(predicate)); + return Impl::count_if_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION typename IteratorType::difference_type count_if( + const TeamHandleType& teamHandle, IteratorType first, IteratorType last, + Predicate predicate) { + return Impl::count_if_team_impl(teamHandle, first, last, + std::move(predicate)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto count_if(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::count_if_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), + std::move(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp index 37c0d75ef5..a72a49cc22 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp @@ -23,50 +23,61 @@ namespace Kokkos { namespace Experimental { -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2) { - return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, - last1, first2); +// +// overload set accepting execution space +// +template && + Kokkos::is_execution_space_v, + int> = 0> 
+bool equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2) { + return Impl::equal_exespace_impl("Kokkos::equal_iterator_api_default", ex, + first1, last1, first2); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2) { - return Impl::equal_impl(label, ex, first1, last1, first2); +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, IteratorType2 first2) { + return Impl::equal_exespace_impl(label, ex, first1, last1, first2); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, BinaryPredicateType predicate) { - return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, - last1, first2, std::move(predicate)); +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, BinaryPredicateType predicate) { + return Impl::equal_exespace_impl("Kokkos::equal_iterator_api_default", ex, + first1, last1, first2, std::move(predicate)); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, - BinaryPredicateType predicate) { - return Impl::equal_impl(label, ex, first1, last1, first2, - std::move(predicate)); +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, 
IteratorType1 last1, IteratorType2 first2, + BinaryPredicateType predicate) { + return Impl::equal_exespace_impl(label, ex, first1, last1, first2, + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool equal(const ExecutionSpace& ex, const ::Kokkos::View& view1, ::Kokkos::View& view2) { @@ -74,13 +85,15 @@ bool equal(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::equal_impl("Kokkos::equal_view_api_default", ex, - KE::cbegin(view1), KE::cend(view1), - KE::cbegin(view2)); + return Impl::equal_exespace_impl("Kokkos::equal_view_api_default", ex, + KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool equal(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, ::Kokkos::View& view2) { @@ -88,12 +101,14 @@ bool equal(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::equal_impl(label, ex, KE::cbegin(view1), KE::cend(view1), - KE::cbegin(view2)); + return Impl::equal_exespace_impl(label, ex, KE::cbegin(view1), + KE::cend(view1), KE::cbegin(view2)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool equal(const ExecutionSpace& ex, const ::Kokkos::View& view1, ::Kokkos::View& view2, @@ -102,13 +117,15 @@ bool equal(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::equal_impl("Kokkos::equal_view_api_default", ex, - KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), - std::move(predicate)); + return Impl::equal_exespace_impl("Kokkos::equal_view_api_default", ex, + KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2), std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool equal(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, ::Kokkos::View& view2, @@ -117,51 +134,149 @@ bool equal(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::equal_impl(label, ex, KE::cbegin(view1), KE::cend(view1), - KE::cbegin(view2), std::move(predicate)); + return Impl::equal_exespace_impl(label, ex, KE::cbegin(view1), + KE::cend(view1), KE::cbegin(view2), + std::move(predicate)); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2) { - return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, - last1, first2, last2); +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { + return 
Impl::equal_exespace_impl("Kokkos::equal_iterator_api_default", ex, + first1, last1, first2, last2); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::equal_impl(label, ex, first1, last1, first2, last2); +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, + IteratorType2 last2) { + return Impl::equal_exespace_impl(label, ex, first1, last1, first2, last2); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2, - BinaryPredicateType predicate) { - return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, - last1, first2, last2, std::move(predicate)); +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType predicate) { + return Impl::equal_exespace_impl("Kokkos::equal_iterator_api_default", ex, + first1, last1, first2, last2, + std::move(predicate)); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, - BinaryPredicateType predicate) { - return Impl::equal_impl(label, ex, first1, last1, first2, last2, - std::move(predicate)); +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const std::string& label, const 
ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, + IteratorType2 last2, BinaryPredicateType predicate) { + return Impl::equal_exespace_impl(label, ex, first1, last1, first2, last2, + std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template && ::Kokkos:: + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION bool equal(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2) { + return Impl::equal_team_impl(teamHandle, first1, last1, first2); +} + +template && ::Kokkos:: + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION bool equal(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, + BinaryPredicateType predicate) { + return Impl::equal_team_impl(teamHandle, first1, last1, first2, + std::move(predicate)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool equal( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view1, + ::Kokkos::View& view2) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::equal_team_impl(teamHandle, KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool equal( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view1, + ::Kokkos::View& view2, + BinaryPredicateType predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::equal_team_impl(teamHandle, KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2), std::move(predicate)); +} + +template && ::Kokkos:: + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION 
bool equal(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { + return Impl::equal_team_impl(teamHandle, first1, last1, first2, last2); +} + +template && ::Kokkos:: + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION bool equal(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType predicate) { + return Impl::equal_team_impl(teamHandle, first1, last1, first2, last2, + std::move(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp index 4e05676c2c..ee3a105126 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp @@ -23,105 +23,130 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1 -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - ValueType init_value) { - static_assert(std::is_move_constructible::value, +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType exclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + ValueType init_value) { + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::exclusive_scan_default_op_impl( + return Impl::exclusive_scan_default_op_exespace_impl( "Kokkos::exclusive_scan_default_functors_iterator_api", ex, first, last, - first_dest, init_value); + first_dest, std::move(init_value)); } -template 
-std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -exclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, ValueType init_value) { - static_assert(std::is_move_constructible::value, +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType exclusive_scan(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + ValueType init_value) { + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::exclusive_scan_default_op_impl(label, ex, first, last, - first_dest, init_value); + return Impl::exclusive_scan_default_op_exespace_impl( + label, ex, first, last, first_dest, std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto exclusive_scan(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::exclusive_scan_default_op_impl( + return Impl::exclusive_scan_default_op_exespace_impl( "Kokkos::exclusive_scan_default_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - init_value); + std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... 
Properties1, + typename DataType2, typename... Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto exclusive_scan(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::exclusive_scan_default_op_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), - KE::begin(view_dest), init_value); + return Impl::exclusive_scan_default_op_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), std::move(init_value)); } // overload set 2 -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - ValueType init_value, BinaryOpType bop) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType exclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + ValueType init_value, BinaryOpType bop) { Impl::static_assert_is_not_openmptarget(ex); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::exclusive_scan_custom_op_impl( + return Impl::exclusive_scan_custom_op_exespace_impl( "Kokkos::exclusive_scan_custom_functors_iterator_api", ex, first, last, - first_dest, init_value, bop); + first_dest, std::move(init_value), bop); } -template 
-std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -exclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, ValueType init_value, - BinaryOpType bop) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType exclusive_scan(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + ValueType init_value, BinaryOpType bop) { Impl::static_assert_is_not_openmptarget(ex); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::exclusive_scan_custom_op_impl(label, ex, first, last, first_dest, - init_value, bop); + return Impl::exclusive_scan_custom_op_exespace_impl( + label, ex, first, last, first_dest, std::move(init_value), bop); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename ValueType, + typename BinaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto exclusive_scan(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -129,18 +154,20 @@ auto exclusive_scan(const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::exclusive_scan_custom_op_impl( + return Impl::exclusive_scan_custom_op_exespace_impl( "Kokkos::exclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - init_value, bop); + std::move(init_value), bop); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename ValueType, + typename BinaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto exclusive_scan(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -148,12 +175,92 @@ auto exclusive_scan(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::exclusive_scan_custom_op_impl( + return Impl::exclusive_scan_custom_op_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), init_value, bop); + KE::begin(view_dest), std::move(init_value), bop); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// + +// overload set 1 +template && + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType +exclusive_scan(const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + ValueType init_value) { + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + return Impl::exclusive_scan_default_op_team_impl( + teamHandle, first, last, first_dest, std::move(init_value)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto exclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + ValueType init_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::exclusive_scan_default_op_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), std::move(init_value)); +} + +// overload set 2 +template && + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType +exclusive_scan(const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + ValueType init_value, BinaryOpType bop) { + Impl::static_assert_is_not_openmptarget(teamHandle); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + return Impl::exclusive_scan_custom_op_team_impl( + teamHandle, first, last, first_dest, std::move(init_value), bop); +} + +template , int> = 0> +KOKKOS_FUNCTION auto exclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + ValueType init_value, BinaryOpType bop) { + Impl::static_assert_is_not_openmptarget(teamHandle); + 
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::exclusive_scan_custom_op_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), std::move(init_value), bop); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp index 1e300a4c20..6d805ba1be 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp @@ -23,33 +23,67 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void fill(const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { - Impl::fill_impl("Kokkos::fill_iterator_api_default", ex, first, last, value); + Impl::fill_exespace_impl("Kokkos::fill_iterator_api_default", ex, first, last, + value); } -template +template < + typename ExecutionSpace, typename IteratorType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void fill(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { - Impl::fill_impl(label, ex, first, last, value); + Impl::fill_exespace_impl(label, ex, first, last, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void fill(const ExecutionSpace& ex, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - Impl::fill_impl("Kokkos::fill_view_api_default", ex, begin(view), end(view), - value); + Impl::fill_exespace_impl("Kokkos::fill_view_api_default", ex, begin(view), + end(view), value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void fill(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::fill_exespace_impl(label, ex, begin(view), end(view), value); +} - Impl::fill_impl(label, ex, begin(view), end(view), value); +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION void fill(const TeamHandleType& th, IteratorType first, + IteratorType last, const T& value) { + Impl::fill_team_impl(th, first, last, value); +} + +template , int> = 0> +KOKKOS_FUNCTION void fill(const TeamHandleType& th, + const ::Kokkos::View& view, + const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::fill_team_impl(th, begin(view), end(view), value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp index 02503dfd14..66b8cd66cc 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp @@ -23,38 +23,72 @@ namespace Kokkos { namespace Experimental { -template +template < + typename ExecutionSpace, typename IteratorType, typename SizeType, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType fill_n(const ExecutionSpace& ex, IteratorType first, SizeType n, const T& value) { - return Impl::fill_n_impl("Kokkos::fill_n_iterator_api_default", ex, first, n, - value); + return Impl::fill_n_exespace_impl("Kokkos::fill_n_iterator_api_default", ex, + first, n, value); } -template +template < + typename ExecutionSpace, typename IteratorType, typename SizeType, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType fill_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, SizeType n, const T& value) { - return Impl::fill_n_impl(label, ex, first, n, value); + return Impl::fill_n_exespace_impl(label, ex, first, n, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename SizeType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto fill_n(const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType n, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::fill_n_impl("Kokkos::fill_n_view_api_default", ex, begin(view), - n, value); + return Impl::fill_n_exespace_impl("Kokkos::fill_n_view_api_default", ex, + begin(view), n, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename SizeType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto fill_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType n, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::fill_n_impl(label, ex, begin(view), n, value); + return Impl::fill_n_exespace_impl(label, ex, begin(view), n, value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION IteratorType fill_n(const TeamHandleType& th, + IteratorType first, SizeType n, + const T& value) { + return Impl::fill_n_team_impl(th, first, n, value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto fill_n(const TeamHandleType& th, + const ::Kokkos::View& view, + SizeType n, const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::fill_n_team_impl(th, begin(view), n, value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp index 65b68cf931..e5e2b0e2b0 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp @@ -23,36 +23,76 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> InputIterator find(const ExecutionSpace& ex, InputIterator first, InputIterator last, const T& value) { - return Impl::find_impl("Kokkos::find_iterator_api_default", ex, first, last, - value); + return Impl::find_exespace_impl("Kokkos::find_iterator_api_default", ex, + first, last, value); } -template +template < + typename ExecutionSpace, typename InputIterator, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> InputIterator find(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, const T& value) { - return Impl::find_impl(label, ex, first, last, value); + return Impl::find_exespace_impl(label, ex, first, last, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find(const ExecutionSpace& ex, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::find_impl("Kokkos::find_view_api_default", ex, KE::begin(view), - KE::end(view), value); + return Impl::find_exespace_impl("Kokkos::find_view_api_default", ex, + KE::begin(view), KE::end(view), value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::find_impl(label, ex, KE::begin(view), KE::end(view), value); + return Impl::find_exespace_impl(label, ex, KE::begin(view), KE::end(view), + value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION InputIterator find(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + const T& value) { + return Impl::find_team_impl(teamHandle, first, last, value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto find(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_team_impl(teamHandle, KE::begin(view), KE::end(view), + value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp index f6a38855eb..a4ec735fd5 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp @@ -24,24 +24,34 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1: no binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_end(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::find_end_impl("Kokkos::find_end_iterator_api_default", ex, first, - last, s_first, s_last); + return Impl::find_end_exespace_impl("Kokkos::find_end_iterator_api_default", + ex, first, last, s_first, s_last); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_end(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::find_end_impl(label, ex, first, last, s_first, s_last); + return 
Impl::find_end_exespace_impl(label, ex, first, last, s_first, s_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_end(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -49,13 +59,15 @@ auto find_end(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_end_impl("Kokkos::find_end_view_api_default", ex, - KE::begin(view), KE::end(view), KE::begin(s_view), - KE::end(s_view)); + return Impl::find_end_exespace_impl("Kokkos::find_end_view_api_default", ex, + KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_end(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -63,31 +75,38 @@ auto find_end(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_end_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view)); + return Impl::find_end_exespace_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); } // overload set 2: binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_end(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { - 
return Impl::find_end_impl("Kokkos::find_end_iterator_api_default", ex, first, - last, s_first, s_last, pred); + return Impl::find_end_exespace_impl("Kokkos::find_end_iterator_api_default", + ex, first, last, s_first, s_last, pred); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_end(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { - return Impl::find_end_impl(label, ex, first, last, s_first, s_last, pred); + return Impl::find_end_exespace_impl(label, ex, first, last, s_first, s_last, + pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_end(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -96,13 +115,15 @@ auto find_end(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_end_impl("Kokkos::find_end_view_api_default", ex, - KE::begin(view), KE::end(view), KE::begin(s_view), - KE::end(s_view), pred); + return Impl::find_end_exespace_impl("Kokkos::find_end_view_api_default", ex, + KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_end(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -111,8 +132,71 @@ auto find_end(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_end_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view), pred); + return Impl::find_end_exespace_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// overload set 1: no binary predicate passed +template , int> = 0> +KOKKOS_FUNCTION IteratorType1 find_end(const TeamHandleType& teamHandle, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last) { + return Impl::find_end_team_impl(teamHandle, first, last, s_first, s_last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto find_end( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_end_team_impl(teamHandle, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); +} + +// overload set 2: binary predicate passed +template , int> = 0> + +KOKKOS_FUNCTION IteratorType1 find_end(const TeamHandleType& teamHandle, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last, + const BinaryPredicateType& pred) { + return Impl::find_end_team_impl(teamHandle, first, last, s_first, s_last, + pred); +} + +template , 
int> = 0> +KOKKOS_FUNCTION auto find_end( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_end_team_impl(teamHandle, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp index 6b0e4993ee..341a70e2f2 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp @@ -23,24 +23,36 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1: no binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_first_of(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::find_first_of_impl("Kokkos::find_first_of_iterator_api_default", - ex, first, last, s_first, s_last); + return Impl::find_first_of_exespace_impl( + "Kokkos::find_first_of_iterator_api_default", ex, first, last, s_first, + s_last); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_first_of(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::find_first_of_impl(label, ex, first, last, s_first, s_last); + return Impl::find_first_of_exespace_impl(label, ex, 
first, last, s_first, + s_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_first_of(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -48,13 +60,15 @@ auto find_first_of(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_first_of_impl("Kokkos::find_first_of_view_api_default", ex, - KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view)); + return Impl::find_first_of_exespace_impl( + "Kokkos::find_first_of_view_api_default", ex, KE::begin(view), + KE::end(view), KE::begin(s_view), KE::end(s_view)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_first_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -62,33 +76,41 @@ auto find_first_of(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_first_of_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view)); + return Impl::find_first_of_exespace_impl(label, ex, KE::begin(view), + KE::end(view), KE::begin(s_view), + KE::end(s_view)); } // overload set 2: binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_first_of(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const 
BinaryPredicateType& pred) { - return Impl::find_first_of_impl("Kokkos::find_first_of_iterator_api_default", - ex, first, last, s_first, s_last, pred); + return Impl::find_first_of_exespace_impl( + "Kokkos::find_first_of_iterator_api_default", ex, first, last, s_first, + s_last, pred); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_first_of(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { - return Impl::find_first_of_impl(label, ex, first, last, s_first, s_last, - pred); + return Impl::find_first_of_exespace_impl(label, ex, first, last, s_first, + s_last, pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_first_of(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -97,13 +119,15 @@ auto find_first_of(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_first_of_impl("Kokkos::find_first_of_view_api_default", ex, - KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view), pred); + return Impl::find_first_of_exespace_impl( + "Kokkos::find_first_of_view_api_default", ex, KE::begin(view), + KE::end(view), KE::begin(s_view), KE::end(s_view), pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_first_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -112,8 +136,77 @@ auto find_first_of(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_first_of_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view), pred); + return Impl::find_first_of_exespace_impl(label, ex, KE::begin(view), + KE::end(view), KE::begin(s_view), + KE::end(s_view), pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// overload set 1: no binary predicate passed +template , int> = 0> +KOKKOS_FUNCTION IteratorType1 find_first_of(const TeamHandleType& teamHandle, + IteratorType1 first, + IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last) { + return Impl::find_first_of_team_impl(teamHandle, first, last, s_first, + s_last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto find_first_of( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_first_of_team_impl(teamHandle, KE::begin(view), + KE::end(view), KE::begin(s_view), + KE::end(s_view)); +} + +// overload set 2: binary predicate passed +template , int> = 0> + +KOKKOS_FUNCTION IteratorType1 find_first_of(const TeamHandleType& teamHandle, + IteratorType1 first, + IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last, + const BinaryPredicateType& pred) { + return 
Impl::find_first_of_team_impl(teamHandle, first, last, s_first, s_last, + pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto find_first_of( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_first_of_team_impl(teamHandle, KE::begin(view), + KE::end(view), KE::begin(s_view), + KE::end(s_view), pred); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp index 911316a668..283fab7617 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp @@ -23,42 +23,82 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType find_if(const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType predicate) { - return Impl::find_if_or_not_impl("Kokkos::find_if_iterator_api_default", - ex, first, last, std::move(predicate)); + return Impl::find_if_or_not_exespace_impl( + "Kokkos::find_if_iterator_api_default", ex, first, last, + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType find_if(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType predicate) { - return Impl::find_if_or_not_impl(label, ex, first, last, - std::move(predicate)); + return Impl::find_if_or_not_exespace_impl(label, ex, first, 
last, + std::move(predicate)); } -template +template ::value, + int> = 0> auto find_if(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::find_if_or_not_impl("Kokkos::find_if_view_api_default", ex, - KE::begin(v), KE::end(v), - std::move(predicate)); + return Impl::find_if_or_not_exespace_impl( + "Kokkos::find_if_view_api_default", ex, KE::begin(v), KE::end(v), + std::move(predicate)); } -template +template ::value, + int> = 0> auto find_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::find_if_or_not_impl(label, ex, KE::begin(v), KE::end(v), - std::move(predicate)); + return Impl::find_if_or_not_exespace_impl( + label, ex, KE::begin(v), KE::end(v), std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType find_if(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + PredicateType predicate) { + return Impl::find_if_or_not_team_impl(teamHandle, first, last, + std::move(predicate)); +} + +template < + typename TeamHandleType, typename DataType, typename... 
Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto find_if(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::find_if_or_not_team_impl(teamHandle, KE::begin(v), + KE::end(v), std::move(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp index 18294d7b7d..5e17a6f539 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp @@ -23,45 +23,84 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType find_if_not(const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { - return Impl::find_if_or_not_impl( + return Impl::find_if_or_not_exespace_impl( "Kokkos::find_if_not_iterator_api_default", ex, first, last, std::move(predicate)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType find_if_not(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { - return Impl::find_if_or_not_impl(label, ex, first, last, - std::move(predicate)); + return Impl::find_if_or_not_exespace_impl(label, ex, first, last, + std::move(predicate)); } -template +template ::value, + int> = 0> auto find_if_not(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = 
::Kokkos::Experimental; - return Impl::find_if_or_not_impl( + return Impl::find_if_or_not_exespace_impl( "Kokkos::find_if_not_view_api_default", ex, KE::begin(v), KE::end(v), std::move(predicate)); } -template +template ::value, + int> = 0> auto find_if_not(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::find_if_or_not_impl(label, ex, KE::begin(v), KE::end(v), - std::move(predicate)); + return Impl::find_if_or_not_exespace_impl( + label, ex, KE::begin(v), KE::end(v), std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType find_if_not(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + Predicate predicate) { + return Impl::find_if_or_not_team_impl(teamHandle, first, last, + std::move(predicate)); +} + +template < + typename TeamHandleType, typename DataType, typename... 
Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto find_if_not( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_if_or_not_team_impl( + teamHandle, KE::begin(v), KE::end(v), std::move(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp index d7b08e4842..6215b325af 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp @@ -23,42 +23,83 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + class ExecutionSpace, class IteratorType, class UnaryFunctorType, + std::enable_if_t, int> = 0> UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, UnaryFunctorType functor) { - return Impl::for_each_impl(label, ex, first, last, std::move(functor)); + return Impl::for_each_exespace_impl(label, ex, first, last, + std::move(functor)); } -template +template < + class ExecutionSpace, class IteratorType, class UnaryFunctorType, + std::enable_if_t, int> = 0> UnaryFunctorType for_each(const ExecutionSpace& ex, IteratorType first, IteratorType last, UnaryFunctorType functor) { - return Impl::for_each_impl("Kokkos::for_each_iterator_api_default", ex, first, - last, std::move(functor)); + return Impl::for_each_exespace_impl("Kokkos::for_each_iterator_api_default", + ex, first, last, std::move(functor)); } -template +template < + class ExecutionSpace, class DataType, class... 
Properties, + class UnaryFunctorType, + std::enable_if_t, int> = 0> UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_impl(label, ex, KE::begin(v), KE::end(v), - std::move(functor)); + return Impl::for_each_exespace_impl(label, ex, KE::begin(v), KE::end(v), + std::move(functor)); } -template +template < + class ExecutionSpace, class DataType, class... Properties, + class UnaryFunctorType, + std::enable_if_t, int> = 0> UnaryFunctorType for_each(const ExecutionSpace& ex, const ::Kokkos::View& v, UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_impl("Kokkos::for_each_view_api_default", ex, - KE::begin(v), KE::end(v), std::move(functor)); + return Impl::for_each_exespace_impl("Kokkos::for_each_view_api_default", ex, + KE::begin(v), KE::end(v), + std::move(functor)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// + +template , int> = 0> +KOKKOS_FUNCTION UnaryFunctorType for_each(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + UnaryFunctorType functor) { + return Impl::for_each_team_impl(teamHandle, first, last, std::move(functor)); +} + +template , int> = 0> +KOKKOS_FUNCTION UnaryFunctorType +for_each(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + UnaryFunctorType functor) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::for_each_team_impl(teamHandle, KE::begin(v), KE::end(v), + std::move(functor)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp index f1769da05b..e6fbcad891 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp @@ -23,43 +23,87 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + class ExecutionSpace, class IteratorType, class SizeType, + class UnaryFunctorType, + std::enable_if_t, int> = 0> IteratorType for_each_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, SizeType n, UnaryFunctorType functor) { - return Impl::for_each_n_impl(label, ex, first, n, std::move(functor)); + return Impl::for_each_n_exespace_impl(label, ex, first, n, + std::move(functor)); } -template +template < + class ExecutionSpace, class IteratorType, class SizeType, + class UnaryFunctorType, + std::enable_if_t, int> = 0> IteratorType for_each_n(const ExecutionSpace& ex, IteratorType first, SizeType n, UnaryFunctorType functor) { - return Impl::for_each_n_impl("Kokkos::for_each_n_iterator_api_default", ex, - first, n, std::move(functor)); + return Impl::for_each_n_exespace_impl( + "Kokkos::for_each_n_iterator_api_default", ex, first, n, + std::move(functor)); } -template 
+template < + class ExecutionSpace, class DataType, class... Properties, class SizeType, + class UnaryFunctorType, + std::enable_if_t, int> = 0> auto for_each_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, SizeType n, UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_n_impl(label, ex, KE::begin(v), n, std::move(functor)); + return Impl::for_each_n_exespace_impl(label, ex, KE::begin(v), n, + std::move(functor)); } -template +template < + class ExecutionSpace, class DataType, class... Properties, class SizeType, + class UnaryFunctorType, + std::enable_if_t, int> = 0> auto for_each_n(const ExecutionSpace& ex, const ::Kokkos::View& v, SizeType n, UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_n_impl("Kokkos::for_each_n_view_api_default", ex, - KE::begin(v), n, std::move(functor)); + return Impl::for_each_n_exespace_impl("Kokkos::for_each_n_view_api_default", + ex, KE::begin(v), n, + std::move(functor)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// + +template , int> = 0> +KOKKOS_FUNCTION IteratorType for_each_n(const TeamHandleType& teamHandle, + IteratorType first, SizeType n, + UnaryFunctorType functor) { + return Impl::for_each_n_team_impl(teamHandle, first, n, std::move(functor)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto for_each_n( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, SizeType n, + UnaryFunctorType functor) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::for_each_n_team_impl(teamHandle, KE::begin(v), n, + std::move(functor)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp index 13e12783e0..a3295084ee 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp @@ -23,38 +23,68 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template , int> = 0> void generate(const ExecutionSpace& ex, IteratorType first, IteratorType last, Generator g) { - Impl::generate_impl("Kokkos::generate_iterator_api_default", ex, first, last, - std::move(g)); + Impl::generate_exespace_impl("Kokkos::generate_iterator_api_default", ex, + first, last, std::move(g)); } -template +template , int> = 0> void generate(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Generator g) { - Impl::generate_impl(label, ex, first, last, std::move(g)); + Impl::generate_exespace_impl(label, ex, first, last, std::move(g)); } -template +template , int> = 0> void generate(const ExecutionSpace& ex, const ::Kokkos::View& view, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::generate_impl("Kokkos::generate_view_api_default", ex, begin(view), - end(view), std::move(g)); + 
Impl::generate_exespace_impl("Kokkos::generate_view_api_default", ex, + begin(view), end(view), std::move(g)); } -template +template , int> = 0> void generate(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::generate_impl(label, ex, begin(view), end(view), std::move(g)); + Impl::generate_exespace_impl(label, ex, begin(view), end(view), std::move(g)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION void generate(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + Generator g) { + Impl::generate_team_impl(teamHandle, first, last, std::move(g)); +} + +template , int> = 0> +KOKKOS_FUNCTION void generate( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, Generator g) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::generate_team_impl(teamHandle, begin(view), end(view), std::move(g)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp index 4d17512228..e480062c23 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp @@ -23,40 +23,75 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template , int> = 0> IteratorType generate_n(const ExecutionSpace& ex, IteratorType first, Size count, Generator g) { - Impl::generate_n_impl("Kokkos::generate_n_iterator_api_default", ex, first, - count, std::move(g)); - return first + count; + return Impl::generate_n_exespace_impl( + "Kokkos::generate_n_iterator_api_default", ex, first, count, + std::move(g)); } -template 
+template , int> = 0> IteratorType generate_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, Size count, Generator g) { - Impl::generate_n_impl(label, ex, first, count, std::move(g)); - return first + count; + return Impl::generate_n_exespace_impl(label, ex, first, count, std::move(g)); } -template +template , int> = 0> auto generate_n(const ExecutionSpace& ex, const ::Kokkos::View& view, Size count, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::generate_n_impl("Kokkos::generate_n_view_api_default", ex, - begin(view), count, std::move(g)); + return Impl::generate_n_exespace_impl("Kokkos::generate_n_view_api_default", + ex, begin(view), count, std::move(g)); } -template +template , int> = 0> auto generate_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, Size count, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::generate_n_impl(label, ex, begin(view), count, std::move(g)); + return Impl::generate_n_exespace_impl(label, ex, begin(view), count, + std::move(g)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION IteratorType generate_n(const TeamHandleType& teamHandle, + IteratorType first, Size count, + Generator g) { + return Impl::generate_n_team_impl(teamHandle, first, count, std::move(g)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto generate_n( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, Size count, + Generator g) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::generate_n_team_impl(teamHandle, begin(view), count, + std::move(g)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp index bcd731b850..a0e540b5e7 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp @@ -23,33 +23,45 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1 -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest) { - return Impl::inclusive_scan_default_op_impl( +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest) { + return Impl::inclusive_scan_default_op_exespace_impl( "Kokkos::inclusive_scan_default_functors_iterator_api", ex, first, last, first_dest); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest) { - 
return Impl::inclusive_scan_default_op_impl(label, ex, first, last, - first_dest); +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest) { + return Impl::inclusive_scan_default_op_exespace_impl(label, ex, first, last, + first_dest); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -57,13 +69,15 @@ auto inclusive_scan( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_default_op_impl( + return Impl::inclusive_scan_default_op_exespace_impl( "Kokkos::inclusive_scan_default_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -71,39 +85,45 @@ auto inclusive_scan( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_default_op_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), - KE::begin(view_dest)); + return Impl::inclusive_scan_default_op_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest)); } // overload set 2 (accepting custom binary op) -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - BinaryOp binary_op) { - return Impl::inclusive_scan_custom_binary_op_impl( +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + BinaryOp binary_op) { + return Impl::inclusive_scan_custom_binary_op_exespace_impl( "Kokkos::inclusive_scan_custom_functors_iterator_api", ex, first, last, first_dest, binary_op); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, BinaryOp binary_op) { - return Impl::inclusive_scan_custom_binary_op_impl(label, ex, first, last, - first_dest, binary_op); +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan( + const std::string& label, 
const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, BinaryOp binary_op) { + return Impl::inclusive_scan_custom_binary_op_exespace_impl( + label, ex, first, last, first_dest, binary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryOp, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -111,14 +131,16 @@ auto inclusive_scan(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_custom_binary_op_impl( + return Impl::inclusive_scan_custom_binary_op_exespace_impl( "Kokkos::inclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryOp, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -126,67 +148,192 @@ auto inclusive_scan(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_custom_binary_op_impl( + return Impl::inclusive_scan_custom_binary_op_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op); } // overload set 3 -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - BinaryOp binary_op, ValueType init_value) { - return Impl::inclusive_scan_custom_binary_op_impl( +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + BinaryOp binary_op, ValueType init_value) { + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + return Impl::inclusive_scan_custom_binary_op_exespace_impl( "Kokkos::inclusive_scan_custom_functors_iterator_api", ex, first, last, - first_dest, binary_op, init_value); + first_dest, binary_op, std::move(init_value)); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, BinaryOp binary_op, - ValueType 
init_value) { - return Impl::inclusive_scan_custom_binary_op_impl( - label, ex, first, last, first_dest, binary_op, init_value); +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + BinaryOp binary_op, ValueType init_value) { + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + return Impl::inclusive_scan_custom_binary_op_exespace_impl( + label, ex, first, last, first_dest, binary_op, std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryOp, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOp binary_op, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_custom_binary_op_impl( + return Impl::inclusive_scan_custom_binary_op_exespace_impl( "Kokkos::inclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - binary_op, init_value); + binary_op, std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryOp, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOp binary_op, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_custom_binary_op_impl( + return Impl::inclusive_scan_custom_binary_op_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), binary_op, init_value); + KE::begin(view_dest), binary_op, std::move(init_value)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// + +// overload set 1 +template && :: + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType +inclusive_scan(const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest) { + return Impl::inclusive_scan_default_op_team_impl(teamHandle, first, last, + first_dest); +} + +template , int> = 0> +KOKKOS_FUNCTION auto inclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::inclusive_scan_default_op_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest)); +} + +// overload set 2 (accepting custom binary op) +template && :: + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType inclusive_scan( + const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, BinaryOp binary_op) { + return Impl::inclusive_scan_custom_binary_op_team_impl( + teamHandle, first, last, first_dest, binary_op); +} + +template , int> = 0> +KOKKOS_FUNCTION auto inclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + BinaryOp binary_op) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::inclusive_scan_custom_binary_op_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op); +} + +// overload set 3 +template && :: + Kokkos::is_team_handle_v, + int> = 0> + +KOKKOS_FUNCTION OutputIteratorType +inclusive_scan(const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, 
OutputIteratorType first_dest, + BinaryOp binary_op, ValueType init_value) { + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + return Impl::inclusive_scan_custom_binary_op_team_impl( + teamHandle, first, last, first_dest, binary_op, std::move(init_value)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto inclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + BinaryOp binary_op, ValueType init_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + namespace KE = ::Kokkos::Experimental; + return Impl::inclusive_scan_custom_binary_op_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op, std::move(init_value)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp index 29d6be9e8b..42f20bc4ec 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp @@ -23,39 +23,78 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_partitioned(const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType p) { - return Impl::is_partitioned_impl( + return Impl::is_partitioned_exespace_impl( "Kokkos::is_partitioned_iterator_api_default", ex, first, last, std::move(p)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename PredicateType, + 
std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_partitioned(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType p) { - return Impl::is_partitioned_impl(label, ex, first, last, std::move(p)); + return Impl::is_partitioned_exespace_impl(label, ex, first, last, + std::move(p)); } -template +template < + typename ExecutionSpace, typename PredicateType, typename DataType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_partitioned(const ExecutionSpace& ex, const ::Kokkos::View& v, PredicateType p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::is_partitioned_impl("Kokkos::is_partitioned_view_api_default", - ex, cbegin(v), cend(v), std::move(p)); + return Impl::is_partitioned_exespace_impl( + "Kokkos::is_partitioned_view_api_default", ex, cbegin(v), cend(v), + std::move(p)); } -template +template < + typename ExecutionSpace, typename PredicateType, typename DataType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_partitioned(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, PredicateType p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::is_partitioned_impl(label, ex, cbegin(v), cend(v), std::move(p)); + return Impl::is_partitioned_exespace_impl(label, ex, cbegin(v), cend(v), + std::move(p)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION bool is_partitioned(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + PredicateType p) { + return Impl::is_partitioned_team_impl(teamHandle, first, last, std::move(p)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool is_partitioned( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, PredicateType p) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + return Impl::is_partitioned_team_impl(teamHandle, cbegin(v), cend(v), + std::move(p)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp index f036254a02..2c676c3ff3 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp @@ -23,55 +23,73 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::is_sorted_impl("Kokkos::is_sorted_iterator_api_default", ex, - first, last); + return Impl::is_sorted_exespace_impl("Kokkos::is_sorted_iterator_api_default", + ex, first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::is_sorted_impl(label, ex, first, last); + return Impl::is_sorted_exespace_impl(label, ex, first, last); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_impl("Kokkos::is_sorted_view_api_default", ex, - KE::cbegin(view), KE::cend(view)); + return Impl::is_sorted_exespace_impl("Kokkos::is_sorted_view_api_default", ex, + KE::cbegin(view), KE::cend(view)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_impl(label, ex, KE::cbegin(view), KE::cend(view)); + return Impl::is_sorted_exespace_impl(label, ex, KE::cbegin(view), + KE::cend(view)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::is_sorted_impl("Kokkos::is_sorted_iterator_api_default", ex, - first, last, std::move(comp)); + return Impl::is_sorted_exespace_impl("Kokkos::is_sorted_iterator_api_default", + ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::is_sorted_impl(label, ex, first, last, std::move(comp)); + return Impl::is_sorted_exespace_impl(label, ex, first, last, 
std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const ExecutionSpace& ex, const ::Kokkos::View& view, ComparatorType comp) { @@ -79,13 +97,15 @@ bool is_sorted(const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_impl("Kokkos::is_sorted_view_api_default", ex, - KE::cbegin(view), KE::cend(view), - std::move(comp)); + return Impl::is_sorted_exespace_impl("Kokkos::is_sorted_view_api_default", ex, + KE::cbegin(view), KE::cend(view), + std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ComparatorType comp) { @@ -93,8 +113,56 @@ bool is_sorted(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_impl(label, ex, KE::cbegin(view), KE::cend(view), - std::move(comp)); + return Impl::is_sorted_exespace_impl(label, ex, KE::cbegin(view), + KE::cend(view), std::move(comp)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION bool is_sorted(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last) { + return Impl::is_sorted_team_impl(teamHandle, first, last); +} + +template , int> = 0> +KOKKOS_FUNCTION bool is_sorted( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_team_impl(teamHandle, KE::cbegin(view), + KE::cend(view)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool is_sorted(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + return Impl::is_sorted_team_impl(teamHandle, first, last, std::move(comp)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool is_sorted( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_not_openmptarget(teamHandle); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_team_impl(teamHandle, KE::cbegin(view), KE::cend(view), + std::move(comp)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp index 276b3bb884..96a17b6785 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp @@ -23,58 +23,78 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType is_sorted_until(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::is_sorted_until_impl( + return 
Impl::is_sorted_until_exespace_impl( "Kokkos::is_sorted_until_iterator_api_default", ex, first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType is_sorted_until(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::is_sorted_until_impl(label, ex, first, last); + return Impl::is_sorted_until_exespace_impl(label, ex, first, last); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto is_sorted_until(const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_until_impl("Kokkos::is_sorted_until_view_api_default", - ex, KE::begin(view), KE::end(view)); + return Impl::is_sorted_until_exespace_impl( + "Kokkos::is_sorted_until_view_api_default", ex, KE::begin(view), + KE::end(view)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto is_sorted_until(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_until_impl(label, ex, KE::begin(view), KE::end(view)); + return Impl::is_sorted_until_exespace_impl(label, ex, KE::begin(view), + KE::end(view)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType is_sorted_until(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::is_sorted_until_impl( + return Impl::is_sorted_until_exespace_impl( "Kokkos::is_sorted_until_iterator_api_default", ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType is_sorted_until(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::is_sorted_until_impl(label, ex, first, last, std::move(comp)); + return Impl::is_sorted_until_exespace_impl(label, ex, first, last, + std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto is_sorted_until(const ExecutionSpace& ex, const ::Kokkos::View& view, ComparatorType comp) { @@ -82,13 +102,15 @@ auto is_sorted_until(const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_until_impl("Kokkos::is_sorted_until_view_api_default", - ex, KE::begin(view), KE::end(view), - std::move(comp)); + return Impl::is_sorted_until_exespace_impl( + "Kokkos::is_sorted_until_view_api_default", ex, KE::begin(view), + KE::end(view), std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto is_sorted_until(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ComparatorType comp) { @@ -96,8 +118,57 @@ auto is_sorted_until(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_until_impl(label, ex, KE::begin(view), KE::end(view), - std::move(comp)); + return Impl::is_sorted_until_exespace_impl(label, ex, KE::begin(view), + KE::end(view), std::move(comp)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType is_sorted_until(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last) { + return Impl::is_sorted_until_team_impl(teamHandle, first, last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto is_sorted_until( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_until_team_impl(teamHandle, KE::begin(view), + KE::end(view)); +} + +template , int> = 0> +KOKKOS_FUNCTION IteratorType 
is_sorted_until(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + return Impl::is_sorted_until_team_impl(teamHandle, first, last, + std::move(comp)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto is_sorted_until( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_not_openmptarget(teamHandle); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_until_team_impl(teamHandle, KE::begin(view), + KE::end(view), std::move(comp)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp index 0a77ef629f..4b5c69df45 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp @@ -23,25 +23,34 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + std::enable_if_t, int> = 0> bool lexicographical_compare(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::lexicographical_compare_impl( + return Impl::lexicographical_compare_exespace_impl( "Kokkos::lexicographical_compare_iterator_api_default", ex, first1, last1, first2, last2); } -template +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + std::enable_if_t, int> = 0> bool lexicographical_compare(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::lexicographical_compare_impl(label, ex, first1, 
last1, first2, - last2); + return Impl::lexicographical_compare_exespace_impl(label, ex, first1, last1, + first2, last2); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, + std::enable_if_t, int> = 0> bool lexicographical_compare( const ExecutionSpace& ex, const ::Kokkos::View& view1, @@ -50,13 +59,15 @@ bool lexicographical_compare( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::lexicographical_compare_impl( + return Impl::lexicographical_compare_exespace_impl( "Kokkos::lexicographical_compare_view_api_default", ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), KE::cend(view2)); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, + std::enable_if_t, int> = 0> bool lexicographical_compare( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, @@ -65,33 +76,39 @@ bool lexicographical_compare( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::lexicographical_compare_impl(label, ex, KE::cbegin(view1), - KE::cend(view1), KE::cbegin(view2), - KE::cend(view2)); + return Impl::lexicographical_compare_exespace_impl( + label, ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), + KE::cend(view2)); } -template +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + class ComparatorType, + std::enable_if_t, int> = 0> bool lexicographical_compare(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, ComparatorType comp) { - return Impl::lexicographical_compare_impl( + return Impl::lexicographical_compare_exespace_impl( "Kokkos::lexicographical_compare_iterator_api_default", ex, first1, last1, first2, last2, comp); } -template +template < + class ExecutionSpace, 
class IteratorType1, class IteratorType2, + class ComparatorType, + std::enable_if_t, int> = 0> bool lexicographical_compare(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, ComparatorType comp) { - return Impl::lexicographical_compare_impl(label, ex, first1, last1, first2, - last2, comp); + return Impl::lexicographical_compare_exespace_impl(label, ex, first1, last1, + first2, last2, comp); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class ComparatorType, + std::enable_if_t, int> = 0> bool lexicographical_compare( const ExecutionSpace& ex, const ::Kokkos::View& view1, @@ -100,13 +117,15 @@ bool lexicographical_compare( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::lexicographical_compare_impl( + return Impl::lexicographical_compare_exespace_impl( "Kokkos::lexicographical_compare_view_api_default", ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), KE::cend(view2), comp); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, class ComparatorType, + std::enable_if_t, int> = 0> bool lexicographical_compare( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, @@ -115,9 +134,67 @@ bool lexicographical_compare( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::lexicographical_compare_impl(label, ex, KE::cbegin(view1), - KE::cend(view1), KE::cbegin(view2), - KE::cend(view2), comp); + return Impl::lexicographical_compare_exespace_impl( + label, ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), + KE::cend(view2), comp); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION bool lexicographical_compare(const TeamHandleType& teamHandle, + IteratorType1 first1, + IteratorType1 last1, + IteratorType2 first2, + IteratorType2 last2) { + return Impl::lexicographical_compare_team_impl(teamHandle, first1, last1, + first2, last2); +} + +template , int> = 0> +KOKKOS_FUNCTION bool lexicographical_compare( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view1, + ::Kokkos::View& view2) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::lexicographical_compare_team_impl( + teamHandle, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), + KE::cend(view2)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool lexicographical_compare( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, ComparatorType comp) { + return Impl::lexicographical_compare_team_impl(teamHandle, first1, last1, + first2, last2, comp); +} + +template , int> = 0> +KOKKOS_FUNCTION bool lexicographical_compare( + const TeamHandleType& 
teamHandle, + const ::Kokkos::View& view1, + ::Kokkos::View& view2, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::lexicographical_compare_team_impl( + teamHandle, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), + KE::cend(view2), comp); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp index 2c1374f700..d16bac5bfc 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp @@ -23,81 +23,148 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::max_element_iterator_api_default", ex, first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::min_or_max_element_impl(label, ex, first, last); + return Impl::min_or_max_element_exespace_impl(label, ex, first, + last); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return 
Impl::min_or_max_element_exespace_impl( "Kokkos::max_element_iterator_api_default", ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( label, ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::max_element_view_api_default", ex, begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::min_or_max_element_impl(label, ex, begin(v), - end(v)); + return Impl::min_or_max_element_exespace_impl(label, ex, + begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... 
Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::max_element_view_api_default", ex, begin(v), end(v), std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( label, ex, begin(v), end(v), std::move(comp)); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION auto max_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last) { + return Impl::min_or_max_element_team_impl(teamHandle, first, + last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto max_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::min_or_max_element_team_impl(teamHandle, begin(v), + end(v)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto max_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + return Impl::min_or_max_element_team_impl( + teamHandle, first, last, std::move(comp)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto max_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + Impl::static_assert_is_not_openmptarget(teamHandle); + return Impl::min_or_max_element_team_impl( + teamHandle, begin(v), end(v), std::move(comp)); +} + } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp index 1d03b7c962..2a53fce3e2 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp @@ -23,81 +23,148 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::min_element_iterator_api_default", ex, first, 
last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::min_or_max_element_impl(label, ex, first, last); + return Impl::min_or_max_element_exespace_impl(label, ex, first, + last); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::min_element_iterator_api_default", ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( label, ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::min_element_view_api_default", ex, begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... 
Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::min_element_view_api_default", ex, begin(v), end(v), std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::min_or_max_element_impl(label, ex, begin(v), - end(v)); + return Impl::min_or_max_element_exespace_impl(label, ex, + begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( label, ex, begin(v), end(v), std::move(comp)); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION auto min_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last) { + return Impl::min_or_max_element_team_impl(teamHandle, first, + last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto min_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::min_or_max_element_team_impl(teamHandle, begin(v), + end(v)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto min_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + return Impl::min_or_max_element_team_impl( + teamHandle, first, last, std::move(comp)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto min_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + return Impl::min_or_max_element_team_impl( + teamHandle, begin(v), end(v), std::move(comp)); +} + } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp index d481b499cc..c3a1f73ef6 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp @@ -23,82 +23,151 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( "Kokkos::minmax_element_iterator_api_default", ex, 
first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::minmax_element_impl(label, ex, first, last); + return Impl::minmax_element_exespace_impl(label, ex, + first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( "Kokkos::minmax_element_iterator_api_default", ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( label, ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( "Kokkos::minmax_element_view_api_default", ex, begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::minmax_element_impl(label, ex, begin(v), - end(v)); + return Impl::minmax_element_exespace_impl( + label, ex, begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( "Kokkos::minmax_element_view_api_default", ex, begin(v), end(v), std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( label, ex, begin(v), end(v), std::move(comp)); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION auto minmax_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last) { + return Impl::minmax_element_team_impl(teamHandle, first, + last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto minmax_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + + return Impl::minmax_element_team_impl( + teamHandle, first, last, std::move(comp)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto minmax_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::minmax_element_team_impl(teamHandle, + begin(v), end(v)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto minmax_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + Impl::static_assert_is_not_openmptarget(teamHandle); + + return Impl::minmax_element_team_impl( + teamHandle, begin(v), end(v), std::move(comp)); +} + } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp index 13c994ca90..090afe69e3 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp @@ -30,46 +30,60 @@ namespace Experimental { // // makes API ambiguous (with the overload accepting views). 
-template +// +// overload set accepting execution space +// +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + std::enable_if_t, int> = 0> ::Kokkos::pair mismatch(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::mismatch_impl("Kokkos::mismatch_iterator_api_default", ex, - first1, last1, first2, last2); + return Impl::mismatch_exespace_impl("Kokkos::mismatch_iterator_api_default", + ex, first1, last1, first2, last2); } -template +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType, + std::enable_if_t, int> = 0> ::Kokkos::pair mismatch( const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, BinaryPredicateType&& predicate) { - return Impl::mismatch_impl("Kokkos::mismatch_iterator_api_default", ex, - first1, last1, first2, last2, - std::forward(predicate)); + return Impl::mismatch_exespace_impl( + "Kokkos::mismatch_iterator_api_default", ex, first1, last1, first2, last2, + std::forward(predicate)); } -template +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + std::enable_if_t, int> = 0> ::Kokkos::pair mismatch( const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::mismatch_impl(label, ex, first1, last1, first2, last2); + return Impl::mismatch_exespace_impl(label, ex, first1, last1, first2, last2); } -template +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType, + std::enable_if_t, int> = 0> ::Kokkos::pair mismatch( const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, BinaryPredicateType&& predicate) { - return Impl::mismatch_impl(label, ex, first1, last1, first2, last2, - 
std::forward(predicate)); + return Impl::mismatch_exespace_impl( + label, ex, first1, last1, first2, last2, + std::forward(predicate)); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, + std::enable_if_t, int> = 0> auto mismatch(const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2) { @@ -77,13 +91,15 @@ auto mismatch(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::mismatch_impl("Kokkos::mismatch_view_api_default", ex, - KE::begin(view1), KE::end(view1), KE::begin(view2), - KE::end(view2)); + return Impl::mismatch_exespace_impl("Kokkos::mismatch_view_api_default", ex, + KE::begin(view1), KE::end(view1), + KE::begin(view2), KE::end(view2)); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class BinaryPredicateType, + std::enable_if_t, int> = 0> auto mismatch(const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2, @@ -92,14 +108,16 @@ auto mismatch(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::mismatch_impl("Kokkos::mismatch_view_api_default", ex, - KE::begin(view1), KE::end(view1), KE::begin(view2), - KE::end(view2), - std::forward(predicate)); + return Impl::mismatch_exespace_impl( + "Kokkos::mismatch_view_api_default", ex, KE::begin(view1), KE::end(view1), + KE::begin(view2), KE::end(view2), + std::forward(predicate)); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, + std::enable_if_t, int> = 0> auto mismatch(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2) { @@ -107,12 +125,15 @@ auto mismatch(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::mismatch_impl(label, ex, KE::begin(view1), KE::end(view1), - KE::begin(view2), KE::end(view2)); + return Impl::mismatch_exespace_impl(label, ex, KE::begin(view1), + KE::end(view1), KE::begin(view2), + KE::end(view2)); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class BinaryPredicateType, + std::enable_if_t, int> = 0> auto mismatch(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2, @@ -121,9 +142,65 @@ auto mismatch(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::mismatch_impl(label, ex, KE::begin(view1), KE::end(view1), - KE::begin(view2), KE::end(view2), - std::forward(predicate)); + return Impl::mismatch_exespace_impl( + label, ex, KE::begin(view1), KE::end(view1), KE::begin(view2), + KE::end(view2), std::forward(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION ::Kokkos::pair mismatch( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { + return Impl::mismatch_team_impl(teamHandle, first1, last1, first2, last2); +} + +template , int> = 0> +KOKKOS_FUNCTION ::Kokkos::pair mismatch( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType&& predicate) { + return Impl::mismatch_team_impl(teamHandle, first1, last1, first2, last2, + std::forward(predicate)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto mismatch( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view1, + const ::Kokkos::View& view2) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::mismatch_team_impl(teamHandle, KE::begin(view1), KE::end(view1), + KE::begin(view2), KE::end(view2)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto mismatch( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view1, + const ::Kokkos::View& view2, + BinaryPredicateType&& predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::mismatch_team_impl(teamHandle, KE::begin(view1), KE::end(view1), + KE::begin(view2), KE::end(view2), + std::forward(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Move.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Move.hpp index d49acd9f70..f04ea12ba8 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Move.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Move.hpp @@ -23,41 +23,81 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set 
accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator move(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::move_impl("Kokkos::move_iterator_api_default", ex, first, last, - d_first); + return Impl::move_exespace_impl("Kokkos::move_iterator_api_default", ex, + first, last, d_first); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator move(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::move_impl(label, ex, first, last, d_first); + return Impl::move_exespace_impl(label, ex, first, last, d_first); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto move(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::move_impl("Kokkos::move_view_api_default", ex, begin(source), - end(source), begin(dest)); + return Impl::move_exespace_impl("Kokkos::move_view_api_default", ex, + begin(source), end(source), begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto move(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::move_impl(label, ex, begin(source), end(source), begin(dest)); + return Impl::move_exespace_impl(label, ex, begin(source), end(source), + begin(dest)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator move(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + OutputIterator d_first) { + return Impl::move_team_impl(teamHandle, first, last, d_first); +} + +template , int> = 0> +KOKKOS_FUNCTION auto move( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::move_team_impl(teamHandle, begin(source), end(source), + begin(dest)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp index 60d50fa881..375474ca57 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp @@ -23,42 +23,83 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType2 move_backward(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, 
IteratorType2 d_last) { - return Impl::move_backward_impl("Kokkos::move_backward_iterator_api_default", - ex, first, last, d_last); + return Impl::move_backward_exespace_impl( + "Kokkos::move_backward_iterator_api_default", ex, first, last, d_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto move_backward(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::move_backward_impl("Kokkos::move_backward_view_api_default", ex, - begin(source), end(source), end(dest)); + return Impl::move_backward_exespace_impl( + "Kokkos::move_backward_view_api_default", ex, begin(source), end(source), + end(dest)); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType2 move_backward(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 d_last) { - return Impl::move_backward_impl(label, ex, first, last, d_last); + return Impl::move_backward_exespace_impl(label, ex, first, last, d_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto move_backward(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::move_backward_impl(label, ex, begin(source), end(source), - end(dest)); + return Impl::move_backward_exespace_impl(label, ex, begin(source), + end(source), end(dest)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType2 move_backward(const TeamHandleType& teamHandle, + IteratorType1 first, + IteratorType1 last, + IteratorType2 d_last) { + return Impl::move_backward_team_impl(teamHandle, first, last, d_last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto move_backward( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::move_backward_team_impl(teamHandle, begin(source), end(source), + end(dest)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp index cf5de3b72b..f7baab3fc0 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp @@ -23,41 +23,80 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool none_of(const ExecutionSpace& ex, IteratorType first, 
IteratorType last, Predicate predicate) { - return Impl::none_of_impl("Kokkos::none_of_iterator_api_default", ex, first, - last, predicate); + return Impl::none_of_exespace_impl("Kokkos::none_of_iterator_api_default", ex, + first, last, predicate); } -template +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool none_of(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { - return Impl::none_of_impl(label, ex, first, last, predicate); + return Impl::none_of_exespace_impl(label, ex, first, last, predicate); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool none_of(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::none_of_impl("Kokkos::none_of_view_api_default", ex, - KE::cbegin(v), KE::cend(v), std::move(predicate)); + return Impl::none_of_exespace_impl("Kokkos::none_of_view_api_default", ex, + KE::cbegin(v), KE::cend(v), + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool none_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::none_of_impl(label, ex, KE::cbegin(v), KE::cend(v), - std::move(predicate)); + return Impl::none_of_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template +KOKKOS_FUNCTION + std::enable_if_t<::Kokkos::is_team_handle::value, bool> + none_of(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, Predicate predicate) { + return Impl::none_of_team_impl(teamHandle, first, last, predicate); +} + +template +KOKKOS_FUNCTION + std::enable_if_t<::Kokkos::is_team_handle::value, bool> + none_of(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::none_of_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), + std::move(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp index 38c0a35b62..a1feee8d6d 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp @@ -23,57 +23,103 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorTrueType, typename OutputIteratorFalseType, + typename PredicateType, + 
std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> ::Kokkos::pair partition_copy( const ExecutionSpace& ex, InputIteratorType from_first, InputIteratorType from_last, OutputIteratorTrueType to_first_true, OutputIteratorFalseType to_first_false, PredicateType p) { - return Impl::partition_copy_impl( + return Impl::partition_copy_exespace_impl( "Kokkos::partition_copy_iterator_api_default", ex, from_first, from_last, to_first_true, to_first_false, std::move(p)); } -template +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorTrueType, typename OutputIteratorFalseType, + typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> ::Kokkos::pair partition_copy( const std::string& label, const ExecutionSpace& ex, InputIteratorType from_first, InputIteratorType from_last, OutputIteratorTrueType to_first_true, OutputIteratorFalseType to_first_false, PredicateType p) { - return Impl::partition_copy_impl(label, ex, from_first, from_last, - to_first_true, to_first_false, std::move(p)); + return Impl::partition_copy_exespace_impl(label, ex, from_first, from_last, + to_first_true, to_first_false, + std::move(p)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename DataType3, + typename... 
Properties3, typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto partition_copy( const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest_true, const ::Kokkos::View& view_dest_false, PredicateType p) { - return Impl::partition_copy_impl("Kokkos::partition_copy_view_api_default", - ex, cbegin(view_from), cend(view_from), - begin(view_dest_true), - begin(view_dest_false), std::move(p)); + return Impl::partition_copy_exespace_impl( + "Kokkos::partition_copy_view_api_default", ex, cbegin(view_from), + cend(view_from), begin(view_dest_true), begin(view_dest_false), + std::move(p)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename DataType3, + typename... Properties3, typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto partition_copy( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest_true, const ::Kokkos::View& view_dest_false, PredicateType p) { - return Impl::partition_copy_impl(label, ex, cbegin(view_from), - cend(view_from), begin(view_dest_true), - begin(view_dest_false), std::move(p)); + return Impl::partition_copy_exespace_impl( + label, ex, cbegin(view_from), cend(view_from), begin(view_dest_true), + begin(view_dest_false), std::move(p)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION ::Kokkos::pair +partition_copy(const TeamHandleType& teamHandle, InputIteratorType from_first, + InputIteratorType from_last, + OutputIteratorTrueType to_first_true, + OutputIteratorFalseType to_first_false, PredicateType p) { + return Impl::partition_copy_team_impl(teamHandle, from_first, from_last, + to_first_true, to_first_false, + std::move(p)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto partition_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest_true, + const ::Kokkos::View& view_dest_false, + PredicateType p) { + return Impl::partition_copy_team_impl(teamHandle, cbegin(view_from), + cend(view_from), begin(view_dest_true), + begin(view_dest_false), std::move(p)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp index 24798e377e..60cbeeda87 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp @@ -23,38 +23,78 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType partition_point(const ExecutionSpace& ex, IteratorType first, IteratorType last, UnaryPredicate p) { - return Impl::partition_point_impl( + return Impl::partition_point_exespace_impl( "Kokkos::partitioned_point_iterator_api_default", ex, first, last, std::move(p)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType partition_point(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, UnaryPredicate p) { 
- return Impl::partition_point_impl(label, ex, first, last, std::move(p)); + return Impl::partition_point_exespace_impl(label, ex, first, last, + std::move(p)); } -template +template < + typename ExecutionSpace, typename UnaryPredicate, typename DataType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto partition_point(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, UnaryPredicate p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::partition_point_impl(label, ex, begin(v), end(v), std::move(p)); + return Impl::partition_point_exespace_impl(label, ex, begin(v), end(v), + std::move(p)); } -template +template < + typename ExecutionSpace, typename UnaryPredicate, typename DataType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto partition_point(const ExecutionSpace& ex, const ::Kokkos::View& v, UnaryPredicate p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::partition_point_impl("Kokkos::partition_point_view_api_default", - ex, begin(v), end(v), std::move(p)); + return Impl::partition_point_exespace_impl( + "Kokkos::partition_point_view_api_default", ex, begin(v), end(v), + std::move(p)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION IteratorType partition_point(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last, + UnaryPredicate p) { + return Impl::partition_point_team_impl(teamHandle, first, last, std::move(p)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto partition_point( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, UnaryPredicate p) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + return Impl::partition_point_team_impl(teamHandle, begin(v), end(v), + std::move(p)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reduce.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reduce.hpp index a31fa1497a..b84f00f8bb 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reduce.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reduce.hpp @@ -23,28 +23,38 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // // overload set 1 // -template +template ::value, + int> = 0> typename IteratorType::value_type reduce(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( "Kokkos::reduce_default_functors_iterator_api", ex, first, last, typename IteratorType::value_type()); } -template +template ::value, + int> = 0> typename IteratorType::value_type reduce(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( label, ex, first, last, typename IteratorType::value_type()); } -template +template ::value, + int> = 0> auto reduce(const ExecutionSpace& ex, const ::Kokkos::View& view) { namespace KE = ::Kokkos::Experimental; @@ -53,12 +63,14 @@ auto reduce(const ExecutionSpace& ex, using view_type = ::Kokkos::View; using value_type = typename 
view_type::value_type; - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( "Kokkos::reduce_default_functors_view_api", ex, KE::cbegin(view), KE::cend(view), value_type()); } -template +template ::value, + int> = 0> auto reduce(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { namespace KE = ::Kokkos::Experimental; @@ -67,37 +79,43 @@ auto reduce(const std::string& label, const ExecutionSpace& ex, using view_type = ::Kokkos::View; using value_type = typename view_type::value_type; - return Impl::reduce_default_functors_impl(label, ex, KE::cbegin(view), - KE::cend(view), value_type()); + return Impl::reduce_default_functors_exespace_impl( + label, ex, KE::cbegin(view), KE::cend(view), value_type()); } // // overload set2: // -template +template ::value, + int> = 0> ValueType reduce(const ExecutionSpace& ex, IteratorType first, IteratorType last, ValueType init_reduction_value) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( "Kokkos::reduce_default_functors_iterator_api", ex, first, last, init_reduction_value); } -template +template ::value, + int> = 0> ValueType reduce(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ValueType init_reduction_value) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::reduce_default_functors_impl(label, ex, first, last, - init_reduction_value); + return Impl::reduce_default_functors_exespace_impl(label, ex, first, last, + init_reduction_value); } -template +template ::value, + int> = 0> ValueType reduce(const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value) { @@ -107,13 +125,15 @@ ValueType reduce(const ExecutionSpace& ex, namespace KE = ::Kokkos::Experimental; 
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( "Kokkos::reduce_default_functors_view_api", ex, KE::cbegin(view), KE::cend(view), init_reduction_value); } -template +template ::value, + int> = 0> ValueType reduce(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value) { @@ -123,40 +143,46 @@ ValueType reduce(const std::string& label, const ExecutionSpace& ex, namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( label, ex, KE::cbegin(view), KE::cend(view), init_reduction_value); } // // overload set 3 // -template +template ::value, + int> = 0> ValueType reduce(const ExecutionSpace& ex, IteratorType first, IteratorType last, ValueType init_reduction_value, BinaryOp joiner) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::reduce_custom_functors_impl( + return Impl::reduce_custom_functors_exespace_impl( "Kokkos::reduce_default_functors_iterator_api", ex, first, last, init_reduction_value, joiner); } -template +template ::value, + int> = 0> ValueType reduce(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ValueType init_reduction_value, BinaryOp joiner) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::reduce_custom_functors_impl(label, ex, first, last, - init_reduction_value, joiner); + return Impl::reduce_custom_functors_exespace_impl( + label, ex, first, last, init_reduction_value, joiner); } -template +template ::value, + int> = 0> ValueType reduce(const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value, BinaryOp joiner) { @@ -166,13 +192,15 @@ ValueType 
reduce(const ExecutionSpace& ex, namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::reduce_custom_functors_impl( + return Impl::reduce_custom_functors_exespace_impl( "Kokkos::reduce_custom_functors_view_api", ex, KE::cbegin(view), KE::cend(view), init_reduction_value, joiner); } -template +template ::value, + int> = 0> ValueType reduce(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value, BinaryOp joiner) { @@ -182,9 +210,114 @@ ValueType reduce(const std::string& label, const ExecutionSpace& ex, namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::reduce_custom_functors_impl(label, ex, KE::cbegin(view), - KE::cend(view), init_reduction_value, - joiner); + return Impl::reduce_custom_functors_exespace_impl( + label, ex, KE::cbegin(view), KE::cend(view), init_reduction_value, + joiner); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// +// overload set 1 +// +template < + typename TeamHandleType, typename IteratorType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION typename IteratorType::value_type reduce( + const TeamHandleType& teamHandle, IteratorType first, IteratorType last) { + return Impl::reduce_default_functors_team_impl( + teamHandle, first, last, typename IteratorType::value_type()); +} + +template < + typename TeamHandleType, typename DataType, typename... 
Properties, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto reduce( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view) { + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + using view_type = ::Kokkos::View; + using value_type = typename view_type::value_type; + + return Impl::reduce_default_functors_team_impl(teamHandle, KE::cbegin(view), + KE::cend(view), value_type()); +} + +// +// overload set2: +// +template < + typename TeamHandleType, typename IteratorType, typename ValueType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType reduce(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ValueType init_reduction_value) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + return Impl::reduce_default_functors_team_impl(teamHandle, first, last, + init_reduction_value); +} + +template < + typename TeamHandleType, typename DataType, typename... 
Properties, + typename ValueType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType +reduce(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + ValueType init_reduction_value) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::reduce_default_functors_team_impl( + teamHandle, KE::cbegin(view), KE::cend(view), init_reduction_value); +} + +// +// overload set 3 +// +template < + typename TeamHandleType, typename IteratorType, typename ValueType, + typename BinaryOp, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType reduce(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ValueType init_reduction_value, + BinaryOp joiner) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + return Impl::reduce_custom_functors_team_impl(teamHandle, first, last, + init_reduction_value, joiner); +} + +template < + typename TeamHandleType, typename DataType, typename... 
Properties, + typename ValueType, typename BinaryOp, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType +reduce(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + ValueType init_reduction_value, BinaryOp joiner) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::reduce_custom_functors_team_impl(teamHandle, KE::cbegin(view), + KE::cend(view), + init_reduction_value, joiner); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Remove.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Remove.hpp index c8602d2f53..8a429d8d51 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Remove.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Remove.hpp @@ -23,38 +23,74 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename Iterator, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> Iterator remove(const ExecutionSpace& ex, Iterator first, Iterator last, const ValueType& value) { - return Impl::remove_impl("Kokkos::remove_iterator_api_default", ex, first, - last, value); + return Impl::remove_exespace_impl("Kokkos::remove_iterator_api_default", ex, + first, last, value); } -template +template < + typename ExecutionSpace, typename Iterator, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> Iterator remove(const std::string& label, const ExecutionSpace& ex, Iterator first, Iterator last, const ValueType& value) { - return Impl::remove_impl(label, ex, first, last, value); + return Impl::remove_exespace_impl(label, ex, first, last, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove(const ExecutionSpace& ex, const ::Kokkos::View& view, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::remove_impl("Kokkos::remove_iterator_api_default", ex, - ::Kokkos::Experimental::begin(view), - ::Kokkos::Experimental::end(view), value); + return Impl::remove_exespace_impl("Kokkos::remove_iterator_api_default", ex, + ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::remove_impl(label, ex, ::Kokkos::Experimental::begin(view), - ::Kokkos::Experimental::end(view), value); + return Impl::remove_exespace_impl(label, ex, + ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION Iterator remove(const TeamHandleType& teamHandle, + Iterator first, Iterator last, + const ValueType& value) { + return Impl::remove_team_impl(teamHandle, first, last, value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto remove(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ValueType& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::remove_team_impl(teamHandle, ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp index c2c06f6202..4b8fa9fe07 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp @@ -23,26 +23,36 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator remove_copy(const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& value) { - return Impl::remove_copy_impl("Kokkos::remove_copy_iterator_api_default", ex, - first_from, last_from, first_dest, value); + return Impl::remove_copy_exespace_impl( + "Kokkos::remove_copy_iterator_api_default", ex, first_from, last_from, + first_dest, value); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator remove_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& value) { - return 
Impl::remove_copy_impl(label, ex, first_from, last_from, first_dest, - value); + return Impl::remove_copy_exespace_impl(label, ex, first_from, last_from, + first_dest, value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_copy(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -50,15 +60,17 @@ auto remove_copy(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::remove_copy_impl("Kokkos::remove_copy_iterator_api_default", ex, - ::Kokkos::Experimental::cbegin(view_from), - ::Kokkos::Experimental::cend(view_from), - ::Kokkos::Experimental::begin(view_dest), - value); + return Impl::remove_copy_exespace_impl( + "Kokkos::remove_copy_iterator_api_default", ex, + ::Kokkos::Experimental::cbegin(view_from), + ::Kokkos::Experimental::cend(view_from), + ::Kokkos::Experimental::begin(view_dest), value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -66,12 +78,46 @@ auto remove_copy(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::remove_copy_impl( + return Impl::remove_copy_exespace_impl( label, ex, ::Kokkos::Experimental::cbegin(view_from), ::Kokkos::Experimental::cend(view_from), ::Kokkos::Experimental::begin(view_dest), value); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator remove_copy(const TeamHandleType& teamHandle, + InputIterator first_from, + InputIterator last_from, + OutputIterator first_dest, + const ValueType& value) { + return Impl::remove_copy_team_impl(teamHandle, first_from, last_from, + first_dest, value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto remove_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + const ValueType& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + + return Impl::remove_copy_team_impl( + teamHandle, ::Kokkos::Experimental::cbegin(view_from), + ::Kokkos::Experimental::cend(view_from), + ::Kokkos::Experimental::begin(view_dest), value); +} + } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp index 6d642ed6f0..45e2b54bb6 100644 --- 
a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp @@ -23,30 +23,39 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator remove_copy_if(const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const UnaryPredicate& pred) { - return Impl::remove_copy_if_impl( + return Impl::remove_copy_if_exespace_impl( "Kokkos::remove_copy_if_iterator_api_default", ex, first_from, last_from, first_dest, pred); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator remove_copy_if(const std::string& label, const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const UnaryPredicate& pred) { - return Impl::remove_copy_if_impl(label, ex, first_from, last_from, first_dest, - pred); + return Impl::remove_copy_if_exespace_impl(label, ex, first_from, last_from, + first_dest, pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_copy_if(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -54,15 +63,17 @@ auto remove_copy_if(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::remove_copy_if_impl( + return Impl::remove_copy_if_exespace_impl( "Kokkos::remove_copy_if_iterator_api_default", ex, ::Kokkos::Experimental::cbegin(view_from), ::Kokkos::Experimental::cend(view_from), ::Kokkos::Experimental::begin(view_dest), pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_copy_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -70,12 +81,46 @@ auto remove_copy_if(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::remove_copy_if_impl( + return Impl::remove_copy_if_exespace_impl( label, ex, ::Kokkos::Experimental::cbegin(view_from), ::Kokkos::Experimental::cend(view_from), ::Kokkos::Experimental::begin(view_dest), pred); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator remove_copy_if(const TeamHandleType& teamHandle, + InputIterator first_from, + InputIterator last_from, + OutputIterator first_dest, + const UnaryPredicate& pred) { + return Impl::remove_copy_if_team_impl(teamHandle, first_from, last_from, + first_dest, pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto remove_copy_if( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + const UnaryPredicate& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + + return Impl::remove_copy_if_team_impl( + teamHandle, ::Kokkos::Experimental::cbegin(view_from), + ::Kokkos::Experimental::cend(view_from), + ::Kokkos::Experimental::begin(view_dest), pred); +} + } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp index 4062e8d373..38461a37f2 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp @@ -23,39 +23,77 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename Iterator, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> Iterator remove_if(const ExecutionSpace& ex, Iterator first, Iterator last, UnaryPredicate pred) { - return Impl::remove_if_impl("Kokkos::remove_if_iterator_api_default", ex, - first, last, pred); + return Impl::remove_if_exespace_impl("Kokkos::remove_if_iterator_api_default", + ex, first, last, pred); } -template +template < + typename ExecutionSpace, typename Iterator, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> Iterator remove_if(const std::string& label, 
const ExecutionSpace& ex, Iterator first, Iterator last, UnaryPredicate pred) { - return Impl::remove_if_impl(label, ex, first, last, pred); + return Impl::remove_if_exespace_impl(label, ex, first, last, pred); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_if(const ExecutionSpace& ex, const ::Kokkos::View& view, UnaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::remove_if_impl("Kokkos::remove_if_iterator_api_default", ex, - ::Kokkos::Experimental::begin(view), - ::Kokkos::Experimental::end(view), pred); + return Impl::remove_if_exespace_impl("Kokkos::remove_if_iterator_api_default", + ex, ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), pred); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, UnaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::remove_if_impl(label, ex, ::Kokkos::Experimental::begin(view), - ::Kokkos::Experimental::end(view), pred); + return Impl::remove_if_exespace_impl(label, ex, + ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION Iterator remove_if(const TeamHandleType& teamHandle, + Iterator first, Iterator last, + UnaryPredicate pred) { + return Impl::remove_if_team_impl(teamHandle, first, last, pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto remove_if( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, UnaryPredicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::remove_if_team_impl(teamHandle, + ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), pred); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Replace.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Replace.hpp index 4d1490ded0..29afc4f0c2 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Replace.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Replace.hpp @@ -23,40 +23,77 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename Iterator, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace(const ExecutionSpace& ex, Iterator first, Iterator last, const ValueType& old_value, const ValueType& new_value) { - return Impl::replace_impl("Kokkos::replace_iterator_api", ex, first, last, - old_value, new_value); + Impl::replace_exespace_impl("Kokkos::replace_iterator_api", ex, first, last, + old_value, new_value); } -template +template < + typename ExecutionSpace, typename Iterator, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace(const std::string& label, const ExecutionSpace& ex, Iterator first, Iterator last, const ValueType& old_value, const ValueType& new_value) { - return Impl::replace_impl(label, ex, first, last, old_value, new_value); + Impl::replace_exespace_impl(label, ex, first, last, old_value, new_value); } -template +template < + typename 
ExecutionSpace, typename DataType1, typename... Properties1, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace(const ExecutionSpace& ex, const ::Kokkos::View& view, const ValueType& old_value, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::replace_impl("Kokkos::replace_view_api", ex, KE::begin(view), - KE::end(view), old_value, new_value); + Impl::replace_exespace_impl("Kokkos::replace_view_api", ex, KE::begin(view), + KE::end(view), old_value, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ValueType& old_value, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::replace_impl(label, ex, KE::begin(view), KE::end(view), - old_value, new_value); + Impl::replace_exespace_impl(label, ex, KE::begin(view), KE::end(view), + old_value, new_value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION void replace(const TeamHandleType& teamHandle, Iterator first, + Iterator last, const ValueType& old_value, + const ValueType& new_value) { + Impl::replace_team_impl(teamHandle, first, last, old_value, new_value); +} + +template , int> = 0> +KOKKOS_FUNCTION void replace( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ValueType& old_value, const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + namespace KE = ::Kokkos::Experimental; + Impl::replace_team_impl(teamHandle, KE::begin(view), KE::end(view), old_value, + new_value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp index e7f464e4bd..04d5767e89 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp @@ -23,30 +23,39 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator replace_copy(const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& old_value, const ValueType& new_value) { - return Impl::replace_copy_impl("Kokkos::replace_copy_iterator_api", ex, - first_from, last_from, first_dest, old_value, - new_value); + return Impl::replace_copy_exespace_impl("Kokkos::replace_copy_iterator_api", + ex, first_from, last_from, first_dest, + old_value, new_value); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator replace_copy(const std::string& label, 
const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& old_value, const ValueType& new_value) { - return Impl::replace_copy_impl(label, ex, first_from, last_from, first_dest, - old_value, new_value); + return Impl::replace_copy_exespace_impl(label, ex, first_from, last_from, + first_dest, old_value, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto replace_copy(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -54,13 +63,15 @@ auto replace_copy(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::replace_copy_impl("Kokkos::replace_copy_view_api", ex, - KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), old_value, new_value); + return Impl::replace_copy_exespace_impl( + "Kokkos::replace_copy_view_api", ex, KE::cbegin(view_from), + KE::cend(view_from), KE::begin(view_dest), old_value, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto replace_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -68,9 +79,43 @@ auto replace_copy(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::replace_copy_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), KE::begin(view_dest), - old_value, new_value); + return Impl::replace_copy_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), old_value, new_value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator replace_copy(const TeamHandleType& teamHandle, + InputIterator first_from, + InputIterator last_from, + OutputIterator first_dest, + const ValueType& old_value, + const ValueType& new_value) { + return Impl::replace_copy_team_impl(teamHandle, first_from, last_from, + first_dest, old_value, new_value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto replace_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + const ValueType& old_value, const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::replace_copy_team_impl(teamHandle, KE::cbegin(view_from), + KE::cend(view_from), KE::begin(view_dest), + old_value, new_value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp 
b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp index 71ae8f8452..b87163f194 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp @@ -23,33 +23,42 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename PredicateType, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator replace_copy_if(const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, PredicateType pred, const ValueType& new_value) { - return Impl::replace_copy_if_impl("Kokkos::replace_copy_if_iterator_api", ex, - first_from, last_from, first_dest, pred, - new_value); + return Impl::replace_copy_if_exespace_impl( + "Kokkos::replace_copy_if_iterator_api", ex, first_from, last_from, + first_dest, pred, new_value); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename PredicateType, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator replace_copy_if(const std::string& label, const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, PredicateType pred, const ValueType& new_value) { - return Impl::replace_copy_if_impl(label, ex, first_from, last_from, - first_dest, pred, new_value); + return Impl::replace_copy_if_exespace_impl(label, ex, first_from, last_from, + first_dest, pred, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename PredicateType, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto replace_copy_if(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -57,14 +66,16 @@ auto replace_copy_if(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::replace_copy_if_impl("Kokkos::replace_copy_if_view_api", ex, - KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), pred, new_value); + return Impl::replace_copy_if_exespace_impl( + "Kokkos::replace_copy_if_view_api", ex, KE::cbegin(view_from), + KE::cend(view_from), KE::begin(view_dest), pred, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename PredicateType, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto replace_copy_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -72,9 +83,44 @@ auto replace_copy_if(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::replace_copy_if_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), KE::begin(view_dest), - pred, new_value); + return Impl::replace_copy_if_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), pred, new_value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator replace_copy_if(const TeamHandleType& teamHandle, + InputIterator first_from, + InputIterator last_from, + OutputIterator first_dest, + PredicateType pred, + const ValueType& new_value) { + return Impl::replace_copy_if_team_impl(teamHandle, first_from, last_from, + first_dest, pred, new_value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto replace_copy_if( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + PredicateType pred, const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::replace_copy_if_team_impl(teamHandle, KE::cbegin(view_from), + KE::cend(view_from), + KE::begin(view_dest), pred, new_value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp index 7f06540e06..73af1f16f0 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp @@ -23,43 +23,82 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace_if(const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate pred, const ValueType& new_value) { - return Impl::replace_if_impl("Kokkos::replace_if_iterator_api", ex, first, - last, pred, new_value); + Impl::replace_if_exespace_impl("Kokkos::replace_if_iterator_api", ex, first, + last, pred, new_value); } -template +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + typename ValueType, + 
std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace_if(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate pred, const ValueType& new_value) { - return Impl::replace_if_impl(label, ex, first, last, pred, new_value); + Impl::replace_if_exespace_impl(label, ex, first, last, pred, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename Predicate, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace_if(const ExecutionSpace& ex, const ::Kokkos::View& view, Predicate pred, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::replace_if_impl("Kokkos::replace_if_view_api", ex, - KE::begin(view), KE::end(view), pred, new_value); + Impl::replace_if_exespace_impl("Kokkos::replace_if_view_api", ex, + KE::begin(view), KE::end(view), pred, + new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename Predicate, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, Predicate pred, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::replace_if_impl(label, ex, KE::begin(view), KE::end(view), pred, - new_value); + Impl::replace_if_exespace_impl(label, ex, KE::begin(view), KE::end(view), + pred, new_value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION void replace_if(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + Predicate pred, const ValueType& new_value) { + Impl::replace_if_team_impl(teamHandle, first, last, pred, new_value); +} + +template , int> = 0> +KOKKOS_FUNCTION void replace_if( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, Predicate pred, + const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + namespace KE = ::Kokkos::Experimental; + Impl::replace_if_team_impl(teamHandle, KE::begin(view), KE::end(view), pred, + new_value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reverse.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reverse.hpp index 9f2fc5f3cc..a0786d3a2e 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reverse.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reverse.hpp @@ -23,34 +23,67 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void reverse(const ExecutionSpace& ex, InputIterator first, InputIterator last) { - return Impl::reverse_impl("Kokkos::reverse_iterator_api_default", ex, first, - last); + return Impl::reverse_exespace_impl("Kokkos::reverse_iterator_api_default", ex, + first, last); } -template +template < + typename ExecutionSpace, typename InputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void reverse(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last) { - return Impl::reverse_impl(label, ex, first, last); + return Impl::reverse_exespace_impl(label, ex, first, last); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void reverse(const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::reverse_impl("Kokkos::reverse_view_api_default", ex, - KE::begin(view), KE::end(view)); + return Impl::reverse_exespace_impl("Kokkos::reverse_view_api_default", ex, + KE::begin(view), KE::end(view)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void reverse(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::reverse_impl(label, ex, KE::begin(view), KE::end(view)); + return Impl::reverse_exespace_impl(label, ex, KE::begin(view), KE::end(view)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION void reverse(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last) { + return Impl::reverse_team_impl(teamHandle, first, last); +} + +template , int> = 0> +KOKKOS_FUNCTION void reverse( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + namespace KE = ::Kokkos::Experimental; + return Impl::reverse_team_impl(teamHandle, KE::begin(view), KE::end(view)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp index 279bb22086..37336c983a 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp @@ -23,42 +23,83 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator reverse_copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::reverse_copy_impl("Kokkos::reverse_copy_iterator_api_default", - ex, first, last, d_first); + return Impl::reverse_copy_exespace_impl( + "Kokkos::reverse_copy_iterator_api_default", ex, first, last, d_first); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator reverse_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::reverse_copy_impl(label, ex, first, last, d_first); + return Impl::reverse_copy_exespace_impl(label, ex, first, last, d_first); } -template +template < + typename ExecutionSpace, typename DataType1, 
typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto reverse_copy(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::reverse_copy_impl("Kokkos::reverse_copy_view_api_default", ex, - cbegin(source), cend(source), begin(dest)); + return Impl::reverse_copy_exespace_impl( + "Kokkos::reverse_copy_view_api_default", ex, cbegin(source), cend(source), + begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto reverse_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::reverse_copy_impl(label, ex, cbegin(source), cend(source), - begin(dest)); + return Impl::reverse_copy_exespace_impl(label, ex, cbegin(source), + cend(source), begin(dest)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator reverse_copy(const TeamHandleType& teamHandle, + InputIterator first, + InputIterator last, + OutputIterator d_first) { + return Impl::reverse_copy_team_impl(teamHandle, first, last, d_first); +} + +template , int> = 0> +KOKKOS_FUNCTION auto reverse_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::reverse_copy_team_impl(teamHandle, cbegin(source), cend(source), + begin(dest)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Rotate.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Rotate.hpp index 738e9bf137..aff04b47d6 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Rotate.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Rotate.hpp @@ -23,36 +23,71 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType rotate(const ExecutionSpace& ex, IteratorType first, IteratorType n_first, IteratorType last) { - return Impl::rotate_impl("Kokkos::rotate_iterator_api_default", ex, first, - n_first, last); + return Impl::rotate_exespace_impl("Kokkos::rotate_iterator_api_default", ex, + first, n_first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType rotate(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType n_first, IteratorType last) { - return Impl::rotate_impl(label, ex, first, n_first, last); + return Impl::rotate_exespace_impl(label, ex, first, n_first, last); } -template +template < + typename ExecutionSpace, typename DataType, 
typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto rotate(const ExecutionSpace& ex, const ::Kokkos::View& view, std::size_t n_location) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::rotate_impl("Kokkos::rotate_view_api_default", ex, begin(view), - begin(view) + n_location, end(view)); + return Impl::rotate_exespace_impl("Kokkos::rotate_view_api_default", ex, + begin(view), begin(view) + n_location, + end(view)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto rotate(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, std::size_t n_location) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::rotate_impl(label, ex, begin(view), begin(view) + n_location, - end(view)); + return Impl::rotate_exespace_impl(label, ex, begin(view), + begin(view) + n_location, end(view)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION IteratorType rotate(const TeamHandleType& teamHandle, + IteratorType first, IteratorType n_first, + IteratorType last) { + return Impl::rotate_team_impl(teamHandle, first, n_first, last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto rotate(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + std::size_t n_location) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::rotate_team_impl(teamHandle, begin(view), + begin(view) + n_location, end(view)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp index f5d826c4bb..cce37fccfa 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp @@ -23,23 +23,34 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator rotate_copy(const ExecutionSpace& ex, InputIterator first, InputIterator n_first, InputIterator last, OutputIterator d_first) { - return Impl::rotate_copy_impl("Kokkos::rotate_copy_iterator_api_default", ex, - first, n_first, last, d_first); + return Impl::rotate_copy_exespace_impl( + "Kokkos::rotate_copy_iterator_api_default", ex, first, n_first, last, + d_first); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator rotate_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator n_first, InputIterator last, OutputIterator d_first) { - return Impl::rotate_copy_impl(label, ex, first, n_first, last, d_first); + return Impl::rotate_copy_exespace_impl(label, 
ex, first, n_first, last, + d_first); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto rotate_copy(const ExecutionSpace& ex, const ::Kokkos::View& source, std::size_t n_location, @@ -47,13 +58,15 @@ auto rotate_copy(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::rotate_copy_impl("Kokkos::rotate_copy_view_api_default", ex, - cbegin(source), cbegin(source) + n_location, - cend(source), begin(dest)); + return Impl::rotate_copy_exespace_impl( + "Kokkos::rotate_copy_view_api_default", ex, cbegin(source), + cbegin(source) + n_location, cend(source), begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto rotate_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, std::size_t n_location, @@ -61,9 +74,41 @@ auto rotate_copy(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::rotate_copy_impl(label, ex, cbegin(source), - cbegin(source) + n_location, cend(source), - begin(dest)); + return Impl::rotate_copy_exespace_impl(label, ex, cbegin(source), + cbegin(source) + n_location, + cend(source), begin(dest)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator rotate_copy(const TeamHandleType& teamHandle, + InputIterator first, + InputIterator n_first, + InputIterator last, + OutputIterator d_first) { + return Impl::rotate_copy_team_impl(teamHandle, first, n_first, last, d_first); +} + +template , int> = 0> +KOKKOS_FUNCTION auto rotate_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + std::size_t n_location, + const ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::rotate_copy_team_impl(teamHandle, cbegin(source), + cbegin(source) + n_location, cend(source), + begin(dest)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Search.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Search.hpp index b1154b297e..43258a484e 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Search.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Search.hpp @@ -23,24 +23,34 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1: no binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 search(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::search_impl("Kokkos::search_iterator_api_default", ex, first, - last, s_first, s_last); + return Impl::search_exespace_impl("Kokkos::search_iterator_api_default", ex, + first, last, s_first, s_last); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 search(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, 
IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::search_impl(label, ex, first, last, s_first, s_last); + return Impl::search_exespace_impl(label, ex, first, last, s_first, s_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto search(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -48,13 +58,15 @@ auto search(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::search_impl("Kokkos::search_view_api_default", ex, - KE::begin(view), KE::end(view), KE::begin(s_view), - KE::end(s_view)); + return Impl::search_exespace_impl("Kokkos::search_view_api_default", ex, + KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto search(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -62,31 +74,38 @@ auto search(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::search_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view)); + return Impl::search_exespace_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); } // overload set 2: binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 search(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { - return Impl::search_impl("Kokkos::search_iterator_api_default", ex, first, - last, s_first, s_last, pred); + return Impl::search_exespace_impl("Kokkos::search_iterator_api_default", ex, + first, last, s_first, s_last, pred); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 search(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { - return Impl::search_impl(label, ex, first, last, s_first, s_last, pred); + return Impl::search_exespace_impl(label, ex, first, last, s_first, s_last, + pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto search(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -95,13 +114,15 @@ auto search(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::search_impl("Kokkos::search_view_api_default", ex, - KE::begin(view), KE::end(view), KE::begin(s_view), - KE::end(s_view), pred); + return Impl::search_exespace_impl("Kokkos::search_view_api_default", ex, + KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto search(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -110,8 +131,70 @@ auto search(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::search_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view), pred); + return Impl::search_exespace_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// + +// overload set 1: no binary predicate passed +template , int> = 0> +KOKKOS_FUNCTION IteratorType1 search(const TeamHandleType& teamHandle, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last) { + return Impl::search_team_impl(teamHandle, first, last, s_first, s_last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto search( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_team_impl(teamHandle, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); +} + +// overload set 2: binary predicate passed +template , int> = 0> + +KOKKOS_FUNCTION IteratorType1 search(const TeamHandleType& teamHandle, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last, + const BinaryPredicateType& pred) { + return Impl::search_team_impl(teamHandle, first, last, s_first, s_last, pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto search( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_team_impl(teamHandle, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SearchN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SearchN.hpp index a649c8f205..0f8aa5f1c1 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SearchN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SearchN.hpp @@ -23,68 +23,86 @@ namespace Kokkos { namespace 
Experimental { +// +// overload set accepting execution space +// + // overload set 1: no binary predicate passed -template +template < + class ExecutionSpace, class IteratorType, class SizeType, class ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType search_n(const ExecutionSpace& ex, IteratorType first, IteratorType last, SizeType count, const ValueType& value) { - return Impl::search_n_impl("Kokkos::search_n_iterator_api_default", ex, first, - last, count, value); + return Impl::search_n_exespace_impl("Kokkos::search_n_iterator_api_default", + ex, first, last, count, value); } -template +template < + class ExecutionSpace, class IteratorType, class SizeType, class ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType search_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, SizeType count, const ValueType& value) { - return Impl::search_n_impl(label, ex, first, last, count, value); + return Impl::search_n_exespace_impl(label, ex, first, last, count, value); } template + class SizeType, class ValueType, + std::enable_if_t<::Kokkos::is_execution_space::value, + int> = 0> auto search_n(const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType count, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::search_n_impl("Kokkos::search_n_view_api_default", ex, - KE::begin(view), KE::end(view), count, value); + return Impl::search_n_exespace_impl("Kokkos::search_n_view_api_default", ex, + KE::begin(view), KE::end(view), count, + value); } template + class SizeType, class ValueType, + std::enable_if_t<::Kokkos::is_execution_space::value, + int> = 0> auto search_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType count, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = 
::Kokkos::Experimental; - return Impl::search_n_impl(label, ex, KE::begin(view), KE::end(view), count, - value); + return Impl::search_n_exespace_impl(label, ex, KE::begin(view), KE::end(view), + count, value); } // overload set 2: binary predicate passed -template +template < + class ExecutionSpace, class IteratorType, class SizeType, class ValueType, + class BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType search_n(const ExecutionSpace& ex, IteratorType first, IteratorType last, SizeType count, const ValueType& value, const BinaryPredicateType& pred) { - return Impl::search_n_impl("Kokkos::search_n_iterator_api_default", ex, first, - last, count, value, pred); + return Impl::search_n_exespace_impl("Kokkos::search_n_iterator_api_default", + ex, first, last, count, value, pred); } -template +template < + class ExecutionSpace, class IteratorType, class SizeType, class ValueType, + class BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType search_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, SizeType count, const ValueType& value, const BinaryPredicateType& pred) { - return Impl::search_n_impl(label, ex, first, last, count, value, pred); + return Impl::search_n_exespace_impl(label, ex, first, last, count, value, + pred); } template + class SizeType, class ValueType, class BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space::value, + int> = 0> auto search_n(const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType count, const ValueType& value, @@ -92,13 +110,15 @@ auto search_n(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::search_n_impl("Kokkos::search_n_view_api_default", ex, - KE::begin(view), KE::end(view), count, value, - pred); + return Impl::search_n_exespace_impl("Kokkos::search_n_view_api_default", ex, + 
KE::begin(view), KE::end(view), count, + value, pred); } template + class SizeType, class ValueType, class BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space::value, + int> = 0> auto search_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType count, const ValueType& value, @@ -106,8 +126,65 @@ auto search_n(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::search_n_impl(label, ex, KE::begin(view), KE::end(view), count, - value, pred); + return Impl::search_n_exespace_impl(label, ex, KE::begin(view), KE::end(view), + count, value, pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// overload set 1: no binary predicate passed +template , int> = 0> +KOKKOS_FUNCTION IteratorType search_n(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + SizeType count, const ValueType& value) { + return Impl::search_n_team_impl(teamHandle, first, last, count, value); +} + +template < + class TeamHandleType, class DataType, class... 
Properties, class SizeType, + class ValueType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto search_n( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, SizeType count, + const ValueType& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_n_team_impl(teamHandle, KE::begin(view), KE::end(view), + count, value); +} + +// overload set 2: binary predicate passed +template , int> = 0> +KOKKOS_FUNCTION IteratorType search_n(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + SizeType count, const ValueType& value, + const BinaryPredicateType& pred) { + return Impl::search_n_team_impl(teamHandle, first, last, count, value, pred); +} + +template < + class TeamHandleType, class DataType, class... Properties, class SizeType, + class ValueType, class BinaryPredicateType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto search_n( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, SizeType count, + const ValueType& value, const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_n_team_impl(teamHandle, KE::begin(view), KE::end(view), + count, value, pred); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp index 4b91a17ab8..b3e04a3b97 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp @@ -23,36 +23,70 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType 
shift_left(const ExecutionSpace& ex, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { - return Impl::shift_left_impl("Kokkos::shift_left_iterator_api_default", ex, - first, last, n); + return Impl::shift_left_exespace_impl( + "Kokkos::shift_left_iterator_api_default", ex, first, last, n); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType shift_left(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { - return Impl::shift_left_impl(label, ex, first, last, n); + return Impl::shift_left_exespace_impl(label, ex, first, last, n); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto shift_left(const ExecutionSpace& ex, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::shift_left_impl("Kokkos::shift_left_view_api_default", ex, - begin(view), end(view), n); + return Impl::shift_left_exespace_impl("Kokkos::shift_left_view_api_default", + ex, begin(view), end(view), n); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto shift_left(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::shift_left_impl(label, ex, begin(view), end(view), n); + return Impl::shift_left_exespace_impl(label, ex, begin(view), end(view), n); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType +shift_left(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, typename IteratorType::difference_type n) { + return Impl::shift_left_team_impl(teamHandle, first, last, n); +} + +template , int> = 0> +KOKKOS_FUNCTION auto shift_left( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + typename decltype(begin(view))::difference_type n) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::shift_left_team_impl(teamHandle, begin(view), end(view), n); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp index 2ea50fd74e..0f7ed53948 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp @@ -23,36 +23,70 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType shift_right(const ExecutionSpace& ex, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { - return Impl::shift_right_impl("Kokkos::shift_right_iterator_api_default", ex, 
- first, last, n); + return Impl::shift_right_exespace_impl( + "Kokkos::shift_right_iterator_api_default", ex, first, last, n); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType shift_right(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { - return Impl::shift_right_impl(label, ex, first, last, n); + return Impl::shift_right_exespace_impl(label, ex, first, last, n); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto shift_right(const ExecutionSpace& ex, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::shift_right_impl("Kokkos::shift_right_view_api_default", ex, - begin(view), end(view), n); + return Impl::shift_right_exespace_impl("Kokkos::shift_right_view_api_default", + ex, begin(view), end(view), n); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto shift_right(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::shift_right_impl(label, ex, begin(view), end(view), n); + return Impl::shift_right_exespace_impl(label, ex, begin(view), end(view), n); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION IteratorType +shift_right(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, typename IteratorType::difference_type n) { + return Impl::shift_right_team_impl(teamHandle, first, last, n); +} + +template , int> = 0> +KOKKOS_FUNCTION auto shift_right( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + typename decltype(begin(view))::difference_type n) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::shift_right_team_impl(teamHandle, begin(view), end(view), n); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp index 5fbf045318..39f33b6487 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp @@ -23,15 +23,21 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template , int> = 0> IteratorType2 swap_ranges(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2) { - return Impl::swap_ranges_impl("Kokkos::swap_ranges_iterator_api_default", ex, - first1, last1, first2); + return Impl::swap_ranges_exespace_impl( + "Kokkos::swap_ranges_iterator_api_default", ex, first1, last1, first2); } -template +template , int> = 0> auto swap_ranges(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { @@ -39,19 +45,23 @@ auto swap_ranges(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); assert(source.extent(0) == dest.extent(0)); - return Impl::swap_ranges_impl("Kokkos::swap_ranges_view_api_default", ex, - begin(source), end(source), begin(dest)); + return Impl::swap_ranges_exespace_impl("Kokkos::swap_ranges_view_api_default", + ex, begin(source), end(source), + begin(dest)); } -template +template , int> 
= 0> IteratorType2 swap_ranges(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2) { - return Impl::swap_ranges_impl(label, ex, first1, last1, first2); + return Impl::swap_ranges_exespace_impl(label, ex, first1, last1, first2); } -template +template , int> = 0> auto swap_ranges(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { @@ -59,8 +69,38 @@ auto swap_ranges(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); assert(source.extent(0) == dest.extent(0)); - return Impl::swap_ranges_impl(label, ex, begin(source), end(source), - begin(dest)); + return Impl::swap_ranges_exespace_impl(label, ex, begin(source), end(source), + begin(dest)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION IteratorType2 swap_ranges(const TeamHandleType& teamHandle, + IteratorType1 first1, + IteratorType1 last1, + IteratorType2 first2) { + return Impl::swap_ranges_team_impl(teamHandle, first1, last1, first2); +} + +template , int> = 0> +KOKKOS_FUNCTION auto swap_ranges( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + assert(source.extent(0) == dest.extent(0)); + return Impl::swap_ranges_team_impl(teamHandle, begin(source), end(source), + begin(dest)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Transform.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Transform.hpp index 27dee30426..838c9169e2 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Transform.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Transform.hpp @@ -23,31 +23,39 @@ namespace Kokkos { namespace Experimental { -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIterator, OutputIterator>::value, - OutputIterator> -transform(const ExecutionSpace& ex, InputIterator first1, InputIterator last1, - OutputIterator d_first, UnaryOperation unary_op) { - return Impl::transform_impl("Kokkos::transform_iterator_api_default", ex, - first1, last1, d_first, std::move(unary_op)); +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename UnaryOperation, + std::enable_if_t && + is_execution_space_v, + int> = 0> +OutputIterator transform(const ExecutionSpace& ex, InputIterator first1, + InputIterator last1, OutputIterator d_first, + UnaryOperation unary_op) { + return Impl::transform_exespace_impl("Kokkos::transform_iterator_api_default", + ex, first1, last1, d_first, + std::move(unary_op)); } -template 
-std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIterator, OutputIterator>::value, - OutputIterator> -transform(const std::string& label, const ExecutionSpace& ex, - InputIterator first1, InputIterator last1, OutputIterator d_first, - UnaryOperation unary_op) { - return Impl::transform_impl(label, ex, first1, last1, d_first, - std::move(unary_op)); +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename UnaryOperation, + std::enable_if_t && + is_execution_space_v, + int> = 0> +OutputIterator transform(const std::string& label, const ExecutionSpace& ex, + InputIterator first1, InputIterator last1, + OutputIterator d_first, UnaryOperation unary_op) { + return Impl::transform_exespace_impl(label, ex, first1, last1, d_first, + std::move(unary_op)); } -template +template , int> = 0> auto transform(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest, @@ -55,13 +63,14 @@ auto transform(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::transform_impl("Kokkos::transform_view_api_default", ex, - begin(source), end(source), begin(dest), - std::move(unary_op)); + return Impl::transform_exespace_impl("Kokkos::transform_view_api_default", ex, + begin(source), end(source), begin(dest), + std::move(unary_op)); } -template +template , int> = 0> auto transform(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest, @@ -69,38 +78,44 @@ auto transform(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::transform_impl(label, ex, begin(source), end(source), - begin(dest), std::move(unary_op)); + return Impl::transform_exespace_impl(label, ex, begin(source), end(source), + 
begin(dest), std::move(unary_op)); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIterator1, InputIterator2, OutputIterator>::value, - OutputIterator> -transform(const ExecutionSpace& ex, InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, OutputIterator d_first, - BinaryOperation binary_op) { - return Impl::transform_impl("Kokkos::transform_iterator_api_default", ex, - first1, last1, first2, d_first, - std::move(binary_op)); +template < + typename ExecutionSpace, typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename BinaryOperation, + std::enable_if_t< + Impl::are_iterators_v && + is_execution_space_v, + int> = 0> +OutputIterator transform(const ExecutionSpace& ex, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, + OutputIterator d_first, BinaryOperation binary_op) { + return Impl::transform_exespace_impl("Kokkos::transform_iterator_api_default", + ex, first1, last1, first2, d_first, + std::move(binary_op)); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIterator1, InputIterator2, OutputIterator>::value, - OutputIterator> -transform(const std::string& label, const ExecutionSpace& ex, - InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator d_first, BinaryOperation binary_op) { - return Impl::transform_impl(label, ex, first1, last1, first2, d_first, - std::move(binary_op)); +template < + typename ExecutionSpace, typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename BinaryOperation, + std::enable_if_t< + Impl::are_iterators_v && + is_execution_space_v, + int> = 0> +OutputIterator transform(const std::string& label, const ExecutionSpace& ex, + InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, OutputIterator d_first, + BinaryOperation binary_op) { + return Impl::transform_exespace_impl(label, ex, first1, last1, first2, + d_first, 
std::move(binary_op)); } -template +template , int> = 0> auto transform(const ExecutionSpace& ex, const ::Kokkos::View& source1, const ::Kokkos::View& source2, @@ -110,14 +125,15 @@ auto transform(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::transform_impl("Kokkos::transform_view_api_default", ex, - begin(source1), end(source1), begin(source2), - begin(dest), std::move(binary_op)); + return Impl::transform_exespace_impl( + "Kokkos::transform_view_api_default", ex, begin(source1), end(source1), + begin(source2), begin(dest), std::move(binary_op)); } -template +template , int> = 0> auto transform(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source1, const ::Kokkos::View& source2, @@ -127,9 +143,79 @@ auto transform(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::transform_impl(label, ex, begin(source1), end(source1), - begin(source2), begin(dest), - std::move(binary_op)); + return Impl::transform_exespace_impl(label, ex, begin(source1), end(source1), + begin(source2), begin(dest), + std::move(binary_op)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template < + typename TeamHandleType, typename InputIterator, typename OutputIterator, + typename UnaryOperation, + std::enable_if_t && + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIterator transform(const TeamHandleType& teamHandle, + InputIterator first1, + InputIterator last1, + OutputIterator d_first, + UnaryOperation unary_op) { + return Impl::transform_team_impl(teamHandle, first1, last1, d_first, + std::move(unary_op)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto transform( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest, UnaryOperation unary_op) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::transform_team_impl(teamHandle, begin(source), end(source), + begin(dest), std::move(unary_op)); +} + +template < + typename TeamHandleType, typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename BinaryOperation, + std::enable_if_t< + Impl::are_iterators_v && + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIterator transform(const TeamHandleType& teamHandle, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator d_first, + BinaryOperation binary_op) { + return Impl::transform_team_impl(teamHandle, first1, last1, first2, d_first, + std::move(binary_op)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto transform( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source1, + const ::Kokkos::View& source2, + ::Kokkos::View& dest, + BinaryOperation binary_op) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::transform_team_impl(teamHandle, begin(source1), end(source1), + begin(source2), begin(dest), + std::move(binary_op)); } } // namespace 
Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp index 9d85aee06f..37fc0f860e 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp @@ -23,44 +23,52 @@ namespace Kokkos { namespace Experimental { -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - ValueType init_value, BinaryOpType binary_op, - UnaryOpType unary_op) { +// +// overload set accepting execution space +// +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_exclusive_scan( + const ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, + OutputIteratorType first_dest, ValueType init_value, BinaryOpType binary_op, + UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::transform_exclusive_scan_impl( + return Impl::transform_exclusive_scan_exespace_impl( "Kokkos::transform_exclusive_scan_custom_functors_iterator_api", ex, - first, last, first_dest, init_value, binary_op, unary_op); + first, last, first_dest, std::move(init_value), binary_op, unary_op); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_exclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, ValueType init_value, - BinaryOpType binary_op, UnaryOpType unary_op) { 
+template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_exclusive_scan( + const std::string& label, const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, ValueType init_value, + BinaryOpType binary_op, UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::transform_exclusive_scan_impl(label, ex, first, last, first_dest, - init_value, binary_op, unary_op); + return Impl::transform_exclusive_scan_exespace_impl( + label, ex, first, last, first_dest, std::move(init_value), binary_op, + unary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + typename BinaryOpType, typename UnaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_exclusive_scan( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -69,18 +77,20 @@ auto transform_exclusive_scan( Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::transform_exclusive_scan_impl( + return Impl::transform_exclusive_scan_exespace_impl( "Kokkos::transform_exclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - init_value, binary_op, unary_op); + std::move(init_value), binary_op, unary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename ValueType, + typename BinaryOpType, typename UnaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_exclusive_scan( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -89,12 +99,56 @@ auto transform_exclusive_scan( Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::transform_exclusive_scan_impl( + return Impl::transform_exclusive_scan_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), init_value, binary_op, unary_op); + KE::begin(view_dest), std::move(init_value), binary_op, unary_op); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template && :: + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType transform_exclusive_scan( + const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, ValueType init_value, + BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(teamHandle); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + return Impl::transform_exclusive_scan_team_impl( + teamHandle, first, last, first_dest, std::move(init_value), binary_op, + unary_op); +} + +template , int> = 0> +KOKKOS_FUNCTION auto transform_exclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + ValueType init_value, BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(teamHandle); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::transform_exclusive_scan_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), std::move(init_value), binary_op, unary_op); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp index 7489af7e37..5f694dbfd9 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp @@ -23,40 +23,53 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1 (no init value) -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, 
OutputIteratorType>::value, - OutputIteratorType> -transform_inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - BinaryOpType binary_op, UnaryOpType unary_op) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_inclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + BinaryOpType binary_op, + UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::transform_inclusive_scan_impl( + return Impl::transform_inclusive_scan_exespace_impl( "Kokkos::transform_inclusive_scan_custom_functors_iterator_api", ex, first, last, first_dest, binary_op, unary_op); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, BinaryOpType binary_op, - UnaryOpType unary_op) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_inclusive_scan( + const std::string& label, const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + BinaryOpType binary_op, UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::transform_inclusive_scan_impl(label, ex, first, last, first_dest, - binary_op, unary_op); + return Impl::transform_inclusive_scan_exespace_impl( + label, ex, first, last, first_dest, binary_op, unary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryOpType, + typename UnaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_inclusive_scan( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -66,15 +79,17 @@ auto transform_inclusive_scan( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::transform_inclusive_scan_impl( + return Impl::transform_inclusive_scan_exespace_impl( "Kokkos::transform_inclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op, unary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryOpType, + typename UnaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_inclusive_scan( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -84,46 +99,59 @@ auto transform_inclusive_scan( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::transform_inclusive_scan_impl( + return Impl::transform_inclusive_scan_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op, unary_op); } // overload set 2 (init value) -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - BinaryOpType binary_op, UnaryOpType unary_op, - ValueType init_value) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_inclusive_scan( + const 
ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, + OutputIteratorType first_dest, BinaryOpType binary_op, UnaryOpType unary_op, + ValueType init_value) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::transform_inclusive_scan_impl( + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + return Impl::transform_inclusive_scan_exespace_impl( "Kokkos::transform_inclusive_scan_custom_functors_iterator_api", ex, - first, last, first_dest, binary_op, unary_op, init_value); + first, last, first_dest, binary_op, unary_op, std::move(init_value)); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, BinaryOpType binary_op, - UnaryOpType unary_op, ValueType init_value) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_inclusive_scan( + const std::string& label, const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::transform_inclusive_scan_impl(label, ex, first, last, first_dest, - binary_op, unary_op, init_value); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + return Impl::transform_inclusive_scan_exespace_impl( + label, ex, first, last, first_dest, binary_op, unary_op, + std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryOpType, + typename UnaryOpType, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_inclusive_scan( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -132,16 +160,21 @@ auto transform_inclusive_scan( Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; - return Impl::transform_inclusive_scan_impl( + return Impl::transform_inclusive_scan_exespace_impl( "Kokkos::transform_inclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - binary_op, unary_op, init_value); + binary_op, unary_op, std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryOpType, + typename UnaryOpType, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_inclusive_scan( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -150,10 +183,97 @@ auto transform_inclusive_scan( Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; - return Impl::transform_inclusive_scan_impl( + return Impl::transform_inclusive_scan_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), binary_op, unary_op, init_value); + KE::begin(view_dest), binary_op, unary_op, std::move(init_value)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// + +// overload set 1 (no init value) +template && + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType transform_inclusive_scan( + const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(teamHandle); + + return Impl::transform_inclusive_scan_team_impl( + teamHandle, first, last, first_dest, binary_op, unary_op); +} + +template , int> = 0> +KOKKOS_FUNCTION auto transform_inclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(teamHandle); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::transform_inclusive_scan_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op, unary_op); +} + +// overload set 2 (init value) +template && + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType transform_inclusive_scan( + const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { + Impl::static_assert_is_not_openmptarget(teamHandle); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + return Impl::transform_inclusive_scan_team_impl( + teamHandle, first, last, first_dest, binary_op, unary_op, + std::move(init_value)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto transform_inclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { 
+ Impl::static_assert_is_not_openmptarget(teamHandle); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + namespace KE = ::Kokkos::Experimental; + return Impl::transform_inclusive_scan_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op, unary_op, std::move(init_value)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp index b5ec9066d2..101f5113f6 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp @@ -23,34 +23,44 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // ---------------------------- // overload set1: // no custom functors passed, so equivalent to // transform_reduce(first1, last1, first2, init, plus<>(), multiplies<>()); // ---------------------------- -template +template ::value, + int> = 0> ValueType transform_reduce(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value) { - return Impl::transform_reduce_default_functors_impl( + return Impl::transform_reduce_default_functors_exespace_impl( "Kokkos::transform_reduce_default_functors_iterator_api", ex, first1, last1, first2, std::move(init_reduction_value)); } -template +template ::value, + int> = 0> ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value) { - return Impl::transform_reduce_default_functors_impl( + return Impl::transform_reduce_default_functors_exespace_impl( label, ex, first1, last1, 
first2, std::move(init_reduction_value)); } // overload1 accepting views -template +template ::value, + int> = 0> ValueType transform_reduce( const ExecutionSpace& ex, const ::Kokkos::View& first_view, @@ -60,14 +70,16 @@ ValueType transform_reduce( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); - return Impl::transform_reduce_default_functors_impl( + return Impl::transform_reduce_default_functors_exespace_impl( "Kokkos::transform_reduce_default_functors_iterator_api", ex, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value)); } -template +template ::value, + int> = 0> ValueType transform_reduce( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& first_view, @@ -77,7 +89,7 @@ ValueType transform_reduce( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); - return Impl::transform_reduce_default_functors_impl( + return Impl::transform_reduce_default_functors_exespace_impl( label, ex, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value)); } @@ -95,8 +107,11 @@ ValueType transform_reduce( // https://en.cppreference.com/w/cpp/algorithm/transform_reduce // api accepting iterators -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename ValueType, typename BinaryJoinerType, typename BinaryTransform, + std::enable_if_t<::Kokkos::is_execution_space::value, int> = + 0> ValueType transform_reduce(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value, @@ -105,14 +120,17 @@ ValueType transform_reduce(const ExecutionSpace& ex, IteratorType1 first1, static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return 
Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( "Kokkos::transform_reduce_custom_functors_iterator_api", ex, first1, last1, first2, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename ValueType, typename BinaryJoinerType, typename BinaryTransform, + std::enable_if_t<::Kokkos::is_execution_space::value, int> = + 0> ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value, @@ -121,15 +139,17 @@ ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( label, ex, first1, last1, first2, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } // accepting views -template +template ::value, + int> = 0> ValueType transform_reduce( const ExecutionSpace& ex, const ::Kokkos::View& first_view, @@ -143,16 +163,18 @@ ValueType transform_reduce( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( "Kokkos::transform_reduce_custom_functors_view_api", ex, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } -template +template ::value, + int> = 0> ValueType transform_reduce( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& first_view, @@ -166,7 +188,7 @@ ValueType transform_reduce( 
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( label, ex, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); @@ -176,43 +198,50 @@ ValueType transform_reduce( // overload set3: // // accepting iterators -template -// need this to avoid ambiguous call -std::enable_if_t< - ::Kokkos::Experimental::Impl::are_iterators::value, ValueType> -transform_reduce(const ExecutionSpace& ex, IteratorType first1, - IteratorType last1, ValueType init_reduction_value, - BinaryJoinerType joiner, UnaryTransform transformer) { +template ::value && + is_execution_space::value, + int> = 0> +ValueType transform_reduce(const ExecutionSpace& ex, IteratorType first1, + IteratorType last1, ValueType init_reduction_value, + BinaryJoinerType joiner, + UnaryTransform transformer) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( "Kokkos::transform_reduce_custom_functors_iterator_api", ex, first1, last1, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } -template -// need this to avoid ambiguous call -std::enable_if_t< - ::Kokkos::Experimental::Impl::are_iterators::value, ValueType> -transform_reduce(const std::string& label, const ExecutionSpace& ex, - IteratorType first1, IteratorType last1, - ValueType init_reduction_value, BinaryJoinerType joiner, - UnaryTransform transformer) { +template ::value && + is_execution_space::value, + int> = 0> +ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, + IteratorType first1, IteratorType last1, + ValueType init_reduction_value, + 
BinaryJoinerType joiner, + UnaryTransform transformer) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( label, ex, first1, last1, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } // accepting views -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ValueType, typename BinaryJoinerType, typename UnaryTransform, + std::enable_if_t<::Kokkos::is_execution_space::value, int> = + 0> ValueType transform_reduce(const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value, @@ -224,14 +253,17 @@ ValueType transform_reduce(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( "Kokkos::transform_reduce_custom_functors_view_api", ex, KE::cbegin(view), KE::cend(view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename ValueType, typename BinaryJoinerType, typename UnaryTransform, + std::enable_if_t<::Kokkos::is_execution_space::value, int> = + 0> ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value, @@ -243,12 +275,154 @@ ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( label, ex, KE::cbegin(view), KE::cend(view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// ---------------------------- +// overload set1: +// no custom functors passed, so equivalent to +// transform_reduce(first1, last1, first2, init, plus<>(), multiplies<>()); +// ---------------------------- +template < + typename TeamHandleType, typename IteratorType1, typename IteratorType2, + typename ValueType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType transform_reduce(const TeamHandleType& teamHandle, + IteratorType1 first1, + IteratorType1 last1, + IteratorType2 first2, + ValueType init_reduction_value) { + return Impl::transform_reduce_default_functors_team_impl( + teamHandle, first1, last1, first2, std::move(init_reduction_value)); +} + +// overload1 accepting views +template < + typename TeamHandleType, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType +transform_reduce(const TeamHandleType& teamHandle, + const ::Kokkos::View& first_view, + const ::Kokkos::View& second_view, + ValueType init_reduction_value) { + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); + + return Impl::transform_reduce_default_functors_team_impl( + teamHandle, KE::cbegin(first_view), KE::cend(first_view), + KE::cbegin(second_view), std::move(init_reduction_value)); +} + +// +// overload set2: +// accepts a custom transform and joiner functor +// + +// Note the std refers to the arg BinaryReductionOp +// but in the Kokkos naming convention, it corresponds +// to a "joiner" that knows how to join two values +// NOTE: "joiner/transformer" need to be commutative. + +// https://en.cppreference.com/w/cpp/algorithm/transform_reduce + +// api accepting iterators +template < + typename TeamHandleType, typename IteratorType1, typename IteratorType2, + typename ValueType, typename BinaryJoinerType, typename BinaryTransform, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType transform_reduce( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, ValueType init_reduction_value, + BinaryJoinerType joiner, BinaryTransform transformer) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + return Impl::transform_reduce_custom_functors_team_impl( + teamHandle, first1, last1, first2, std::move(init_reduction_value), + std::move(joiner), std::move(transformer)); +} + +// accepting views +template < + typename TeamHandleType, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename ValueType, + typename BinaryJoinerType, typename BinaryTransform, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType +transform_reduce(const TeamHandleType& teamHandle, + const ::Kokkos::View& first_view, + const ::Kokkos::View& second_view, + ValueType init_reduction_value, BinaryJoinerType joiner, + BinaryTransform transformer) { + namespace KE = ::Kokkos::Experimental; + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); + + return Impl::transform_reduce_custom_functors_team_impl( + teamHandle, KE::cbegin(first_view), KE::cend(first_view), + KE::cbegin(second_view), std::move(init_reduction_value), + std::move(joiner), std::move(transformer)); +} + +// +// overload set3: +// +// accepting iterators +template ::value && + is_team_handle::value, + int> = 0> +KOKKOS_FUNCTION ValueType transform_reduce(const TeamHandleType& teamHandle, + IteratorType first1, + IteratorType last1, + ValueType init_reduction_value, + BinaryJoinerType joiner, + UnaryTransform transformer) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + return Impl::transform_reduce_custom_functors_team_impl( + teamHandle, first1, last1, std::move(init_reduction_value), + std::move(joiner), std::move(transformer)); +} + +// accepting views +template < + typename TeamHandleType, typename DataType, typename... 
Properties, + typename ValueType, typename BinaryJoinerType, typename UnaryTransform, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType +transform_reduce(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + ValueType init_reduction_value, BinaryJoinerType joiner, + UnaryTransform transformer) { + namespace KE = ::Kokkos::Experimental; + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::transform_reduce_custom_functors_team_impl( + teamHandle, KE::cbegin(view), KE::cend(view), + std::move(init_reduction_value), std::move(joiner), + std::move(transformer)); +} + } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Unique.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Unique.hpp index b47ecffb20..2d56315f61 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Unique.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Unique.hpp @@ -23,71 +23,132 @@ namespace Kokkos { namespace Experimental { -// note: the enable_if below is to avoid "call to ... 
is ambiguous" -// for example in the unit test when using a variadic function - -// overload set1 -template -std::enable_if_t::value, IteratorType> unique( - const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::unique_impl("Kokkos::unique_iterator_api_default", ex, first, - last); +// +// overload set1: default predicate, accepting execution space +// +template && + is_execution_space::value, + int> = 0> +IteratorType unique(const ExecutionSpace& ex, IteratorType first, + IteratorType last) { + return Impl::unique_exespace_impl("Kokkos::unique_iterator_api_default", ex, + first, last); } -template -std::enable_if_t::value, IteratorType> unique( - const std::string& label, const ExecutionSpace& ex, IteratorType first, - IteratorType last) { - return Impl::unique_impl(label, ex, first, last); +template && + is_execution_space::value, + int> = 0> +IteratorType unique(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { + return Impl::unique_exespace_impl(label, ex, first, last); } -template +template ::value, int> = 0> auto unique(const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return ::Kokkos::Experimental::unique("Kokkos::unique_view_api_default", ex, - begin(view), end(view)); + return Impl::unique_exespace_impl("Kokkos::unique_view_api_default", ex, + begin(view), end(view)); } -template +template ::value, int> = 0> auto unique(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return ::Kokkos::Experimental::unique(label, ex, begin(view), end(view)); + return Impl::unique_exespace_impl(label, ex, begin(view), end(view)); } -// overload set2 -template +// +// overload set2: custom predicate, accepting execution space +// +template ::value, int> = 0> IteratorType unique(const ExecutionSpace& ex, IteratorType first, 
IteratorType last, BinaryPredicate pred) { - return Impl::unique_impl("Kokkos::unique_iterator_api_default", ex, first, - last, pred); + return Impl::unique_exespace_impl("Kokkos::unique_iterator_api_default", ex, + first, last, pred); } -template +template ::value, int> = 0> IteratorType unique(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, BinaryPredicate pred) { - return Impl::unique_impl(label, ex, first, last, pred); + return Impl::unique_exespace_impl(label, ex, first, last, pred); } -template +template ::value, int> = 0> auto unique(const ExecutionSpace& ex, const ::Kokkos::View& view, BinaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::unique_impl("Kokkos::unique_view_api_default", ex, begin(view), - end(view), std::move(pred)); + return Impl::unique_exespace_impl("Kokkos::unique_view_api_default", ex, + begin(view), end(view), std::move(pred)); } -template +template ::value, int> = 0> auto unique(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, BinaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::unique_impl(label, ex, begin(view), end(view), std::move(pred)); + return Impl::unique_exespace_impl(label, ex, begin(view), end(view), + std::move(pred)); +} + +// +// overload set3: default predicate, accepting team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template && + is_team_handle::value, + int> = 0> +KOKKOS_FUNCTION IteratorType unique(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last) { + return Impl::unique_team_impl(teamHandle, first, last); +} + +template ::value, int> = 0> +KOKKOS_FUNCTION auto unique( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view) { + return Impl::unique_team_impl(teamHandle, begin(view), end(view)); +} + +// +// overload set4: custom predicate, accepting team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template ::value, int> = 0> +KOKKOS_FUNCTION IteratorType unique(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + BinaryPredicate pred) { + return Impl::unique_team_impl(teamHandle, first, last, std::move(pred)); +} + +template ::value, int> = 0> +KOKKOS_FUNCTION auto unique(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + BinaryPredicate pred) { + return Impl::unique_team_impl(teamHandle, begin(view), end(view), + std::move(pred)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp index bd2451c220..4a32d7e095 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp @@ -23,67 +23,90 @@ namespace Kokkos { namespace Experimental { -// overload set1 -template -std::enable_if_t::value, OutputIterator> -unique_copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, - OutputIterator d_first) { - return Impl::unique_copy_impl("Kokkos::unique_copy_iterator_api_default", ex, - first, last, d_first); +// +// overload set1: default predicate, accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t && + 
is_execution_space_v, + int> = 0> +OutputIterator unique_copy(const ExecutionSpace& ex, InputIterator first, + InputIterator last, OutputIterator d_first) { + return Impl::unique_copy_exespace_impl( + "Kokkos::unique_copy_iterator_api_default", ex, first, last, d_first); } -template -std::enable_if_t::value, OutputIterator> -unique_copy(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::unique_copy_impl(label, ex, first, last, d_first); +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t && + is_execution_space_v, + int> = 0> +OutputIterator unique_copy(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + OutputIterator d_first) { + return Impl::unique_copy_exespace_impl(label, ex, first, last, d_first); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto unique_copy(const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return ::Kokkos::Experimental::unique_copy( - "Kokkos::unique_copy_view_api_default", ex, cbegin(source), cend(source), - begin(dest)); + return Impl::unique_copy_exespace_impl("Kokkos::unique_copy_view_api_default", + ex, cbegin(source), cend(source), + begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto unique_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return ::Kokkos::Experimental::unique_copy(label, ex, cbegin(source), - cend(source), begin(dest)); + return Impl::unique_copy_exespace_impl(label, ex, cbegin(source), + cend(source), begin(dest)); } -// overload set2 -template +// +// overload set2: custom predicate, accepting execution space +// + +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename BinaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator unique_copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first, BinaryPredicate pred) { - return Impl::unique_copy_impl("Kokkos::unique_copy_iterator_api_default", ex, - first, last, d_first, pred); + return Impl::unique_copy_exespace_impl( + "Kokkos::unique_copy_iterator_api_default", ex, first, last, d_first, + pred); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename BinaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator unique_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first, BinaryPredicate pred) { - return Impl::unique_copy_impl(label, ex, first, last, d_first, pred); + return Impl::unique_copy_exespace_impl(label, ex, first, last, d_first, pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto unique_copy(const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest, @@ -91,13 +114,15 @@ auto unique_copy(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::unique_copy_impl("Kokkos::unique_copy_view_api_default", ex, - cbegin(source), cend(source), begin(dest), - std::move(pred)); + return Impl::unique_copy_exespace_impl("Kokkos::unique_copy_view_api_default", + ex, cbegin(source), cend(source), + begin(dest), std::move(pred)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto unique_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest, @@ -105,8 +130,70 @@ auto unique_copy(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::unique_copy_impl(label, ex, cbegin(source), cend(source), - begin(dest), std::move(pred)); + return Impl::unique_copy_exespace_impl( + label, ex, cbegin(source), cend(source), begin(dest), std::move(pred)); +} + +// +// overload set3: default predicate, accepting team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template < + typename TeamHandleType, typename InputIterator, typename OutputIterator, + std::enable_if_t && + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIterator unique_copy(const TeamHandleType& teamHandle, + InputIterator first, + InputIterator last, + OutputIterator d_first) { + return Impl::unique_copy_team_impl(teamHandle, first, last, d_first); +} + +template , int> = 0> +KOKKOS_FUNCTION auto unique_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + const ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::unique_copy_team_impl(teamHandle, cbegin(source), cend(source), + begin(dest)); +} + +// +// overload set4: custom predicate, accepting team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator unique_copy(const TeamHandleType& teamHandle, + InputIterator first, + InputIterator last, + OutputIterator d_first, + BinaryPredicate pred) { + return Impl::unique_copy_team_impl(teamHandle, first, last, d_first, pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto unique_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + const ::Kokkos::View& dest, + BinaryPredicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::unique_copy_team_impl(teamHandle, cbegin(source), cend(source), + begin(dest), std::move(pred)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp index 8a474508d7..a8171fa068 100644 --- 
a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp @@ -63,14 +63,15 @@ struct StdAdjacentDiffFunctor { m_op(std::move(op)) {} }; +// +// exespace impl +// template -OutputIteratorType adjacent_difference_impl(const std::string& label, - const ExecutionSpace& ex, - InputIteratorType first_from, - InputIteratorType last_from, - OutputIteratorType first_dest, - BinaryOp bin_op) { +OutputIteratorType adjacent_difference_exespace_impl( + const std::string& label, const ExecutionSpace& ex, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest, BinaryOp bin_op) { // checks Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, @@ -81,20 +82,45 @@ OutputIteratorType adjacent_difference_impl(const std::string& label, return first_dest; } - // aliases - using value_type = typename OutputIteratorType::value_type; - using aux_view_type = ::Kokkos::View; - using functor_t = - StdAdjacentDiffFunctor; + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_for( + label, RangePolicy(ex, 0, num_elements), + StdAdjacentDiffFunctor(first_from, first_dest, bin_op)); + ex.fence("Kokkos::adjacent_difference: fence after operation"); + + // return + return first_dest + num_elements; +} + +// +// team impl +// +template +KOKKOS_FUNCTION OutputIteratorType adjacent_difference_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + BinaryOp bin_op) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + if (first_from == 
last_from) { + return first_dest; + } // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); - aux_view_type aux_view("aux_view", num_elements); - ::Kokkos::parallel_for(label, - RangePolicy(ex, 0, num_elements), - functor_t(first_from, first_dest, bin_op)); - ex.fence("Kokkos::adjacent_difference: fence after operation"); + ::Kokkos::parallel_for( + TeamThreadRange(teamHandle, 0, num_elements), + StdAdjacentDiffFunctor(first_from, first_dest, bin_op)); + teamHandle.team_barrier(); // return return first_dest + num_elements; diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp index dd785e603b..f30b7be06a 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp @@ -27,9 +27,9 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdAdjacentFindFunctor { + using index_type = typename IteratorType::difference_type; using red_value_type = typename ReducerType::value_type; IteratorType m_first; @@ -37,13 +37,13 @@ struct StdAdjacentFindFunctor { PredicateType m_p; KOKKOS_FUNCTION - void operator()(const IndexType i, red_value_type& red_value) const { + void operator()(const index_type i, red_value_type& red_value) const { const auto& my_value = m_first[i]; const auto& next_value = m_first[i + 1]; const bool are_equal = m_p(my_value, next_value); // FIXME_NVHPC using a ternary operator causes problems - red_value_type value = {::Kokkos::reduction_identity::min()}; + red_value_type value = {::Kokkos::reduction_identity::min()}; if (are_equal) { value.min_loc_true = i; } @@ -59,10 +59,14 @@ struct StdAdjacentFindFunctor { m_p(std::move(p)) {} }; +// +// exespace impl +// template -IteratorType adjacent_find_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType 
last, PredicateType pred) { +IteratorType adjacent_find_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, IteratorType last, + PredicateType pred) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); @@ -76,8 +80,6 @@ IteratorType adjacent_find_impl(const std::string& label, using index_type = typename IteratorType::difference_type; using reducer_type = FirstLoc; using reduction_value_type = typename reducer_type::value_type; - using func_t = StdAdjacentFindFunctor; reduction_value_type red_result; reducer_type reducer(red_result); @@ -86,7 +88,8 @@ IteratorType adjacent_find_impl(const std::string& label, // each index i in the reduction checks i and (i+1). ::Kokkos::parallel_reduce( label, RangePolicy(ex, 0, num_elements - 1), - func_t(first, reducer, pred), reducer); + // use CTAD + StdAdjacentFindFunctor(first, reducer, pred), reducer); // fence not needed because reducing into scalar if (red_result.min_loc_true == @@ -98,12 +101,62 @@ IteratorType adjacent_find_impl(const std::string& label, } template -IteratorType adjacent_find_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last) { +IteratorType adjacent_find_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, + IteratorType last) { using value_type = typename IteratorType::value_type; using default_pred_t = StdAlgoEqualBinaryPredicate; - return adjacent_find_impl(label, ex, first, last, default_pred_t()); + return adjacent_find_exespace_impl(label, ex, first, last, default_pred_t()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION IteratorType +adjacent_find_team_impl(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, PredicateType pred) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + const auto num_elements = 
Kokkos::Experimental::distance(first, last); + + if (num_elements <= 1) { + return last; + } + + using index_type = typename IteratorType::difference_type; + using reducer_type = FirstLoc; + using reduction_value_type = typename reducer_type::value_type; + + reduction_value_type red_result; + reducer_type reducer(red_result); + + // note that we use below num_elements-1 because + // each index i in the reduction checks i and (i+1). + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements - 1), + // use CTAD + StdAdjacentFindFunctor(first, reducer, pred), + reducer); + + teamHandle.team_barrier(); + + if (red_result.min_loc_true == + ::Kokkos::reduction_identity::min()) { + return last; + } else { + return first + red_result.min_loc_true; + } +} + +template +KOKKOS_FUNCTION IteratorType adjacent_find_team_impl( + const TeamHandleType& teamHandle, IteratorType first, IteratorType last) { + using value_type = typename IteratorType::value_type; + using default_pred_t = StdAlgoEqualBinaryPredicate; + return adjacent_find_team_impl(teamHandle, first, last, default_pred_t()); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp index ad562070a0..bdc050f9c1 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp @@ -23,23 +23,58 @@ namespace Kokkos { namespace Experimental { namespace Impl { +// +// exespace impl +// template -bool all_of_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, Predicate predicate) { - return (find_if_or_not_impl(label, ex, first, last, predicate) == - last); +bool all_of_exespace_impl(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + Predicate predicate) { + return 
(find_if_or_not_exespace_impl(label, ex, first, last, + predicate) == last); } template -bool any_of_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, Predicate predicate) { - return (find_if_or_not_impl(label, ex, first, last, predicate) != last); +bool any_of_exespace_impl(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + Predicate predicate) { + return (find_if_or_not_exespace_impl(label, ex, first, last, + predicate) != last); } template -bool none_of_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, Predicate predicate) { - return (find_if_or_not_impl(label, ex, first, last, predicate) == last); +bool none_of_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + Predicate predicate) { + return (find_if_or_not_exespace_impl(label, ex, first, last, + predicate) == last); +} + +// +// team impl +// +template +KOKKOS_FUNCTION bool all_of_team_impl(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + Predicate predicate) { + return (find_if_or_not_team_impl(teamHandle, first, last, predicate) == + last); +} + +template +KOKKOS_FUNCTION bool any_of_team_impl(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + Predicate predicate) { + return (find_if_or_not_team_impl(teamHandle, first, last, predicate) != + last); +} + +template +KOKKOS_FUNCTION bool none_of_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + Predicate predicate) { + return (find_if_or_not_team_impl(teamHandle, first, last, predicate) == + last); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp index 0376100410..27ce5a6fad 100644 --- 
a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp @@ -55,6 +55,9 @@ using iterator_category_t = typename T::iterator_category; template using is_iterator = Kokkos::is_detected; +template +inline constexpr bool is_iterator_v = is_iterator::value; + // // are_iterators // @@ -63,15 +66,18 @@ struct are_iterators; template struct are_iterators { - static constexpr bool value = is_iterator::value; + static constexpr bool value = is_iterator_v; }; template struct are_iterators { static constexpr bool value = - are_iterators::value && are_iterators::value; + are_iterators::value && (are_iterators::value && ... && true); }; +template +inline constexpr bool are_iterators_v = are_iterators::value; + // // are_random_access_iterators // @@ -81,17 +87,21 @@ struct are_random_access_iterators; template struct are_random_access_iterators { static constexpr bool value = - is_iterator::value && - std::is_base_of::value; + is_iterator_v && std::is_base_of::value; }; template struct are_random_access_iterators { - static constexpr bool value = are_random_access_iterators::value && - are_random_access_iterators::value; + static constexpr bool value = + are_random_access_iterators::value && + (are_random_access_iterators::value && ... && true); }; +template +inline constexpr bool are_random_access_iterators_v = + are_random_access_iterators::value; + // // iterators_are_accessible_from // @@ -113,16 +123,18 @@ struct iterators_are_accessible_from { iterators_are_accessible_from::value; }; -template +template KOKKOS_INLINE_FUNCTION constexpr void -static_assert_random_access_and_accessible(const ExecutionSpace& /* ex */, - IteratorTypes... /* iterators */) { +static_assert_random_access_and_accessible( + const ExecutionSpaceOrTeamHandleType& /* ex_or_th*/, + IteratorTypes... 
/* iterators */) { static_assert( are_random_access_iterators::value, "Currently, Kokkos standard algorithms require random access iterators."); - static_assert( - iterators_are_accessible_from::value, - "Incompatible view/iterator and execution space"); + static_assert(iterators_are_accessible_from< + typename ExecutionSpaceOrTeamHandleType::execution_space, + IteratorTypes...>::value, + "Incompatible view/iterator and execution space"); } // @@ -182,10 +194,10 @@ struct not_openmptarget { #endif }; -template +template KOKKOS_INLINE_FUNCTION constexpr void static_assert_is_not_openmptarget( - const ExecutionSpace&) { - static_assert(not_openmptarget::value, + const ExecutionSpaceOrTeamHandleType& /*ex_or_th*/) { + static_assert(not_openmptarget::value, "Currently, Kokkos standard algorithms do not support custom " "comparators in OpenMPTarget"); } @@ -194,7 +206,8 @@ KOKKOS_INLINE_FUNCTION constexpr void static_assert_is_not_openmptarget( // valid range // template -void expect_valid_range(IteratorType first, IteratorType last) { +KOKKOS_INLINE_FUNCTION void expect_valid_range(IteratorType first, + IteratorType last) { // this is a no-op for release KOKKOS_EXPECTS(last >= first); // avoid compiler complaining when KOKKOS_EXPECTS is no-op diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp index b3adbc5e2d..0f68c9e978 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp @@ -27,16 +27,18 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdCopyBackwardFunctor { - static_assert(std::is_signed::value, - "Kokkos: StdCopyBackwardFunctor requires signed index type"); + // we can use difference type from IteratorType1 since + // the calling functions below already static assert that + // the iterators have matching difference 
type + using index_type = typename IteratorType1::difference_type; IteratorType1 m_last; IteratorType2 m_dest_last; KOKKOS_FUNCTION - void operator()(IndexType i) const { m_dest_last[-i - 1] = m_last[-i - 1]; } + void operator()(index_type i) const { m_dest_last[-i - 1] = m_last[-i - 1]; } KOKKOS_FUNCTION StdCopyBackwardFunctor(IteratorType1 _last, IteratorType2 _dest_last) @@ -44,30 +46,51 @@ struct StdCopyBackwardFunctor { }; template -IteratorType2 copy_backward_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 d_last) { +IteratorType2 copy_backward_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType1 first, + IteratorType1 last, + IteratorType2 d_last) { // checks Impl::static_assert_random_access_and_accessible(ex, first, d_last); Impl::static_assert_iterators_have_matching_difference_type(first, d_last); Impl::expect_valid_range(first, last); - // aliases - using index_type = typename IteratorType1::difference_type; - using func_t = - StdCopyBackwardFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(label, RangePolicy(ex, 0, num_elements), - func_t(last, d_last)); + // use CTAD + StdCopyBackwardFunctor(last, d_last)); ex.fence("Kokkos::copy_backward: fence after operation"); // return return d_last - num_elements; } +// +// team-level impl +// +template +KOKKOS_FUNCTION IteratorType2 +copy_backward_team_impl(const TeamHandleType& teamHandle, IteratorType1 first, + IteratorType1 last, IteratorType2 d_last) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first, d_last); + Impl::static_assert_iterators_have_matching_difference_type(first, d_last); + Impl::expect_valid_range(first, last); + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(TeamThreadRange(teamHandle, 0, num_elements), + // use CTAD + 
StdCopyBackwardFunctor(last, d_last)); + teamHandle.team_barrier(); + + // return + return d_last - num_elements; +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp index 1b120c46d0..86e99ecbd0 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp @@ -27,13 +27,18 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdCopyFunctor { + // we can use difference type from InputIterator since + // the calling functions below already static assert that + // the iterators have matching difference type + using index_type = typename InputIterator::difference_type; + InputIterator m_first; OutputIterator m_dest_first; KOKKOS_FUNCTION - void operator()(IndexType i) const { m_dest_first[i] = m_first[i]; } + void operator()(index_type i) const { m_dest_first[i] = m_first[i]; } KOKKOS_FUNCTION StdCopyFunctor(InputIterator _first, OutputIterator _dest_first) @@ -41,23 +46,20 @@ struct StdCopyFunctor { }; template -OutputIterator copy_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, - OutputIterator d_first) { +OutputIterator copy_exespace_impl(const std::string& label, + const ExecutionSpace& ex, InputIterator first, + InputIterator last, OutputIterator d_first) { // checks Impl::static_assert_random_access_and_accessible(ex, first, d_first); Impl::static_assert_iterators_have_matching_difference_type(first, d_first); Impl::expect_valid_range(first, last); - // aliases - using index_type = typename InputIterator::difference_type; - using func_t = StdCopyFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(label, RangePolicy(ex, 0, num_elements), - func_t(first, 
d_first)); + // use CTAD + StdCopyFunctor(first, d_first)); ex.fence("Kokkos::copy: fence after operation"); // return @@ -66,16 +68,61 @@ OutputIterator copy_impl(const std::string& label, const ExecutionSpace& ex, template -OutputIterator copy_n_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first_from, Size count, - OutputIterator first_dest) { +OutputIterator copy_n_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + InputIterator first_from, Size count, + OutputIterator first_dest) { // checks Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, first_dest); if (count > 0) { - return copy_impl(label, ex, first_from, first_from + count, first_dest); + return copy_exespace_impl(label, ex, first_from, first_from + count, + first_dest); + } else { + return first_dest; + } +} + +// +// team-level impl +// +template +KOKKOS_FUNCTION OutputIterator copy_team_impl(const TeamHandleType& teamHandle, + InputIterator first, + InputIterator last, + OutputIterator d_first) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first, d_first); + Impl::static_assert_iterators_have_matching_difference_type(first, d_first); + Impl::expect_valid_range(first, last); + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(TeamThreadRange(teamHandle, 0, num_elements), + // use CTAD + StdCopyFunctor(first, d_first)); + teamHandle.team_barrier(); + + // return + return d_first + num_elements; +} + +template +KOKKOS_FUNCTION OutputIterator +copy_n_team_impl(const TeamHandleType& teamHandle, InputIterator first_from, + Size count, OutputIterator first_dest) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + + if (count > 0) { + 
return copy_team_impl(teamHandle, first_from, first_from + count, + first_dest); } else { return first_dest; } diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp index 3c0c4f7e9b..3c1e2474bc 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp @@ -20,6 +20,7 @@ #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" +#include "Kokkos_MustUseKokkosSingleInTeam.hpp" #include #include @@ -27,8 +28,10 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdCopyIfFunctor { + using index_type = typename FirstFrom::difference_type; + FirstFrom m_first_from; FirstDest m_first_dest; PredType m_pred; @@ -40,7 +43,7 @@ struct StdCopyIfFunctor { m_pred(std::move(pred)) {} KOKKOS_FUNCTION - void operator()(const IndexType i, IndexType& update, + void operator()(const index_type i, index_type& update, const bool final_pass) const { const auto& myval = m_first_from[i]; if (final_pass) { @@ -57,9 +60,11 @@ struct StdCopyIfFunctor { template -OutputIterator copy_if_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, - OutputIterator d_first, PredicateType pred) { +OutputIterator copy_if_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + InputIterator first, InputIterator last, + OutputIterator d_first, + PredicateType pred) { /* To explain the impl, suppose that our data is: @@ -90,23 +95,67 @@ OutputIterator copy_if_impl(const std::string& label, const ExecutionSpace& ex, if (first == last) { return d_first; } else { - // aliases - using index_type = typename InputIterator::difference_type; - using func_type = StdCopyIfFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first, last); - index_type count = 0; + + typename 
InputIterator::difference_type count = 0; ::Kokkos::parallel_scan(label, RangePolicy(ex, 0, num_elements), - func_type(first, d_first, pred), count); + // use CTAD + StdCopyIfFunctor(first, d_first, pred), count); // fence not needed because of the scan accumulating into count return d_first + count; } } +template +KOKKOS_FUNCTION OutputIterator copy_if_team_impl( + const TeamHandleType& teamHandle, InputIterator first, InputIterator last, + OutputIterator d_first, PredicateType pred) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first, d_first); + Impl::static_assert_iterators_have_matching_difference_type(first, d_first); + Impl::expect_valid_range(first, last); + + if (first == last) { + return d_first; + } + + const std::size_t num_elements = Kokkos::Experimental::distance(first, last); + if constexpr (stdalgo_must_use_kokkos_single_for_team_scan_v< + typename TeamHandleType::execution_space>) { + std::size_t count = 0; + Kokkos::single( + Kokkos::PerTeam(teamHandle), + [=](std::size_t& lcount) { + lcount = 0; + for (std::size_t i = 0; i < num_elements; ++i) { + const auto& myval = first[i]; + if (pred(myval)) { + d_first[lcount++] = myval; + } + } + }, + count); + // no barrier needed since single above broadcasts to all members + return d_first + count; + + } else { + typename InputIterator::difference_type count = 0; + ::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, num_elements), + StdCopyIfFunctor(first, d_first, pred), count); + // no barrier needed because of the scan accumulating into count + return d_first + count; + } + +#if defined KOKKOS_COMPILER_INTEL || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130) + __builtin_unreachable(); +#endif +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp index 18b8c46359..9b6b403aa4 
100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp @@ -46,37 +46,65 @@ struct StdCountIfFunctor { }; template -typename IteratorType::difference_type count_if_impl(const std::string& label, - const ExecutionSpace& ex, - IteratorType first, - IteratorType last, - Predicate predicate) { +typename IteratorType::difference_type count_if_exespace_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType first, + IteratorType last, Predicate predicate) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); - // aliases - using func_t = StdCountIfFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first, last); typename IteratorType::difference_type count = 0; ::Kokkos::parallel_reduce(label, RangePolicy(ex, 0, num_elements), - func_t(first, predicate), count); + // use CTAD + StdCountIfFunctor(first, predicate), count); ex.fence("Kokkos::count_if: fence after operation"); return count; } template -auto count_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, const T& value) { - return count_if_impl( +auto count_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + const T& value) { + return count_if_exespace_impl( label, ex, first, last, ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate(value)); } +// +// team-level impl +// +template +KOKKOS_FUNCTION typename IteratorType::difference_type count_if_team_impl( + const TeamHandleType& teamHandle, IteratorType first, IteratorType last, + Predicate predicate) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + typename IteratorType::difference_type count = 
0; + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), + // use CTAD + StdCountIfFunctor(first, predicate), count); + teamHandle.team_barrier(); + + return count; +} + +template +KOKKOS_FUNCTION auto count_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + const T& value) { + return count_if_team_impl( + teamHandle, first, last, + ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate(value)); +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp index e045080d4a..62b7d226f6 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp @@ -27,15 +27,16 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdEqualFunctor { + using index_type = typename IteratorType1::difference_type; + IteratorType1 m_first1; IteratorType2 m_first2; BinaryPredicateType m_predicate; KOKKOS_FUNCTION - void operator()(IndexType i, std::size_t& lsum) const { + void operator()(index_type i, std::size_t& lsum) const { if (!m_predicate(m_first1[i], m_first2[i])) { lsum = 1; } @@ -49,67 +50,130 @@ struct StdEqualFunctor { m_predicate(std::move(_predicate)) {} }; +// +// exespace impl +// template -bool equal_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, - BinaryPredicateType predicate) { +bool equal_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, BinaryPredicateType predicate) { // checks Impl::static_assert_random_access_and_accessible(ex, first1, first2); Impl::static_assert_iterators_have_matching_difference_type(first1, first2); Impl::expect_valid_range(first1, last1); - // aliases - 
using index_type = typename IteratorType1::difference_type; - using func_t = StdEqualFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first1, last1); std::size_t different = 0; - ::Kokkos::parallel_reduce(label, - RangePolicy(ex, 0, num_elements), - func_t(first1, first2, predicate), different); + ::Kokkos::parallel_reduce( + label, RangePolicy(ex, 0, num_elements), + StdEqualFunctor(first1, first2, predicate), different); ex.fence("Kokkos::equal: fence after operation"); return !different; } template -bool equal_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2) { +bool equal_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2) { using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using pred_t = StdAlgoEqualBinaryPredicate; - return equal_impl(label, ex, first1, last1, first2, pred_t()); + return equal_exespace_impl(label, ex, first1, last1, first2, pred_t()); } template -bool equal_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, - IteratorType2 last2, BinaryPredicateType predicate) { +bool equal_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType predicate) { const auto d1 = ::Kokkos::Experimental::distance(first1, last1); const auto d2 = ::Kokkos::Experimental::distance(first2, last2); if (d1 != d2) { return false; } - return equal_impl(label, ex, first1, last1, first2, predicate); + return equal_exespace_impl(label, ex, first1, last1, first2, predicate); } template -bool equal_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, - IteratorType2 last2) { 
+bool equal_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { Impl::expect_valid_range(first1, last1); Impl::expect_valid_range(first2, last2); using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using pred_t = StdAlgoEqualBinaryPredicate; - return equal_impl(label, ex, first1, last1, first2, last2, pred_t()); + return equal_exespace_impl(label, ex, first1, last1, first2, last2, pred_t()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION bool equal_team_impl(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, + BinaryPredicateType predicate) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first1, first2); + Impl::static_assert_iterators_have_matching_difference_type(first1, first2); + Impl::expect_valid_range(first1, last1); + + // run + const auto num_elements = Kokkos::Experimental::distance(first1, last1); + std::size_t different = 0; + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), + StdEqualFunctor(first1, first2, predicate), + different); + teamHandle.team_barrier(); + + return !different; +} + +template +KOKKOS_FUNCTION bool equal_team_impl(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2) { + using value_type1 = typename IteratorType1::value_type; + using value_type2 = typename IteratorType2::value_type; + using pred_t = StdAlgoEqualBinaryPredicate; + return equal_team_impl(teamHandle, first1, last1, first2, pred_t()); +} + +template +KOKKOS_FUNCTION bool equal_team_impl(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType predicate) { + const auto d1 = ::Kokkos::Experimental::distance(first1, last1); + const auto d2 = 
::Kokkos::Experimental::distance(first2, last2); + if (d1 != d2) { + return false; + } + + return equal_team_impl(teamHandle, first1, last1, first2, predicate); +} + +template +KOKKOS_FUNCTION bool equal_team_impl(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, + IteratorType2 last2) { + Impl::expect_valid_range(first1, last1); + Impl::expect_valid_range(first2, last2); + + using value_type1 = typename IteratorType1::value_type; + using value_type2 = typename IteratorType2::value_type; + using pred_t = StdAlgoEqualBinaryPredicate; + return equal_team_impl(teamHandle, first1, last1, first2, last2, pred_t()); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp index 71f13e490a..6da992b4bb 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp @@ -22,6 +22,7 @@ #include "Kokkos_HelperPredicates.hpp" #include "Kokkos_ValueWrapperForNoNeutralElement.hpp" #include "Kokkos_IdentityReferenceUnaryFunctor.hpp" +#include "Kokkos_FunctorsForExclusiveScan.hpp" #include #include #include @@ -30,127 +31,15 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template -struct ExclusiveScanDefaultFunctorForKnownNeutralElement { - using execution_space = ExeSpace; - - ValueType m_init_value; - FirstFrom m_first_from; - FirstDest m_first_dest; - - KOKKOS_FUNCTION - ExclusiveScanDefaultFunctorForKnownNeutralElement(ValueType init, - FirstFrom first_from, - FirstDest first_dest) - : m_init_value(std::move(init)), - m_first_from(std::move(first_from)), - m_first_dest(std::move(first_dest)) {} - - KOKKOS_FUNCTION - void operator()(const IndexType i, ValueType& update, - const bool final_pass) const { - if (final_pass) m_first_dest[i] = update + m_init_value; - update += m_first_from[i]; - } 
-}; - -template -struct ExclusiveScanDefaultFunctor { - using execution_space = ExeSpace; - using value_type = - ::Kokkos::Experimental::Impl::ValueWrapperForNoNeutralElement; - - ValueType m_init_value; - FirstFrom m_first_from; - FirstDest m_first_dest; - - KOKKOS_FUNCTION - ExclusiveScanDefaultFunctor(ValueType init, FirstFrom first_from, - FirstDest first_dest) - : m_init_value(std::move(init)), - m_first_from(std::move(first_from)), - m_first_dest(std::move(first_dest)) {} - - KOKKOS_FUNCTION - void operator()(const IndexType i, value_type& update, - const bool final_pass) const { - if (final_pass) { - if (i == 0) { - m_first_dest[i] = m_init_value; - } else { - m_first_dest[i] = update.val + m_init_value; - } - } - - const auto tmp = value_type{m_first_from[i], false}; - this->join(update, tmp); - } - - KOKKOS_FUNCTION - void init(value_type& update) const { - update.val = {}; - update.is_initial = true; - } - - KOKKOS_FUNCTION - void join(value_type& update, const value_type& input) const { - if (input.is_initial) return; - - if (update.is_initial) { - update.val = input.val; - update.is_initial = false; - } else { - update.val = update.val + input.val; - } - } -}; - -template -OutputIteratorType exclusive_scan_custom_op_impl( - const std::string& label, const ExecutionSpace& ex, - InputIteratorType first_from, InputIteratorType last_from, - OutputIteratorType first_dest, ValueType init_value, BinaryOpType bop) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); - Impl::static_assert_iterators_have_matching_difference_type(first_from, - first_dest); - Impl::expect_valid_range(first_from, last_from); - - // aliases - using index_type = typename InputIteratorType::difference_type; - using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; - using func_type = - TransformExclusiveScanFunctor; - - // run - const auto num_elements = - Kokkos::Experimental::distance(first_from, last_from); - 
::Kokkos::parallel_scan( - label, RangePolicy(ex, 0, num_elements), - func_type(init_value, first_from, first_dest, bop, unary_op_type())); - ex.fence("Kokkos::exclusive_scan_custom_op: fence after operation"); - - // return - return first_dest + num_elements; -} - -template -using ex_scan_has_reduction_identity_sum_t = - decltype(Kokkos::reduction_identity::sum()); - +// +// exespace impl +// template -OutputIteratorType exclusive_scan_default_op_impl(const std::string& label, - const ExecutionSpace& ex, - InputIteratorType first_from, - InputIteratorType last_from, - OutputIteratorType first_dest, - ValueType init_value) { +OutputIteratorType exclusive_scan_default_op_exespace_impl( + const std::string& label, const ExecutionSpace& ex, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest, ValueType init_value) { // checks Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, @@ -184,17 +73,122 @@ OutputIteratorType exclusive_scan_default_op_impl(const std::string& label, ExclusiveScanDefaultFunctorForKnownNeutralElement< ExecutionSpace, index_type, ValueType, InputIteratorType, OutputIteratorType>, - ExclusiveScanDefaultFunctor>; + ExclusiveScanDefaultFunctorWithValueWrapper>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan( + label, RangePolicy(ex, 0, num_elements), + func_type(std::move(init_value), first_from, first_dest)); + + ex.fence("Kokkos::exclusive_scan_default_op: fence after operation"); + + return first_dest + num_elements; +} + +template +OutputIteratorType exclusive_scan_custom_op_exespace_impl( + const std::string& label, const ExecutionSpace& ex, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest, ValueType init_value, BinaryOpType bop) { + // checks + 
Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + // aliases + using index_type = typename InputIteratorType::difference_type; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using func_type = TransformExclusiveScanFunctorWithValueWrapper< + ExecutionSpace, index_type, ValueType, InputIteratorType, + OutputIteratorType, BinaryOpType, unary_op_type>; // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); ::Kokkos::parallel_scan(label, RangePolicy(ex, 0, num_elements), - func_type(init_value, first_from, first_dest)); + func_type(std::move(init_value), first_from, + first_dest, bop, unary_op_type())); + ex.fence("Kokkos::exclusive_scan_custom_op: fence after operation"); - ex.fence("Kokkos::exclusive_scan_default_op: fence after operation"); + // return + return first_dest + num_elements; +} + +// +// team impl +// +template +KOKKOS_FUNCTION OutputIteratorType exclusive_scan_default_op_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + ValueType init_value) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + static_assert( + ::Kokkos::is_detected_v, + "The team-level impl of Kokkos::Experimental::exclusive_scan currently " + "does not support types without reduction identity"); + + // aliases + using exe_space = typename TeamHandleType::execution_space; + using index_type = typename InputIteratorType::difference_type; + using func_type = ExclusiveScanDefaultFunctorForKnownNeutralElement< + exe_space, index_type, ValueType, InputIteratorType, OutputIteratorType>; 
+ + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan( + TeamThreadRange(teamHandle, 0, num_elements), + func_type(std::move(init_value), first_from, first_dest)); + teamHandle.team_barrier(); + return first_dest + num_elements; +} + +template +KOKKOS_FUNCTION OutputIteratorType exclusive_scan_custom_op_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + ValueType init_value, BinaryOpType bop) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + static_assert( + ::Kokkos::is_detected_v, + "The team-level impl of Kokkos::Experimental::exclusive_scan currently " + "does not support types without reduction identity"); + + // aliases + using exe_space = typename TeamHandleType::execution_space; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using index_type = typename InputIteratorType::difference_type; + using func_type = TransformExclusiveScanFunctorWithoutValueWrapper< + exe_space, index_type, ValueType, InputIteratorType, OutputIteratorType, + BinaryOpType, unary_op_type>; + + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, num_elements), + func_type(std::move(init_value), first_from, + first_dest, bop, unary_op_type())); + teamHandle.team_barrier(); return first_dest + num_elements; } diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp index 316d865f31..972e57f2cc 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp @@ 
-41,9 +41,12 @@ struct StdFillFunctor { : m_first(std::move(_first)), m_value(std::move(_value)) {} }; +// +// exespace impl +// template -void fill_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, const T& value) { +void fill_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, const T& value) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); @@ -52,13 +55,14 @@ void fill_impl(const std::string& label, const ExecutionSpace& ex, const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(label, RangePolicy(ex, 0, num_elements), - StdFillFunctor(first, value)); + StdFillFunctor(first, value)); ex.fence("Kokkos::fill: fence after operation"); } template -IteratorType fill_n_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, SizeType n, const T& value) { +IteratorType fill_n_exespace_impl(const std::string& label, + const ExecutionSpace& ex, IteratorType first, + SizeType n, const T& value) { auto last = first + n; Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); @@ -67,7 +71,40 @@ IteratorType fill_n_impl(const std::string& label, const ExecutionSpace& ex, return first; } - fill_impl(label, ex, first, last, value); + fill_exespace_impl(label, ex, first, last, value); + return last; +} + +// +// team-level impl +// +template +KOKKOS_FUNCTION void fill_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + const T& value) { + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(TeamThreadRange(teamHandle, 0, num_elements), + StdFillFunctor(first, value)); + + teamHandle.team_barrier(); +} + +template 
+KOKKOS_FUNCTION IteratorType fill_n_team_impl(const TeamHandleType& teamHandle, + IteratorType first, SizeType n, + const T& value) { + auto last = first + n; + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + if (n <= 0) { + return first; + } + + fill_team_impl(teamHandle, first, last, value); return last; } diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp index 3ec64fa43d..1f1ec5e54f 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp @@ -80,12 +80,17 @@ struct StdFindEndFunctor { m_p(std::move(p)) {} }; +// +// exespace impl +// template -IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first, IteratorType1 last, - IteratorType2 s_first, IteratorType2 s_last, - const BinaryPredicateType& pred) { +IteratorType1 find_end_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last, + const BinaryPredicateType& pred) { // checks Impl::static_assert_random_access_and_accessible(ex, first, s_first); Impl::static_assert_iterators_have_matching_difference_type(first, s_first); @@ -97,7 +102,6 @@ IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, const auto num_elements = KE::distance(first, last); const auto s_count = KE::distance(s_first, s_last); KOKKOS_EXPECTS(num_elements >= s_count); - (void)s_count; // needed when macro above is a no-op if (s_first == s_last) { return last; @@ -109,7 +113,8 @@ IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, // special case where the two ranges have equal size if (num_elements == s_count) { - const auto equal_result = equal_impl(label, ex, first, last, s_first, pred); + 
const auto equal_result = + equal_exespace_impl(label, ex, first, last, s_first, pred); return (equal_result) ? first : last; } else { using index_type = typename IteratorType1::difference_type; @@ -148,14 +153,97 @@ IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, } template -IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first, IteratorType1 last, - IteratorType2 s_first, IteratorType2 s_last) { +IteratorType1 find_end_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last) { using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using predicate_type = StdAlgoEqualBinaryPredicate; - return find_end_impl(label, ex, first, last, s_first, s_last, - predicate_type()); + return find_end_exespace_impl(label, ex, first, last, s_first, s_last, + predicate_type()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION IteratorType1 +find_end_team_impl(const TeamHandleType& teamHandle, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, + IteratorType2 s_last, const BinaryPredicateType& pred) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first, s_first); + Impl::static_assert_iterators_have_matching_difference_type(first, s_first); + Impl::expect_valid_range(first, last); + Impl::expect_valid_range(s_first, s_last); + + // the target sequence should not be larger than the range [first, last) + namespace KE = ::Kokkos::Experimental; + const auto num_elements = KE::distance(first, last); + const auto s_count = KE::distance(s_first, s_last); + KOKKOS_EXPECTS(num_elements >= s_count); + + if (s_first == s_last) { + return last; + } + + if (first == last) { + return last; + } + + // special case where the two ranges have equal size + if (num_elements == s_count) { + const auto equal_result = + 
equal_team_impl(teamHandle, first, last, s_first, pred); + return (equal_result) ? first : last; + } else { + using index_type = typename IteratorType1::difference_type; + using reducer_type = LastLoc; + using reduction_value_type = typename reducer_type::value_type; + using func_t = StdFindEndFunctor; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result); + + // decide the size of the range policy of the par_red: + // note that the last feasible index to start looking is the index + // whose distance from the "last" is equal to the sequence count. + // the +1 is because we need to include that location too. + const auto range_size = num_elements - s_count + 1; + + // run par reduce + ::Kokkos::parallel_reduce( + TeamThreadRange(teamHandle, 0, range_size), + func_t(first, last, s_first, s_last, reducer, pred), reducer); + + teamHandle.team_barrier(); + + // decide and return + if (red_result.max_loc_true == + ::Kokkos::reduction_identity::max()) { + // if here, a subrange has not been found + return last; + } else { + // a location has been found + return first + red_result.max_loc_true; + } + } +} + +template +KOKKOS_FUNCTION IteratorType1 find_end_team_impl( + const TeamHandleType& teamHandle, IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, IteratorType2 s_last) { + using value_type1 = typename IteratorType1::value_type; + using value_type2 = typename IteratorType2::value_type; + using predicate_type = StdAlgoEqualBinaryPredicate; + return find_end_team_impl(teamHandle, first, last, s_first, s_last, + predicate_type()); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp index 5f22d2ad13..145e235b9d 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp @@ -71,13 +71,15 @@ struct 
StdFindFirstOfFunctor { m_p(std::move(p)) {} }; +// +// exespace impl +// template -IteratorType1 find_first_of_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 s_first, - IteratorType2 s_last, - const BinaryPredicateType& pred) { +IteratorType1 find_first_of_exespace_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, + const BinaryPredicateType& pred) { // checks Impl::static_assert_random_access_and_accessible(ex, first, s_first); Impl::static_assert_iterators_have_matching_difference_type(first, s_first); @@ -116,15 +118,71 @@ IteratorType1 find_first_of_impl(const std::string& label, } template -IteratorType1 find_first_of_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 s_first, - IteratorType2 s_last) { +IteratorType1 find_first_of_exespace_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using predicate_type = StdAlgoEqualBinaryPredicate; - return find_first_of_impl(label, ex, first, last, s_first, s_last, - predicate_type()); + return find_first_of_exespace_impl(label, ex, first, last, s_first, s_last, + predicate_type()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION IteratorType1 +find_first_of_team_impl(const TeamHandleType& teamHandle, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, + IteratorType2 s_last, const BinaryPredicateType& pred) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first, s_first); + Impl::static_assert_iterators_have_matching_difference_type(first, s_first); + Impl::expect_valid_range(first, last); + Impl::expect_valid_range(s_first, s_last); 
+ + if ((s_first == s_last) || (first == last)) { + return last; + } + + using index_type = typename IteratorType1::difference_type; + using reducer_type = FirstLoc; + using reduction_value_type = typename reducer_type::value_type; + using func_t = StdFindFirstOfFunctor; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result); + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), + func_t(first, s_first, s_last, reducer, pred), + reducer); + + teamHandle.team_barrier(); + + // decide and return + if (red_result.min_loc_true == + ::Kokkos::reduction_identity::min()) { + // if here, nothing found + return last; + } else { + // a location has been found + return first + red_result.min_loc_true; + } +} + +template +KOKKOS_FUNCTION IteratorType1 find_first_of_team_impl( + const TeamHandleType& teamHandle, IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, IteratorType2 s_last) { + using value_type1 = typename IteratorType1::value_type; + using value_type2 = typename IteratorType2::value_type; + using predicate_type = StdAlgoEqualBinaryPredicate; + return find_first_of_team_impl(teamHandle, first, last, s_first, s_last, + predicate_type()); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp index 9c0b0c0ccd..8fffb59094 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp @@ -61,11 +61,15 @@ struct StdFindIfOrNotFunctor { m_p(std::move(p)) {} }; +// +// exespace impl +// template -IteratorType find_if_or_not_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last, PredicateType pred) { +IteratorType find_if_or_not_exespace_impl(const std::string& label, + const 
ExecutionSpace& ex, + IteratorType first, IteratorType last, + PredicateType pred) { // checks Impl::static_assert_random_access_and_accessible( ex, first); // only need one It per type @@ -104,14 +108,68 @@ IteratorType find_if_or_not_impl(const std::string& label, } template -InputIterator find_impl(const std::string& label, ExecutionSpace ex, - InputIterator first, InputIterator last, - const T& value) { - return find_if_or_not_impl( +InputIterator find_exespace_impl(const std::string& label, ExecutionSpace ex, + InputIterator first, InputIterator last, + const T& value) { + return find_if_or_not_exespace_impl( label, ex, first, last, ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate(value)); } +// +// team impl +// +template +KOKKOS_FUNCTION IteratorType +find_if_or_not_team_impl(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, PredicateType pred) { + // checks + Impl::static_assert_random_access_and_accessible( + teamHandle, first); // only need one It per type + Impl::expect_valid_range(first, last); + + if (first == last) { + return last; + } + + // aliases + using index_type = typename IteratorType::difference_type; + using reducer_type = FirstLoc; + using reduction_value_type = typename reducer_type::value_type; + using func_t = StdFindIfOrNotFunctor; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result); + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), + func_t(first, reducer, pred), reducer); + + teamHandle.team_barrier(); + + // decide and return + if (red_result.min_loc_true == + ::Kokkos::reduction_identity::min()) { + // here, it means a valid loc has not been found, + return last; + } else { + // a location has been found + return first + red_result.min_loc_true; + } +} + +template +KOKKOS_FUNCTION InputIterator find_team_impl(const TeamHandleType& teamHandle, + InputIterator first, + 
InputIterator last, + const T& value) { + return find_if_or_not_team_impl( + teamHandle, first, last, + ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate(value)); +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp index f9a6ff2e99..d3be3b7f66 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp @@ -41,29 +41,31 @@ struct StdForEachFunctor { : m_first(std::move(_first)), m_functor(std::move(_functor)) {} }; -template -UnaryFunctorType for_each_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last, UnaryFunctorType functor) { +template +UnaryFunctorType for_each_exespace_impl(const std::string& label, + const HandleType& handle, + IteratorType first, IteratorType last, + UnaryFunctorType functor) { // checks - Impl::static_assert_random_access_and_accessible(ex, first); + Impl::static_assert_random_access_and_accessible(handle, first); Impl::expect_valid_range(first, last); // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for( - label, RangePolicy(ex, 0, num_elements), + label, RangePolicy(handle, 0, num_elements), StdForEachFunctor(first, functor)); - ex.fence("Kokkos::for_each: fence after operation"); + handle.fence("Kokkos::for_each: fence after operation"); return functor; } template -IteratorType for_each_n_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, SizeType n, - UnaryFunctorType functor) { +IteratorType for_each_n_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, SizeType n, + UnaryFunctorType functor) { auto last = first + n; Impl::static_assert_random_access_and_accessible(ex, first, last); 
Impl::expect_valid_range(first, last); @@ -72,8 +74,46 @@ IteratorType for_each_n_impl(const std::string& label, const ExecutionSpace& ex, return first; } - for_each_impl(label, ex, first, last, std::move(functor)); - // no neeed to fence since for_each_impl fences already + for_each_exespace_impl(label, ex, first, last, std::move(functor)); + // no neeed to fence since for_each_exespace_impl fences already + + return last; +} + +// +// team impl +// +template +KOKKOS_FUNCTION UnaryFunctorType +for_each_team_impl(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, UnaryFunctorType functor) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for( + TeamThreadRange(teamHandle, 0, num_elements), + StdForEachFunctor(first, functor)); + teamHandle.team_barrier(); + return functor; +} + +template +KOKKOS_FUNCTION IteratorType +for_each_n_team_impl(const TeamHandleType& teamHandle, IteratorType first, + SizeType n, UnaryFunctorType functor) { + auto last = first + n; + Impl::static_assert_random_access_and_accessible(teamHandle, first, last); + Impl::expect_valid_range(first, last); + + if (n == 0) { + return first; + } + + for_each_team_impl(teamHandle, first, last, std::move(functor)); + // no neeed to fence since for_each_team_impl fences already return last; } diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FunctorsForExclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FunctorsForExclusiveScan.hpp new file mode 100644 index 0000000000..8151ee3495 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FunctorsForExclusiveScan.hpp @@ -0,0 +1,220 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_STD_ALGORITHMS_FUNCTORS_FOR_EXCLUSIVE_SCAN_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_FUNCTORS_FOR_EXCLUSIVE_SCAN_IMPL_HPP + +#include +#include "Kokkos_ValueWrapperForNoNeutralElement.hpp" + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template +using ex_scan_has_reduction_identity_sum_t = + decltype(Kokkos::reduction_identity::sum()); + +template +struct ExclusiveScanDefaultFunctorForKnownNeutralElement { + using execution_space = ExeSpace; + ValueType m_init_value; + FirstFrom m_first_from; + FirstDest m_first_dest; + + KOKKOS_FUNCTION + ExclusiveScanDefaultFunctorForKnownNeutralElement(ValueType init, + FirstFrom first_from, + FirstDest first_dest) + : m_init_value(std::move(init)), + m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, ValueType& update, + const bool final_pass) const { + if (final_pass) m_first_dest[i] = update + m_init_value; + update += m_first_from[i]; + } +}; + +template +struct ExclusiveScanDefaultFunctorWithValueWrapper { + using execution_space = ExeSpace; + using value_type = + ::Kokkos::Experimental::Impl::ValueWrapperForNoNeutralElement; + ValueType m_init_value; + FirstFrom m_first_from; + FirstDest m_first_dest; + + KOKKOS_FUNCTION + ExclusiveScanDefaultFunctorWithValueWrapper(ValueType init, + FirstFrom first_from, + FirstDest first_dest) + : m_init_value(std::move(init)), + m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)) {} + + KOKKOS_FUNCTION + void 
operator()(const IndexType i, value_type& update, + const bool final_pass) const { + if (final_pass) { + if (i == 0) { + m_first_dest[i] = m_init_value; + } else { + m_first_dest[i] = update.val + m_init_value; + } + } + + const auto tmp = value_type{m_first_from[i], false}; + this->join(update, tmp); + } + + KOKKOS_FUNCTION + void init(value_type& update) const { + update.val = {}; + update.is_initial = true; + } + + KOKKOS_FUNCTION + void join(value_type& update, const value_type& input) const { + if (input.is_initial) return; + + if (update.is_initial) { + update.val = input.val; + update.is_initial = false; + } else { + update.val = update.val + input.val; + } + } +}; + +template +struct TransformExclusiveScanFunctorWithValueWrapper { + using execution_space = ExeSpace; + using value_type = + ::Kokkos::Experimental::Impl::ValueWrapperForNoNeutralElement; + + ValueType m_init_value; + FirstFrom m_first_from; + FirstDest m_first_dest; + BinaryOpType m_binary_op; + UnaryOpType m_unary_op; + + KOKKOS_FUNCTION + TransformExclusiveScanFunctorWithValueWrapper(ValueType init, + FirstFrom first_from, + FirstDest first_dest, + BinaryOpType bop, + UnaryOpType uop) + : m_init_value(std::move(init)), + m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)), + m_binary_op(std::move(bop)), + m_unary_op(std::move(uop)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, value_type& update, + const bool final_pass) const { + if (final_pass) { + if (i == 0) { + // for both ExclusiveScan and TransformExclusiveScan, + // init is unmodified + m_first_dest[i] = m_init_value; + } else { + m_first_dest[i] = m_binary_op(update.val, m_init_value); + } + } + + const auto tmp = value_type{m_unary_op(m_first_from[i]), false}; + this->join(update, tmp); + } + + KOKKOS_FUNCTION void init(value_type& value) const { + value.val = {}; + value.is_initial = true; + } + + KOKKOS_FUNCTION + void join(value_type& update, const value_type& input) const { + if 
(input.is_initial) return; + + if (update.is_initial) { + update.val = input.val; + } else { + update.val = m_binary_op(update.val, input.val); + } + update.is_initial = false; + } +}; + +template +struct TransformExclusiveScanFunctorWithoutValueWrapper { + using execution_space = ExeSpace; + + ValueType m_init_value; + FirstFrom m_first_from; + FirstDest m_first_dest; + BinaryOpType m_binary_op; + UnaryOpType m_unary_op; + + KOKKOS_FUNCTION + TransformExclusiveScanFunctorWithoutValueWrapper(ValueType init, + FirstFrom first_from, + FirstDest first_dest, + BinaryOpType bop, + UnaryOpType uop) + : m_init_value(std::move(init)), + m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)), + m_binary_op(std::move(bop)), + m_unary_op(std::move(uop)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, ValueType& update, + const bool final_pass) const { + if (final_pass) { + if (i == 0) { + // for both ExclusiveScan and TransformExclusiveScan, + // init is unmodified + m_first_dest[i] = m_init_value; + } else { + m_first_dest[i] = m_binary_op(update, m_init_value); + } + } + + const auto tmp = ValueType{m_unary_op(m_first_from[i])}; + this->join(update, tmp); + } + + KOKKOS_FUNCTION + void init(ValueType& update) const { update = {}; } + + KOKKOS_FUNCTION + void join(ValueType& update, const ValueType& input) const { + update = m_binary_op(update, input); + } +}; + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp index 228390bdff..157de1125e 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp @@ -41,32 +41,65 @@ struct StdGenerateFunctor { : m_first(std::move(_first)), m_generator(std::move(_g)) {} }; +// +// generate impl +// template 
-void generate_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, Generator g) { +void generate_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + Generator g) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); - // aliases - using func_t = StdGenerateFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(label, RangePolicy(ex, 0, num_elements), - func_t(first, g)); + StdGenerateFunctor(first, g)); ex.fence("Kokkos::generate: fence after operation"); } +template +KOKKOS_FUNCTION void generate_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + Generator g) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(TeamThreadRange(teamHandle, 0, num_elements), + StdGenerateFunctor(first, g)); + teamHandle.team_barrier(); +} + +// +// generate_n impl +// template -IteratorType generate_n_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, Size count, Generator g) { +IteratorType generate_n_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, Size count, + Generator g) { if (count <= 0) { return first; } - generate_impl(label, ex, first, first + count, g); + generate_exespace_impl(label, ex, first, first + count, g); + return first + count; +} + +template +KOKKOS_FUNCTION IteratorType +generate_n_team_impl(const TeamHandleType& teamHandle, IteratorType first, + Size count, Generator g) { + if (count <= 0) { + return first; + } + + generate_team_impl(teamHandle, first, first + count, g); return first + count; } diff --git 
a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp index ecd6ff39cd..0b4acec0fe 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp @@ -101,9 +101,12 @@ struct InclusiveScanDefaultFunctor { } }; +// +// exespace impl +// template -OutputIteratorType inclusive_scan_default_op_impl( +OutputIteratorType inclusive_scan_default_op_exespace_impl( const std::string& label, const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest) { @@ -143,7 +146,7 @@ OutputIteratorType inclusive_scan_default_op_impl( // ------------------------------------------------------------- template -OutputIteratorType inclusive_scan_custom_binary_op_impl( +OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( const std::string& label, const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest, BinaryOpType binary_op) { @@ -158,7 +161,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_impl( using value_type = std::remove_const_t; using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; - using func_type = TransformInclusiveScanNoInitValueFunctor< + using func_type = ExeSpaceTransformInclusiveScanNoInitValueFunctor< ExecutionSpace, index_type, value_type, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; @@ -179,7 +182,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_impl( // ------------------------------------------------------------- template -OutputIteratorType inclusive_scan_custom_binary_op_impl( +OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( const std::string& label, const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest, BinaryOpType binary_op, @@ 
-193,7 +196,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_impl( // aliases using index_type = typename InputIteratorType::difference_type; using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; - using func_type = TransformInclusiveScanWithInitValueFunctor< + using func_type = ExeSpaceTransformInclusiveScanWithInitValueFunctor< ExecutionSpace, index_type, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; @@ -203,13 +206,142 @@ OutputIteratorType inclusive_scan_custom_binary_op_impl( ::Kokkos::parallel_scan(label, RangePolicy(ex, 0, num_elements), func_type(first_from, first_dest, binary_op, - unary_op_type(), init_value)); + unary_op_type(), std::move(init_value))); ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation"); // return return first_dest + num_elements; } +// +// team impl +// +template +KOKKOS_FUNCTION OutputIteratorType inclusive_scan_default_op_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + using value_type = + std::remove_const_t; + + // #if defined(KOKKOS_ENABLE_CUDA) + + using exe_space = typename TeamHandleType::execution_space; + using index_type = typename InputIteratorType::difference_type; + using func_type = std::conditional_t< + ::Kokkos::is_detected::value, + InclusiveScanDefaultFunctorForKnownIdentityElement< + exe_space, index_type, value_type, InputIteratorType, + OutputIteratorType>, + InclusiveScanDefaultFunctor>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, num_elements), + func_type(first_from, first_dest)); + 
teamHandle.team_barrier(); + + // return + return first_dest + num_elements; +} + +// ------------------------------------------------------------- +// inclusive_scan_custom_binary_op_impl +// ------------------------------------------------------------- +template +KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + BinaryOpType binary_op) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + using value_type = + std::remove_const_t; + + static_assert( + ::Kokkos::is_detected_v, + "At the moment inclusive_scan doesn't support types without reduction " + "identity"); + + // #if defined(KOKKOS_ENABLE_CUDA) + + // aliases + using exe_space = typename TeamHandleType::execution_space; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using func_type = TeamTransformInclusiveScanNoInitValueFunctor< + exe_space, value_type, InputIteratorType, OutputIteratorType, + BinaryOpType, unary_op_type>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + + ::Kokkos::parallel_scan( + TeamThreadRange(teamHandle, 0, num_elements), + func_type(first_from, first_dest, binary_op, unary_op_type())); + teamHandle.team_barrier(); + + return first_dest + num_elements; +} + +// ------------------------------------------------------------- +// inclusive_scan_custom_binary_op_impl with init_value +// ------------------------------------------------------------- +template +KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + 
BinaryOpType binary_op, ValueType init_value) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + static_assert( + ::Kokkos::is_detected_v, + "At the moment inclusive_scan doesn't support types without reduction " + "identity"); + + // #if defined(KOKKOS_ENABLE_CUDA) + + // aliases + using exe_space = typename TeamHandleType::execution_space; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using func_type = TeamTransformInclusiveScanWithInitValueFunctor< + exe_space, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType, + unary_op_type>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, num_elements), + func_type(first_from, first_dest, binary_op, + unary_op_type(), std::move(init_value))); + teamHandle.team_barrier(); + + // return + return first_dest + num_elements; +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp index 0fe2d246ff..281efca36b 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp @@ -62,9 +62,9 @@ struct StdIsPartitionedFunctor { }; template -bool is_partitioned_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - PredicateType pred) { +bool is_partitioned_exespace_impl(const std::string& label, + const ExecutionSpace& ex, IteratorType first, + IteratorType last, PredicateType pred) { // true if all elements in the range [first, last) that satisfy // the predicate "pred" appear before all 
elements that don't. // Also returns true if [first, last) is empty. @@ -97,6 +97,7 @@ bool is_partitioned_impl(const std::string& label, const ExecutionSpace& ex, const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_reduce(label, RangePolicy(ex, 0, num_elements), + func_t(first, reducer, pred), reducer); // fence not needed because reducing into scalar @@ -109,8 +110,72 @@ bool is_partitioned_impl(const std::string& label, const ExecutionSpace& ex, if (red_result.max_loc_true != red_id_max && red_result.min_loc_false != red_id_min) { + // this occurs when the reduction yields nontrivial values return red_result.max_loc_true < red_result.min_loc_false; + } else if (red_result.max_loc_true == red_id_max && + red_result.min_loc_false == 0) { + // this occurs when all values do NOT satisfy + // the predicate, and this corner case should also be true + return true; } else if (first + red_result.max_loc_true == --last) { + // this occurs when all values satisfy the predicate, + // this corner case should also be true + return true; + } else { + return false; + } +} + +template +KOKKOS_FUNCTION bool is_partitioned_team_impl(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last, + PredicateType pred) { + /* see exespace impl for the description of the impl */ + + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + // trivial case + if (first == last) { + return true; + } + + // aliases + using index_type = typename IteratorType::difference_type; + using reducer_type = StdIsPartitioned; + using reduction_value_type = typename reducer_type::value_type; + using func_t = + StdIsPartitionedFunctor; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result); + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), + func_t(first, 
reducer, pred), reducer); + + // fence not needed because reducing into scalar + + // decide and return + constexpr index_type red_id_min = + ::Kokkos::reduction_identity::min(); + constexpr index_type red_id_max = + ::Kokkos::reduction_identity::max(); + + if (red_result.max_loc_true != red_id_max && + red_result.min_loc_false != red_id_min) { + // this occurs when the reduction yields nontrivial values + return red_result.max_loc_true < red_result.min_loc_false; + } else if (red_result.max_loc_true == red_id_max && + red_result.min_loc_false == 0) { + // this occurs when all values do NOT satisfy + // the predicate, and this corner case should also be true + return true; + } else if (first + red_result.max_loc_true == --last) { + // this occurs when all values satisfy the predicate, + // this corner case should also be true return true; } else { return false; diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp index 4696821586..b2c912848a 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp @@ -48,10 +48,13 @@ struct StdIsSortedFunctor { : m_first(std::move(_first1)), m_comparator(std::move(comparator)) {} }; +// +// exespace impl +// template -bool is_sorted_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - ComparatorType comp) { +bool is_sorted_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + ComparatorType comp) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); @@ -75,11 +78,49 @@ bool is_sorted_impl(const std::string& label, const ExecutionSpace& ex, } template -bool is_sorted_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last) { +bool 
is_sorted_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { using value_type = typename IteratorType::value_type; using pred_t = Impl::StdAlgoLessThanBinaryPredicate; - return is_sorted_impl(label, ex, first, last, pred_t()); + return is_sorted_exespace_impl(label, ex, first, last, pred_t()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION bool is_sorted_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ComparatorType comp) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + const auto num_elements = Kokkos::Experimental::distance(first, last); + if (num_elements <= 1) { + return true; + } + + // use num_elements-1 because each index handles i and i+1 + const auto num_elements_minus_one = num_elements - 1; + + // result is incremented by one if sorting breaks at index i + std::size_t result = 0; + ::Kokkos::parallel_reduce( + TeamThreadRange(teamHandle, 0, num_elements_minus_one), + // use CTAD here + StdIsSortedFunctor(first, std::move(comp)), result); + + return result == 0; +} + +template +KOKKOS_FUNCTION bool is_sorted_team_impl(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last) { + using value_type = typename IteratorType::value_type; + using pred_t = Impl::StdAlgoLessThanBinaryPredicate; + return is_sorted_team_impl(teamHandle, first, last, pred_t()); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp index 2a0c112bf5..d33580ca53 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp @@ -54,10 +54,15 @@ struct StdIsSortedUntilFunctor { m_reducer(std::move(reducer)) {} }; +// +// overloads accepting exespace +// template 
-IteratorType is_sorted_until_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last, ComparatorType comp) { +IteratorType is_sorted_until_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, + IteratorType last, + ComparatorType comp) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); @@ -81,7 +86,6 @@ IteratorType is_sorted_until_impl(const std::string& label, label, // use num_elements-1 because each index handles i and i+1 RangePolicy(ex, 0, num_elements - 1), - // use CTAD StdIsSortedUntilFunctor(first, comp, reducer), reducer); /* If the reduction result is equal to the initial value, @@ -98,12 +102,66 @@ IteratorType is_sorted_until_impl(const std::string& label, } template -IteratorType is_sorted_until_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last) { +IteratorType is_sorted_until_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, + IteratorType last) { using value_type = typename IteratorType::value_type; using pred_t = Impl::StdAlgoLessThanBinaryPredicate; - return is_sorted_until_impl(label, ex, first, last, pred_t()); + return is_sorted_until_exespace_impl(label, ex, first, last, pred_t()); +} + +// +// overloads accepting team handle +// +template +KOKKOS_FUNCTION IteratorType +is_sorted_until_team_impl(const ExecutionSpace& teamHandle, IteratorType first, + IteratorType last, ComparatorType comp) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + const auto num_elements = Kokkos::Experimental::distance(first, last); + + // trivial case + if (num_elements <= 1) { + return last; + } + + /* + Do a par_reduce computing the *min* index that breaks the sorting. 
+ If one such index is found, then the range is sorted until that element, + if no such index is found, then it means the range is sorted until the end. + */ + using index_type = typename IteratorType::difference_type; + index_type red_result; + index_type red_result_init; + ::Kokkos::Min reducer(red_result); + reducer.init(red_result_init); + ::Kokkos::parallel_reduce( // use num_elements-1 because each index handles i + // and i+1 + TeamThreadRange(teamHandle, 0, num_elements - 1), + StdIsSortedUntilFunctor(first, comp, reducer), reducer); + teamHandle.team_barrier(); + + /* If the reduction result is equal to the initial value, + and it means the range is sorted until the end */ + if (red_result == red_result_init) { + return last; + } else { + /* If such index is found, then the range is sorted until there and + we need to return an iterator past the element found so do +1 */ + return first + (red_result + 1); + } +} + +template +KOKKOS_FUNCTION IteratorType is_sorted_until_team_impl( + const ExecutionSpace& teamHandle, IteratorType first, IteratorType last) { + using value_type = typename IteratorType::value_type; + using pred_t = Impl::StdAlgoLessThanBinaryPredicate; + return is_sorted_until_team_impl(teamHandle, first, last, pred_t()); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp index ad7f59232e..b95a66c3bd 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp @@ -84,13 +84,15 @@ struct StdLexicographicalCompareFunctor { m_comparator(std::move(_comp)) {} }; +// +// exespace impl +// template -bool lexicographical_compare_impl(const std::string& label, - const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2, - ComparatorType 
comp) { +bool lexicographical_compare_exespace_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, + ComparatorType comp) { // checks Impl::static_assert_random_access_and_accessible(ex, first1, first2); Impl::static_assert_iterators_have_matching_difference_type(first1, first2); @@ -139,16 +141,84 @@ bool lexicographical_compare_impl(const std::string& label, } template -bool lexicographical_compare_impl(const std::string& label, - const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2) { +bool lexicographical_compare_exespace_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { using value_type_1 = typename IteratorType1::value_type; using value_type_2 = typename IteratorType2::value_type; using predicate_t = Impl::StdAlgoLessThanBinaryPredicate; - return lexicographical_compare_impl(label, ex, first1, last1, first2, last2, - predicate_t()); + return lexicographical_compare_exespace_impl(label, ex, first1, last1, first2, + last2, predicate_t()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION bool lexicographical_compare_team_impl( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, ComparatorType comp) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first1, first2); + Impl::static_assert_iterators_have_matching_difference_type(first1, first2); + Impl::expect_valid_range(first1, last1); + Impl::expect_valid_range(first2, last2); + + // aliases + using index_type = typename IteratorType1::difference_type; + using reducer_type = FirstLoc; + using reduction_value_type = typename reducer_type::value_type; + + // run + const auto d1 = Kokkos::Experimental::distance(first1, last1); + const auto d2 = 
Kokkos::Experimental::distance(first2, last2); + const auto range = Kokkos::min(d1, d2); + reduction_value_type red_result; + reducer_type reducer(red_result); + using func1_t = + StdLexicographicalCompareFunctor; + + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, range), + func1_t(first1, first2, reducer, comp), reducer); + + teamHandle.team_barrier(); + + // no mismatch + if (red_result.min_loc_true == + ::Kokkos::reduction_identity::min()) { + auto new_last1 = first1 + range; + auto new_last2 = first2 + range; + bool is_prefix = (new_last1 == last1) && (new_last2 != last2); + return is_prefix; + } + + // check mismatched + int less = 0; + auto it1 = first1 + red_result.min_loc_true; + auto it2 = first2 + red_result.min_loc_true; + using func2_t = StdCompareFunctor; + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, 1), + func2_t(it1, it2, comp), less); + + teamHandle.team_barrier(); + + return static_cast(less); +} + +template +KOKKOS_FUNCTION bool lexicographical_compare_team_impl( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { + using value_type_1 = typename IteratorType1::value_type; + using value_type_2 = typename IteratorType2::value_type; + using predicate_t = + Impl::StdAlgoLessThanBinaryPredicate; + return lexicographical_compare_team_impl(teamHandle, first1, last1, first2, + last2, predicate_t()); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp index 048420f7a8..2f51db03b4 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp @@ -63,12 +63,16 @@ struct StdMinMaxElemFunctor { : m_first(std::move(first)), m_reducer(std::move(reducer)) {} }; +// +// exespace impl +// template