From b8728884fc900dd43f6c565dbe686d5eb5a68d06 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Mon, 10 Oct 2022 10:33:47 -0600 Subject: [PATCH 001/448] Drafting stress controls in fix deform --- src/fix_deform.cpp | 505 ++++++++++++++++++++++++++++++++++++--------- src/fix_deform.h | 21 +- 2 files changed, 430 insertions(+), 96 deletions(-) diff --git a/src/fix_deform.cpp b/src/fix_deform.cpp index bc6e61a69e..40221df839 100644 --- a/src/fix_deform.cpp +++ b/src/fix_deform.cpp @@ -16,6 +16,11 @@ Contributing author: Pieter in 't Veld (SNL) ------------------------------------------------------------------------- */ +// Save previous state to restart file for derivatives +// define hrate_lo/hi for volume +// add modify command +// add pressure code + #include "fix_deform.h" #include "atom.h" @@ -39,8 +44,9 @@ using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; -enum{NONE=0,FINAL,DELTA,SCALE,VEL,ERATE,TRATE,VOLUME,WIGGLE,VARIABLE}; +enum{NONE=0,FINAL,DELTA,SCALE,VEL,ERATE,TRATE,VOLUME,WIGGLE,VARIABLE,PRESSURE,PISOTROPIC}; enum{ONE_FROM_ONE,ONE_FROM_TWO,TWO_FROM_ONE}; +enum{NOCOUPLE=0,XYZ,XY,YZ,XZ}; /* ---------------------------------------------------------------------- */ @@ -52,6 +58,8 @@ irregular(nullptr), set(nullptr) no_change_box = 1; restart_global = 1; pre_exchange_migrate = 1; + pcouple = NOCOUPLE; + dimension = domain->dimension; nevery = utils::inumeric(FLERR,arg[3],false,lmp); if (nevery <= 0) error->all(FLERR,"Illegal fix deform command"); @@ -132,7 +140,35 @@ irregular(nullptr), set(nullptr) set[index].hstr = utils::strdup(&arg[iarg+2][2]); set[index].hratestr = utils::strdup(&arg[iarg+3][2]); iarg += 4; - } else error->all(FLERR,"Illegal fix deform command"); + } else if (strcmp(arg[iarg+1],"pressure") == 0) { + if (iarg+4 > narg) error->all(FLERR, "Illegal fix deform command"); + set[index].style = PRESSURE; + if (strstr(arg[iarg+2],"v_") != arg[iarg+2]) { + set[index].ptarget = utils::numeric(FLERR,arg[iarg+2],false,lmp); + } else { + if (iarg+ > narg) error->all(FLERR,"Illegal fix deform command"); + set[index].pstr = utils::strdup(&arg[iarg+2][2]); + set[index].pvar = 1; + } + set[index].pgain = utils::numeric(FLERR,arg[iarg+3],false,lmp); + if (set[index].pgain <= 0.0) + error->all(FLERR,"Illegal fix deform command"); + iarg += 4; + } else if (strcmp(arg[iarg+1],"pressure/isotropic") == 0) { + if (iarg+4 > narg) error->all(FLERR, "Illegal fix deform command"); + set[index].style = PISOTROPIC; + if (strstr(arg[iarg+2],"v_") != arg[iarg+2]) { + set[index].ptarget = utils::numeric(FLERR,arg[iarg+2],false,lmp); + } else { + if (iarg+ > narg) error->all(FLERR,"Illegal fix deform command"); + set[index].pstr = utils::strdup(&arg[iarg+2][2]); + set[index].pvar = 1; + } + set[index].pgain = utils::numeric(FLERR,arg[iarg+3],false,lmp); + if (set[index].pgain <= 0.0) + error->all(FLERR,"Illegal fix deform command"); + iarg += 4; + } error->all(FLERR,"Illegal fix deform command"); } else if (strcmp(arg[iarg],"xy") == 0 || strcmp(arg[iarg],"xz") == 0 || @@ -190,8 +226,21 @@ irregular(nullptr), set(nullptr) set[index].hstr = utils::strdup(&arg[iarg+2][2]); set[index].hratestr = utils::strdup(&arg[iarg+3][2]); iarg += 4; + } else if (strcmp(arg[iarg+1],"pressure") == 0) { + if (iarg+4 > narg) error->all(FLERR, "Illegal fix deform command"); + set[index].style = PRESSURE; + if (strstr(arg[iarg+2],"v_") != arg[iarg+2]) { + set[index].ptarget = utils::numeric(FLERR,arg[iarg+2],false,lmp); + } else { + if (iarg+ > narg) error->all(FLERR,"Illegal fix deform command"); + set[index].pstr = utils::strdup(&arg[iarg+2][2]); + set[index].pvar = 1; + } + set[index].pgain = utils::numeric(FLERR,arg[iarg+3],false,lmp); + if (set[index].pgain <= 0.0) + error->all(FLERR,"Illegal fix deform command"); + iarg += 4; } else error->all(FLERR,"Illegal fix deform command"); - } else break; } @@ -201,6 +250,63 @@ irregular(nullptr), set(nullptr) options(narg-iarg,&arg[iarg]); if (remapflag != Domain::X_REMAP) restart_pbc = 1; + // populate coupled pressure controls + + if (pcouple != NOCOUPLE) { + int coupled_indices[3] = {0}; + int j = -1; + double couple_gain, coupled_pressure; + char *couple_str; + + if (pcouple == XYZ || pcouple == XY || pcouple == XZ) + coupled_indices[0] = 1; + if (pcouple == XYZ || pcouple == XY || pcouple == YZ) + coupled_indices[1] = 1; + if (pcouple == XYZ || pcouple == XZ || pcouple == YZ) + coupled_indices[2] = 1; + + // Check coupled styles and find reference + for (int i = 0; i < 3; i++) { + if (coupled_indices[i]) { + set[i].coupled_flag = 1; + if (set[i].style != VOLUME && set[i].style != PRESSURE && set[i].style != NONE) + error->all(FLERR, "Cannot couple dimensions not controlled by pressure or volume in fix deform"); + if (set[i].style == PRESSURE || set[i].style == VOLUME) + j = i; + } + } + + if (j == -1) + error->all(FLERR, "Must specify pressure style for a coupled dimension in fix deform"); + + // Copy data to each coupled dimension + for (int i = 0; i < 3; i++) { + if (coupled_indices[i]) { + if (set[j].style != set[i].style && set[i].style != NONE) + error->all(FLERR, "Cannot couple dimensions with different control options"); + + if (set[j].style == PRESSURE && set[i].style == NONE) { + set[i].style = PRESSURE; + set[i].pgain = set[j].pgain; + if (set[j].pvar == 1) { + set[i].pstr = set[j].pstr; + set[i].pvar = 1; + } else { + set[i].ptarget = set[j].ptarget; + } + } else if (set[j].style == VOLUME && set[i].style == NONE) { + set[i].style = VOLUME; + set[i].saved = -1; + if (domain->dimension == 2) + error->all(FLERR, "Cannot couple pressure with constant volume in two dimensions"); + } + } else { + if (set[i].style == VOLUME && set[j].style == VOLUME) + error->all(FLERR, "Dimensions used to maintain constant volume must either be all be coupled or not coupled"); + } + } + } + // setup dimflags used by other classes to check for volume-change conflicts for (int i = 0; i < 6; i++) @@ -285,32 +391,31 @@ irregular(nullptr), set(nullptr) // for VOLUME, setup links to other dims // fixed, dynamic1, dynamic2 + volume_flag = 0; for (int i = 0; i < 3; i++) { if (set[i].style != VOLUME) continue; + volume_flag = 1; int other1 = (i+1) % 3; int other2 = (i+2) % 3; - if (set[other1].style == NONE) { + // Cannot use VOLUME option without at least one deformed dimension + if (set[other1].style == NONE || set[other1].style == VOLUME) if (set[other2].style == NONE || set[other2].style == VOLUME) error->all(FLERR,"Fix deform volume setting is invalid"); + + if (set[other1].style == NONE) { set[i].substyle = ONE_FROM_ONE; set[i].fixed = other1; set[i].dynamic1 = other2; } else if (set[other2].style == NONE) { - if (set[other1].style == NONE || set[other1].style == VOLUME) - error->all(FLERR,"Fix deform volume setting is invalid"); set[i].substyle = ONE_FROM_ONE; set[i].fixed = other2; set[i].dynamic1 = other1; } else if (set[other1].style == VOLUME) { - if (set[other2].style == NONE || set[other2].style == VOLUME) - error->all(FLERR,"Fix deform volume setting is invalid"); set[i].substyle = TWO_FROM_ONE; set[i].fixed = other1; set[i].dynamic1 = other2; } else if (set[other2].style == VOLUME) { - if (set[other1].style == NONE || set[other1].style == VOLUME) - error->all(FLERR,"Fix deform volume setting is invalid"); set[i].substyle = TWO_FROM_ONE; set[i].fixed = other2; set[i].dynamic1 = other1; @@ -321,12 +426,34 @@ irregular(nullptr), set(nullptr) } } + // set strain_flag + strain_flag = 0; + for (int i = 0; i < 6; i++) + if (set[i].style != NONE && set[i].style != VOLUME && + set[i].style != PRESSURE && set[i].style != PISOTROPIC) + strain_flag = 1; + // set varflag varflag = 0; for (int i = 0; i < 6; i++) if (set[i].style == VARIABLE) varflag = 1; + // set pressure_flag + + pressure_flag = 0; + for (int i = 0; i < 6; i++) { + if (set[i].style == PRESSURE || set[i].style == PISOTROPIC) pressure_flag = 1; + if (set[i].coupled_flag) pressure_flag = 1; + } + + // check conflict between constant volume/pressure + + if (volume_flag) + for (int i = 0; i < 6; i++) + if (set[i].style == PISOTROPIC) + error->all(FLERR, "Cannot use fix deform to assign constant volume and pressure"); + // set initial values at time fix deform is issued for (int i = 0; i < 3; i++) { @@ -350,6 +477,29 @@ irregular(nullptr), set(nullptr) else irregular = nullptr; TWOPI = 2.0*MY_PI; + + // Create pressure compute, if needed + + pflag = 0; + tflag = 0; + if (pressure_flag) { + // create a new compute temp style + // id = fix-ID + temp + // compute group = all since pressure is always global (group all) + // and thus its KE/temperature contribution should use group all + + id_temp = utils::strdup(std::string(id) + "_temp"); + modify->add_compute(fmt::format("{} all temp",id_temp)); + tflag = 1; + + // create a new compute pressure style + // id = fix-ID + press, compute group = all + // pass id_temp as 4th arg to pressure constructor + + id_press = utils::strdup(std::string(id) + "_press"); + modify->add_compute(fmt::format("{} all pressure {}",id_press, id_temp)); + pflag = 1; + } } /* ---------------------------------------------------------------------- */ @@ -360,6 +510,7 @@ FixDeform::~FixDeform() for (int i = 0; i < 6; i++) { delete[] set[i].hstr; delete[] set[i].hratestr; + delete[] set[i].pstr; } } delete[] set; @@ -374,6 +525,13 @@ FixDeform::~FixDeform() h_rate[0] = h_rate[1] = h_rate[2] = h_rate[3] = h_rate[4] = h_rate[5] = 0.0; h_ratelo[0] = h_ratelo[1] = h_ratelo[2] = 0.0; + + // delete temperature and pressure if fix created them + + if (tflag) modify->delete_compute(id_temp); + if (pflag) modify->delete_compute(id_press); + delete [] id_temp; + delete [] id_press; } /* ---------------------------------------------------------------------- */ @@ -574,7 +732,7 @@ void FixDeform::init() // set domain->h_rate values for use by domain and other fixes/computes // initialize all rates to 0.0 - // cannot set here for TRATE,VOLUME,WIGGLE,VARIABLE since not constant + // cannot set here for TRATE,VOLUME,WIGGLE,VARIABLE,PRESSURE since not constant h_rate = domain->h_rate; h_ratelo = domain->h_ratelo; @@ -611,6 +769,31 @@ void FixDeform::init() for (auto ifix : modify->get_fix_list()) if (ifix->rigid_flag) rfix.push_back(ifix); + + // Find pressure/temp computes if needed + + if (pressure_flag) { + int icompute = modify->find_compute(id_temp); + if (icompute < 0) + error->all(FLERR,"Temperature ID for fix deform does not exist"); + temperature = modify->compute[icompute]; + + icompute = modify->find_compute(id_press); + if (icompute < 0) + error->all(FLERR,"Pressure ID for fix deform does not exist"); + pressure = modify->compute[icompute]; + } +} + +/* ---------------------------------------------------------------------- + compute T,P if needed before integrator starts +------------------------------------------------------------------------- */ + +void FixDeform::setup(int /*vflag*/) +{ + // trigger virial computation on next timestep + + if (pressure_flag) pressure->addstep(update->ntimestep+1); } /* ---------------------------------------------------------------------- @@ -660,95 +843,40 @@ void FixDeform::end_of_step() if (varflag) modify->clearstep_compute(); - // set new box size - // for NONE, target is current box size - // for TRATE, set target directly based on current time, also set h_rate - // for WIGGLE, set target directly based on current time, also set h_rate - // for VARIABLE, set target directly via variable eval, also set h_rate - // for others except VOLUME, target is linear value between start and stop + // set new box size for strain-based dims - for (i = 0; i < 3; i++) { - if (set[i].style == NONE) { - set[i].lo_target = domain->boxlo[i]; - set[i].hi_target = domain->boxhi[i]; - } else if (set[i].style == TRATE) { - double delt = (update->ntimestep - update->beginstep) * update->dt; - set[i].lo_target = 0.5*(set[i].lo_start+set[i].hi_start) - - 0.5*((set[i].hi_start-set[i].lo_start) * exp(set[i].rate*delt)); - set[i].hi_target = 0.5*(set[i].lo_start+set[i].hi_start) + - 0.5*((set[i].hi_start-set[i].lo_start) * exp(set[i].rate*delt)); - h_rate[i] = set[i].rate * domain->h[i]; - h_ratelo[i] = -0.5*h_rate[i]; - } else if (set[i].style == WIGGLE) { - double delt = (update->ntimestep - update->beginstep) * update->dt; - set[i].lo_target = set[i].lo_start - - 0.5*set[i].amplitude * sin(TWOPI*delt/set[i].tperiod); - set[i].hi_target = set[i].hi_start + - 0.5*set[i].amplitude * sin(TWOPI*delt/set[i].tperiod); - h_rate[i] = TWOPI/set[i].tperiod * set[i].amplitude * - cos(TWOPI*delt/set[i].tperiod); - h_ratelo[i] = -0.5*h_rate[i]; - } else if (set[i].style == VARIABLE) { - double del = input->variable->compute_equal(set[i].hvar); - set[i].lo_target = set[i].lo_start - 0.5*del; - set[i].hi_target = set[i].hi_start + 0.5*del; - h_rate[i] = input->variable->compute_equal(set[i].hratevar); - h_ratelo[i] = -0.5*h_rate[i]; - } else if (set[i].style != VOLUME) { - set[i].lo_target = set[i].lo_start + - delta*(set[i].lo_stop - set[i].lo_start); - set[i].hi_target = set[i].hi_start + - delta*(set[i].hi_stop - set[i].hi_start); + set_strain(); + + // set new box size for pressure-based dims + + if (pressure_flag) { + temperature->compute_vector(); + pressure->compute_vector(); + for (int i = 0; i < 3; i++) { + if (set[i].saved == -1) { + set[i].saved = 1; + set[i].rate = 0.0; + set[i].prior_pressure = pressure->vector[i]; + if (i == 0) set[i].box_length = domain->xprd; + else if (i == 1) set[i].box_length = domain->yprd; + else (i == 2) set[i].box_length = domain->zprd; + } } + set_pressure(); } // set new box size for VOLUME dims that are linked to other dims // NOTE: still need to set h_rate for these dims - for (i = 0; i < 3; i++) { - if (set[i].style != VOLUME) continue; + if (volume_flag) set_volume(); - if (set[i].substyle == ONE_FROM_ONE) { - set[i].lo_target = 0.5*(set[i].lo_start+set[i].hi_start) - - 0.5*(set[i].vol_start / - (set[set[i].dynamic1].hi_target - - set[set[i].dynamic1].lo_target) / - (set[set[i].fixed].hi_start-set[set[i].fixed].lo_start)); - set[i].hi_target = 0.5*(set[i].lo_start+set[i].hi_start) + - 0.5*(set[i].vol_start / - (set[set[i].dynamic1].hi_target - - set[set[i].dynamic1].lo_target) / - (set[set[i].fixed].hi_start-set[set[i].fixed].lo_start)); + // Save pressure/strain rate if required - } else if (set[i].substyle == ONE_FROM_TWO) { - set[i].lo_target = 0.5*(set[i].lo_start+set[i].hi_start) - - 0.5*(set[i].vol_start / - (set[set[i].dynamic1].hi_target - - set[set[i].dynamic1].lo_target) / - (set[set[i].dynamic2].hi_target - - set[set[i].dynamic2].lo_target)); - set[i].hi_target = 0.5*(set[i].lo_start+set[i].hi_start) + - 0.5*(set[i].vol_start / - (set[set[i].dynamic1].hi_target - - set[set[i].dynamic1].lo_target) / - (set[set[i].dynamic2].hi_target - - set[set[i].dynamic2].lo_target)); - - } else if (set[i].substyle == TWO_FROM_ONE) { - set[i].lo_target = 0.5*(set[i].lo_start+set[i].hi_start) - - 0.5*sqrt(set[i].vol_start / - (set[set[i].dynamic1].hi_target - - set[set[i].dynamic1].lo_target) / - (set[set[i].fixed].hi_start - - set[set[i].fixed].lo_start) * - (set[i].hi_start - set[i].lo_start)); - set[i].hi_target = 0.5*(set[i].lo_start+set[i].hi_start) + - 0.5*sqrt(set[i].vol_start / - (set[set[i].dynamic1].hi_target - - set[set[i].dynamic1].lo_target) / - (set[set[i].fixed].hi_start - - set[set[i].fixed].lo_start) * - (set[i].hi_start - set[i].lo_start)); + if (pressure_flag) { + double dt_inv = 1.0 / update->dt; + for (int i = 0; i < 3; i++) { + set[i].prior_pressure = pressure->vector[i]; + set[i].prior_rate = ((set[i].hi_target - set[i].lo_target) / set[i].box_length - 1.0) * dt_inv; } } @@ -882,7 +1010,7 @@ void FixDeform::end_of_step() if (mask[i] & groupbit) domain->x2lamda(x[i],x[i]); - for (auto &ifix : rfix) + for (auto ifix : rfix) ifix->deform(0); } @@ -921,13 +1049,134 @@ void FixDeform::end_of_step() if (mask[i] & groupbit) domain->lamda2x(x[i],x[i]); - for (auto &ifix : rfix) + for (auto ifix : rfix) ifix->deform(1); } // redo KSpace coeffs since box has changed if (kspace_flag) force->kspace->setup(); + + // trigger virial computation, if needed, on next timestep + + if (pressure_flag) { + pressure->addstep(update->ntimestep+1); + set[0].box_length = domain->xprd; + set[1].box_length = domain->yprd; + set[2].box_length = domain->zprd; + } +} + +/* ---------------------------------------------------------------------- + set box size for strain-based dimensions +------------------------------------------------------------------------- */ + +void FixDeform::set_strain() +{ + // for NONE, target is current box size + // for TRATE, set target directly based on current time, also set h_rate + // for WIGGLE, set target directly based on current time, also set h_rate + // for VARIABLE, set target directly via variable eval, also set h_rate + // for others except VOLUME, target is linear value between start and stop + + for (int i = 0; i < 3; i++) { + if (set[i].style == NONE) { + set[i].lo_target = domain->boxlo[i]; + set[i].hi_target = domain->boxhi[i]; + } else if (set[i].style == TRATE) { + double delt = (update->ntimestep - update->beginstep) * update->dt; + set[i].lo_target = 0.5*(set[i].lo_start+set[i].hi_start) - + 0.5*((set[i].hi_start-set[i].lo_start) * exp(set[i].rate*delt)); + set[i].hi_target = 0.5*(set[i].lo_start+set[i].hi_start) + + 0.5*((set[i].hi_start-set[i].lo_start) * exp(set[i].rate*delt)); + h_rate[i] = set[i].rate * domain->h[i]; + h_ratelo[i] = -0.5*h_rate[i]; + } else if (set[i].style == WIGGLE) { + double delt = (update->ntimestep - update->beginstep) * update->dt; + set[i].lo_target = set[i].lo_start - + 0.5*set[i].amplitude * sin(TWOPI*delt/set[i].tperiod); + set[i].hi_target = set[i].hi_start + + 0.5*set[i].amplitude * sin(TWOPI*delt/set[i].tperiod); + h_rate[i] = TWOPI/set[i].tperiod * set[i].amplitude * + cos(TWOPI*delt/set[i].tperiod); + h_ratelo[i] = -0.5*h_rate[i]; + } else if (set[i].style == VARIABLE) { + double del = input->variable->compute_equal(set[i].hvar); + set[i].lo_target = set[i].lo_start - 0.5*del; + set[i].hi_target = set[i].hi_start + 0.5*del; + h_rate[i] = input->variable->compute_equal(set[i].hratevar); + h_ratelo[i] = -0.5*h_rate[i]; + } else if (set[i].style != VOLUME) { + set[i].lo_target = set[i].lo_start + + delta*(set[i].lo_stop - set[i].lo_start); + set[i].hi_target = set[i].hi_start + + delta*(set[i].hi_stop - set[i].hi_start); + } + } +} + +/* ---------------------------------------------------------------------- + set box size for pressure-based dimensions +------------------------------------------------------------------------- */ + +void FixDeform::set_pressure() +{ + for (int i = 0; i < 3; i++) { + + + } + // must define hi+lo target + // + +} + +/* ---------------------------------------------------------------------- + set box size for VOLUME dimensions +------------------------------------------------------------------------- */ + +void FixDeform::set_volume() +{ + for (int i = 0; i < 3; i++) { + if (set[i].style != VOLUME) continue; + + double v0 = set[i].vol_start; + double center_start = 0.5 * (set[i].lo_start + set[i].hi_start); + double offset; + + if (set[i].substyle == ONE_FROM_ONE) { + offset = 0.5 * (v0 / + (set[set[i].dynamic1].hi_target - set[set[i].dynamic1].lo_target) / + (set[set[i].fixed].hi_start-set[set[i].fixed].lo_start)); + } else if (set[i].substyle == ONE_FROM_TWO) { + offset = 0.5 * (v0 / + (set[set[i].dynamic1].hi_target - set[set[i].dynamic1].lo_target) / + (set[set[i].dynamic2].hi_target - set[set[i].dynamic2].lo_target)) + } else if (set[i].substyle == TWO_FROM_ONE) { + if (!set[i].coupled_flag) { + offset = 0.5 * sqrt(v0 * (set[i].hi_start - set[i].lo_start) / + (set[set[i].dynamic1].hi_target - set[set[i].dynamic1].lo_target) / + (set[set[i].fixed].hi_start - set[set[i].fixed].lo_start)) + } else { + double p1 = pressure->vector[i]; + double p2 = pressure->vector[set[i].fixed]; + double p1i = set[i].prior_pressure; + double p2i = set[set[i].fixed].prior_pressure; + double e1i = set[i].prior_rate; + double e2i = set[set[i].fixed].prior_rate; + double L3 = (set[set[i].dynamic1].hi_target - set[set[i].dynamic1].lo_target); + double dt = update->dt; + + double e3 = (L3 / set[set[i].dynamic1].box_length - 1.0) / dt; + double e1 = -e3 * dt / (1 + e3 * dt) * (p2 - p2i) - e2i * (p1 - p2); + e1 /= (p2 - p2i + (p1 - p1i) / e1p * e2p); + double e2 = (1.0 - (1 + e3 * dt) * (1 + e1 * dt)); + e2 /= (1 + e3 * dt) * (1 + e1 * dt) * dt; + offset = 0.5 * set[i].box_length * (1.0 - e3 * dt); + } + } + set[i].lo_target = center_start - offset; + set[i].hi_target = center_start + offset; + } } /* ---------------------------------------------------------------------- @@ -937,7 +1186,7 @@ void FixDeform::end_of_step() void FixDeform::write_restart(FILE *fp) { if (comm->me == 0) { - int size = 6*sizeof(Set); + int size = 6 * sizeof(Set); fwrite(&size,sizeof(int),1,fp); fwrite(set,sizeof(Set),6,fp); } @@ -996,8 +1245,22 @@ void FixDeform::options(int narg, char **arg) if (iarg+2 > narg) error->all(FLERR,"Illegal fix deform command"); flipflag = utils::logical(FLERR,arg[iarg+1],false,lmp); iarg += 2; + } else if (strcmp(arg[iarg],"couple") == 0) { + if (iarg+2 > narg) + error->all(FLERR,"Illegal fix fix deform command"); + if (strcmp(arg[iarg+1],"xyz") == 0) pcouple = XYZ; + else if (strcmp(arg[iarg+1],"xy") == 0) pcouple = XY; + else if (strcmp(arg[iarg+1],"yz") == 0) pcouple = YZ; + else if (strcmp(arg[iarg+1],"xz") == 0) pcouple = XZ; + else if (strcmp(arg[iarg+1],"none") == 0) pcouple = NOCOUPLE; + else error->all(FLERR,"Illegal fix fix deform command"); + iarg += 2; } else error->all(FLERR,"Illegal fix deform command"); } + + if (dimension == 2) + if (pcouple == XYZ || pcouple == XZ || pcouple == YZ) + error->all(FLERR, "Cannot couple Z dimension in fix deform in 2D"); } /* ---------------------------------------------------------------------- @@ -1010,3 +1273,57 @@ double FixDeform::memory_usage() if (irregular) bytes += irregular->memory_usage(); return bytes; } + + +/* ---------------------------------------------------------------------- */ + +int FixDeform::modify_param(int narg, char **arg) +{ + if (!pressure_flag) error->all(FLERR,"Cannot modify fix deform without a pressure control"); + if (strcmp(arg[0],"temp") == 0) { + if (narg < 2) error->all(FLERR,"Illegal fix_modify command"); + if (tflag) { + modify->delete_compute(id_temp); + tflag = 0; + } + delete [] id_temp; + id_temp = utils::strdup(arg[1]); + + int icompute = modify->find_compute(arg[1]); + if (icompute < 0) error->all(FLERR,"Could not find fix_modify temperature ID"); + temperature = modify->compute[icompute]; + + if (temperature->tempflag == 0) + error->all(FLERR,"Fix_modify temperature ID does not compute temperature"); + if (temperature->igroup != 0 && comm->me == 0) + error->warning(FLERR,"Temperature for deform is not for group all"); + + // reset id_temp of pressure to new temperature ID + + icompute = modify->find_compute(id_press); + if (icompute < 0) + error->all(FLERR,"Pressure ID for fix deform does not exist"); + modify->compute[icompute]->reset_extra_compute_fix(id_temp); + + return 2; + + } else if (strcmp(arg[0],"press") == 0) { + if (narg < 2) error->all(FLERR,"Illegal fix_modify command"); + if (pflag) { + modify->delete_compute(id_press); + pflag = 0; + } + delete [] id_press; + id_press = utils::strdup(arg[1]); + + int icompute = modify->find_compute(arg[1]); + if (icompute < 0) error->all(FLERR,"Could not find fix_modify pressure ID"); + pressure = modify->compute[icompute]; + + if (pressure->pressflag == 0) + error->all(FLERR,"Fix_modify pressure ID does not compute pressure"); + return 2; + } + + return 0; +} diff --git a/src/fix_deform.h b/src/fix_deform.h index 76f5fc9d4a..7297874258 100644 --- a/src/fix_deform.h +++ b/src/fix_deform.h @@ -33,23 +33,32 @@ class FixDeform : public Fix { ~FixDeform() override; int setmask() override; void init() override; + void setup(int) override; void pre_exchange() override; void end_of_step() override; void write_restart(FILE *) override; void restart(char *buf) override; double memory_usage() override; + int modify_param(int, char **) override; protected: - int triclinic, scaleflag, flipflag; + int dimension, triclinic, scaleflag, flipflag, pcouple; int flip, flipxy, flipxz, flipyz; double *h_rate, *h_ratelo; int varflag; // 1 if VARIABLE option is used, 0 if not + int strain_flag; // 1 if strain-based option is used, 0 if not + int volume_flag; // 1 if VOLUME option is used, 0 if not + int pressure_flag; // 1 if pressure tensor used, 0 if not int kspace_flag; // 1 if KSpace invoked, 0 if not std::vector rfix; // pointers to rigid fixes class Irregular *irregular; // for migrating atoms after box flips double TWOPI; + char *id_temp, *id_press; + class Compute *temperature, *pressure; + int tflag, pflag; + struct Set { int style, substyle; double flo, fhi, ftilt; @@ -61,13 +70,21 @@ class FixDeform : public Fix { double tilt_initial, tilt_start, tilt_stop, tilt_target, tilt_flip; double tilt_min, tilt_max; double vol_initial, vol_start; + double ptarget, pgain; + double prior_pressure, prior_rate; + double box_length; + int saved; int fixed, dynamic1, dynamic2; - char *hstr, *hratestr; + char *hstr, *hratestr, *pstr; int hvar, hratevar; + int pvar; + int coupled_flag; }; Set *set; void options(int, char **); + void set_volume(); + void couple(); }; } // namespace LAMMPS_NS From 66471c146537d8d1a32e00c8c7d9e68c4bc8ff07 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Tue, 11 Oct 2022 16:52:02 -0600 Subject: [PATCH 002/448] Adding pressure controls and fixing misc errors --- src/fix_deform.cpp | 375 ++++++++++++++++++++++++++++----------------- src/fix_deform.h | 4 +- 2 files changed, 235 insertions(+), 144 deletions(-) diff --git a/src/fix_deform.cpp b/src/fix_deform.cpp index 40221df839..766d7c22ae 100644 --- a/src/fix_deform.cpp +++ b/src/fix_deform.cpp @@ -17,14 +17,14 @@ ------------------------------------------------------------------------- */ // Save previous state to restart file for derivatives -// define hrate_lo/hi for volume -// add modify command -// add pressure code +// define hrate_lo/hi for volume/pressure +// add logic for hi_stop and flip flag #include "fix_deform.h" #include "atom.h" #include "comm.h" +#include "compute.h" #include "domain.h" #include "error.h" #include "force.h" @@ -84,36 +84,36 @@ irregular(nullptr), set(nullptr) else if (strcmp(arg[iarg],"y") == 0) index = 1; else if (strcmp(arg[iarg],"z") == 0) index = 2; - if (iarg+2 > narg) error->all(FLERR,"Illegal fix deform command"); + if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "fix deform", error); if (strcmp(arg[iarg+1],"final") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix deform command"); + if (iarg+4 > narg) utils::missing_cmd_args(FLERR, "fix deform final", error); set[index].style = FINAL; set[index].flo = utils::numeric(FLERR,arg[iarg+2],false,lmp); set[index].fhi = utils::numeric(FLERR,arg[iarg+3],false,lmp); iarg += 4; } else if (strcmp(arg[iarg+1],"delta") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix deform command"); + if (iarg+4 > narg) utils::missing_cmd_args(FLERR, "fix deform delta", error); set[index].style = DELTA; set[index].dlo = utils::numeric(FLERR,arg[iarg+2],false,lmp); set[index].dhi = utils::numeric(FLERR,arg[iarg+3],false,lmp); iarg += 4; } else if (strcmp(arg[iarg+1],"scale") == 0) { - if (iarg+3 > narg) error->all(FLERR,"Illegal fix deform command"); + if (iarg+3 > narg) utils::missing_cmd_args(FLERR, "fix deform scale", error); set[index].style = SCALE; set[index].scale = utils::numeric(FLERR,arg[iarg+2],false,lmp); iarg += 3; } else if (strcmp(arg[iarg+1],"vel") == 0) { - if (iarg+3 > narg) error->all(FLERR,"Illegal fix deform command"); + if (iarg+3 > narg) utils::missing_cmd_args(FLERR, "fix deform vel", error); set[index].style = VEL; set[index].vel = utils::numeric(FLERR,arg[iarg+2],false,lmp); iarg += 3; } else if (strcmp(arg[iarg+1],"erate") == 0) { - if (iarg+3 > narg) error->all(FLERR,"Illegal fix deform command"); + if (iarg+3 > narg) utils::missing_cmd_args(FLERR, "fix deform erate", error); set[index].style = ERATE; set[index].rate = utils::numeric(FLERR,arg[iarg+2],false,lmp); iarg += 3; } else if (strcmp(arg[iarg+1],"trate") == 0) { - if (iarg+3 > narg) error->all(FLERR,"Illegal fix deform command"); + if (iarg+3 > narg) utils::missing_cmd_args(FLERR, "fix deform trate", error); set[index].style = TRATE; set[index].rate = utils::numeric(FLERR,arg[iarg+2],false,lmp); iarg += 3; @@ -121,54 +121,52 @@ irregular(nullptr), set(nullptr) set[index].style = VOLUME; iarg += 2; } else if (strcmp(arg[iarg+1],"wiggle") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix deform command"); + if (iarg+4 > narg) utils::missing_cmd_args(FLERR, "fix deform wiggle", error); set[index].style = WIGGLE; set[index].amplitude = utils::numeric(FLERR,arg[iarg+2],false,lmp); set[index].tperiod = utils::numeric(FLERR,arg[iarg+3],false,lmp); if (set[index].tperiod <= 0.0) - error->all(FLERR,"Illegal fix deform command"); + error->all(FLERR,"Illegal fix deform wiggle period, must be positive"); iarg += 4; } else if (strcmp(arg[iarg+1],"variable") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix deform command"); + if (iarg+4 > narg) utils::missing_cmd_args(FLERR, "fix deform variable", error); set[index].style = VARIABLE; if (strstr(arg[iarg+2],"v_") != arg[iarg+2]) - error->all(FLERR,"Illegal fix deform command"); + error->all(FLERR,"Illegal fix deform variable name {}", arg[iarg+2]); if (strstr(arg[iarg+3],"v_") != arg[iarg+3]) - error->all(FLERR,"Illegal fix deform command"); + error->all(FLERR,"Illegal fix deform variable name {}", arg[iarg+3]); delete[] set[index].hstr; delete[] set[index].hratestr; set[index].hstr = utils::strdup(&arg[iarg+2][2]); set[index].hratestr = utils::strdup(&arg[iarg+3][2]); iarg += 4; } else if (strcmp(arg[iarg+1],"pressure") == 0) { - if (iarg+4 > narg) error->all(FLERR, "Illegal fix deform command"); + if (iarg+4 > narg) utils::missing_cmd_args(FLERR, "fix deform pressure", error); set[index].style = PRESSURE; if (strstr(arg[iarg+2],"v_") != arg[iarg+2]) { set[index].ptarget = utils::numeric(FLERR,arg[iarg+2],false,lmp); } else { - if (iarg+ > narg) error->all(FLERR,"Illegal fix deform command"); set[index].pstr = utils::strdup(&arg[iarg+2][2]); - set[index].pvar = 1; + set[index].pvar_flag = 1; } set[index].pgain = utils::numeric(FLERR,arg[iarg+3],false,lmp); if (set[index].pgain <= 0.0) - error->all(FLERR,"Illegal fix deform command"); + error->all(FLERR,"Illegal fix deform pressure gain, must be positive"); iarg += 4; } else if (strcmp(arg[iarg+1],"pressure/isotropic") == 0) { - if (iarg+4 > narg) error->all(FLERR, "Illegal fix deform command"); + if (iarg+4 > narg) utils::missing_cmd_args(FLERR, "fix deform pressure/isotropic", error); set[index].style = PISOTROPIC; if (strstr(arg[iarg+2],"v_") != arg[iarg+2]) { set[index].ptarget = utils::numeric(FLERR,arg[iarg+2],false,lmp); } else { - if (iarg+ > narg) error->all(FLERR,"Illegal fix deform command"); set[index].pstr = utils::strdup(&arg[iarg+2][2]); - set[index].pvar = 1; + set[index].pvar_flag = 1; } set[index].pgain = utils::numeric(FLERR,arg[iarg+3],false,lmp); if (set[index].pgain <= 0.0) - error->all(FLERR,"Illegal fix deform command"); + error->all(FLERR,"Illegal fix deform pressure gain, must be positive"); iarg += 4; - } error->all(FLERR,"Illegal fix deform command"); + } else error->all(FLERR,"Illegal fix deform command argument: {}", arg[iarg+1]); } else if (strcmp(arg[iarg],"xy") == 0 || strcmp(arg[iarg],"xz") == 0 || @@ -180,67 +178,66 @@ irregular(nullptr), set(nullptr) else if (strcmp(arg[iarg],"xz") == 0) index = 4; else if (strcmp(arg[iarg],"yz") == 0) index = 3; - if (iarg+2 > narg) error->all(FLERR,"Illegal fix deform command"); + if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "fix deform", error); if (strcmp(arg[iarg+1],"final") == 0) { - if (iarg+3 > narg) error->all(FLERR,"Illegal fix deform command"); + if (iarg+3 > narg) utils::missing_cmd_args(FLERR, "fix deform final", error); set[index].style = FINAL; set[index].ftilt = utils::numeric(FLERR,arg[iarg+2],false,lmp); iarg += 3; } else if (strcmp(arg[iarg+1],"delta") == 0) { - if (iarg+3 > narg) error->all(FLERR,"Illegal fix deform command"); + if (iarg+3 > narg) utils::missing_cmd_args(FLERR, "fix deform delta", error); set[index].style = DELTA; set[index].dtilt = utils::numeric(FLERR,arg[iarg+2],false,lmp); iarg += 3; } else if (strcmp(arg[iarg+1],"vel") == 0) { - if (iarg+3 > narg) error->all(FLERR,"Illegal fix deform command"); + if (iarg+3 > narg) utils::missing_cmd_args(FLERR, "fix deform vel", error); set[index].style = VEL; set[index].vel = utils::numeric(FLERR,arg[iarg+2],false,lmp); iarg += 3; } else if (strcmp(arg[iarg+1],"erate") == 0) { - if (iarg+3 > narg) error->all(FLERR,"Illegal fix deform command"); + if (iarg+3 > narg) utils::missing_cmd_args(FLERR, "fix deform erate", error); set[index].style = ERATE; set[index].rate = utils::numeric(FLERR,arg[iarg+2],false,lmp); iarg += 3; } else if (strcmp(arg[iarg+1],"trate") == 0) { - if (iarg+3 > narg) error->all(FLERR,"Illegal fix deform command"); + if (iarg+3 > narg) utils::missing_cmd_args(FLERR, "fix deform trate", error); set[index].style = TRATE; set[index].rate = utils::numeric(FLERR,arg[iarg+2],false,lmp); iarg += 3; } else if (strcmp(arg[iarg+1],"wiggle") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix deform command"); + if (iarg+4 > narg) utils::missing_cmd_args(FLERR, "fix deform wiggle", error); set[index].style = WIGGLE; set[index].amplitude = utils::numeric(FLERR,arg[iarg+2],false,lmp); set[index].tperiod = utils::numeric(FLERR,arg[iarg+3],false,lmp); if (set[index].tperiod <= 0.0) - error->all(FLERR,"Illegal fix deform command"); + error->all(FLERR,"Illegal fix deform wiggle period, must be positive"); iarg += 4; } else if (strcmp(arg[iarg+1],"variable") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix deform command"); + if (iarg+4 > narg) utils::missing_cmd_args(FLERR, "fix deform variable", error); set[index].style = VARIABLE; if (strstr(arg[iarg+2],"v_") != arg[iarg+2]) - error->all(FLERR,"Illegal fix deform command"); + error->all(FLERR,"Illegal fix deform variable name {}", arg[iarg+2]); if (strstr(arg[iarg+3],"v_") != arg[iarg+3]) - error->all(FLERR,"Illegal fix deform command"); + error->all(FLERR,"Illegal fix deform variable name {}", arg[iarg+3]); delete[] set[index].hstr; delete[] set[index].hratestr; set[index].hstr = utils::strdup(&arg[iarg+2][2]); set[index].hratestr = utils::strdup(&arg[iarg+3][2]); iarg += 4; } else if (strcmp(arg[iarg+1],"pressure") == 0) { - if (iarg+4 > narg) error->all(FLERR, "Illegal fix deform command"); + if (iarg+4 > narg) utils::missing_cmd_args(FLERR, "fix deform pressure", error); set[index].style = PRESSURE; if (strstr(arg[iarg+2],"v_") != arg[iarg+2]) { set[index].ptarget = utils::numeric(FLERR,arg[iarg+2],false,lmp); } else { - if (iarg+ > narg) error->all(FLERR,"Illegal fix deform command"); set[index].pstr = utils::strdup(&arg[iarg+2][2]); - set[index].pvar = 1; + set[index].pvar_flag = 1; } set[index].pgain = utils::numeric(FLERR,arg[iarg+3],false,lmp); if (set[index].pgain <= 0.0) - error->all(FLERR,"Illegal fix deform command"); + error->all(FLERR,"Illegal fix deform pressure gain, must be positive"); iarg += 4; - } else error->all(FLERR,"Illegal fix deform command"); + } else error->all(FLERR,"Illegal fix deform command: {}", arg[iarg+1]); } else break; } @@ -270,7 +267,7 @@ irregular(nullptr), set(nullptr) if (coupled_indices[i]) { set[i].coupled_flag = 1; if (set[i].style != VOLUME && set[i].style != PRESSURE && set[i].style != NONE) - error->all(FLERR, "Cannot couple dimensions not controlled by pressure or volume in fix deform"); + error->all(FLERR, "Cannot couple dimensions unless they are controlled using the pressure or volume option in fix deform"); if (set[i].style == PRESSURE || set[i].style == VOLUME) j = i; } @@ -279,30 +276,45 @@ irregular(nullptr), set(nullptr) if (j == -1) error->all(FLERR, "Must specify pressure style for a coupled dimension in fix deform"); - // Copy data to each coupled dimension + // Copy or compare data for each coupled dimension for (int i = 0; i < 3; i++) { if (coupled_indices[i]) { - if (set[j].style != set[i].style && set[i].style != NONE) - error->all(FLERR, "Cannot couple dimensions with different control options"); - + // Copy coupling information if dimension style is undefined if (set[j].style == PRESSURE && set[i].style == NONE) { set[i].style = PRESSURE; set[i].pgain = set[j].pgain; - if (set[j].pvar == 1) { + if (set[j].pvar_flag) { set[i].pstr = set[j].pstr; - set[i].pvar = 1; + set[i].pvar_flag = 1; } else { set[i].ptarget = set[j].ptarget; } } else if (set[j].style == VOLUME && set[i].style == NONE) { set[i].style = VOLUME; - set[i].saved = -1; if (domain->dimension == 2) error->all(FLERR, "Cannot couple pressure with constant volume in two dimensions"); } + + // Check for incompatibilities in style + if (set[j].style != set[i].style && set[i].style != NONE) + error->all(FLERR, "Cannot couple dimensions with different control options"); + if (set[j].style != PRESSURE) continue; + + // If pressure controlled, check for incompatibilities in parameters + if (set[i].pgain != set[j].pgain) + error->all(FLERR, "Coupled dimensions must have identical gain parameters\n"); + + if (set[i].pvar_flag != set[j].pvar_flag) + error->all(FLERR, "Coupled dimensions must have the same target pressure\n"); + if (set[j].pvar_flag) + if (strcmp(set[i].pstr, set[j].pstr) != 0) + error->all(FLERR, "Coupled dimensions must have the same target pressure\n"); + if (set[i].ptarget != set[j].ptarget) + error->all(FLERR, "Coupled dimensions must have the same target pressure\n"); + } else { if (set[i].style == VOLUME && set[j].style == VOLUME) - error->all(FLERR, "Dimensions used to maintain constant volume must either be all be coupled or not coupled"); + error->all(FLERR, "Dimensions used to maintain constant volume must either all be coupled or uncoupled"); } } } @@ -427,6 +439,7 @@ irregular(nullptr), set(nullptr) } // set strain_flag + strain_flag = 0; for (int i = 0; i < 6; i++) if (set[i].style != NONE && set[i].style != VOLUME && @@ -569,14 +582,25 @@ void FixDeform::init() if (set[i].style != VARIABLE) continue; set[i].hvar = input->variable->find(set[i].hstr); if (set[i].hvar < 0) - error->all(FLERR,"Variable name for fix deform does not exist"); + error->all(FLERR,"Variable name {} for fix deform does not exist", set[i].hstr); if (!input->variable->equalstyle(set[i].hvar)) - error->all(FLERR,"Variable for fix deform is invalid style"); + error->all(FLERR,"Variable {} for fix deform is invalid style", set[i].hstr); set[i].hratevar = input->variable->find(set[i].hratestr); if (set[i].hratevar < 0) - error->all(FLERR,"Variable name for fix deform does not exist"); + error->all(FLERR,"Variable name {} for fix deform does not exist", set[i].hratestr); if (!input->variable->equalstyle(set[i].hratevar)) - error->all(FLERR,"Variable for fix deform is invalid style"); + error->all(FLERR,"Variable {} for fix deform is invalid style", set[i].hratestr); + } + + // check optional variables for PRESSURE or PISOTROPIC style + + for (int i = 0; i < 6; i++) { + if (!set[i].pvar_flag) continue; + set[i].pvar = input->variable->find(set[i].pstr); + if (set[i].pvar < 0) + error->all(FLERR,"Variable name {} for fix deform does not exist", set[i].pstr); + if (!input->variable->equalstyle(set[i].pvar)) + error->all(FLERR,"Variable {} for fix deform is invalid style", set[i].pstr); } // set start/stop values for box size and shape @@ -834,11 +858,6 @@ void FixDeform::pre_exchange() void FixDeform::end_of_step() { - int i; - - double delta = update->ntimestep - update->beginstep; - if (delta != 0.0) delta /= update->endstep - update->beginstep; - // wrap variable evaluations with clear/add if (varflag) modify->clearstep_compute(); @@ -853,13 +872,13 @@ void FixDeform::end_of_step() temperature->compute_vector(); pressure->compute_vector(); for (int i = 0; i < 3; i++) { - if (set[i].saved == -1) { + if (! set[i].saved) { set[i].saved = 1; - set[i].rate = 0.0; + set[i].prior_rate = 0.0; set[i].prior_pressure = pressure->vector[i]; if (i == 0) set[i].box_length = domain->xprd; else if (i == 1) set[i].box_length = domain->yprd; - else (i == 2) set[i].box_length = domain->zprd; + else set[i].box_length = domain->zprd; } } set_pressure(); @@ -880,61 +899,6 @@ void FixDeform::end_of_step() } } - // for triclinic, set new box shape - // for NONE, target is current tilt - // for TRATE, set target directly based on current time. also set h_rate - // for WIGGLE, set target directly based on current time. also set h_rate - // for VARIABLE, set target directly via variable eval. also set h_rate - // for other styles, target is linear value between start and stop values - - if (triclinic) { - double *h = domain->h; - - for (i = 3; i < 6; i++) { - if (set[i].style == NONE) { - if (i == 5) set[i].tilt_target = domain->xy; - else if (i == 4) set[i].tilt_target = domain->xz; - else if (i == 3) set[i].tilt_target = domain->yz; - } else if (set[i].style == TRATE) { - double delt = (update->ntimestep - update->beginstep) * update->dt; - set[i].tilt_target = set[i].tilt_start * exp(set[i].rate*delt); - h_rate[i] = set[i].rate * domain->h[i]; - } else if (set[i].style == WIGGLE) { - double delt = (update->ntimestep - update->beginstep) * update->dt; - set[i].tilt_target = set[i].tilt_start + - set[i].amplitude * sin(TWOPI*delt/set[i].tperiod); - h_rate[i] = TWOPI/set[i].tperiod * set[i].amplitude * - cos(TWOPI*delt/set[i].tperiod); - } else if (set[i].style == VARIABLE) { - double delta_tilt = input->variable->compute_equal(set[i].hvar); - set[i].tilt_target = set[i].tilt_start + delta_tilt; - h_rate[i] = input->variable->compute_equal(set[i].hratevar); - } else { - set[i].tilt_target = set[i].tilt_start + - delta*(set[i].tilt_stop - set[i].tilt_start); - } - - // tilt_target can be large positive or large negative value - // add/subtract box lengths until tilt_target is closest to current value - - int idenom = 0; - if (i == 5) idenom = 0; - else if (i == 4) idenom = 0; - else if (i == 3) idenom = 1; - double denom = set[idenom].hi_target - set[idenom].lo_target; - - double current = h[i]/h[idenom]; - - while (set[i].tilt_target/denom - current > 0.0) - set[i].tilt_target -= denom; - while (set[i].tilt_target/denom - current < 0.0) - set[i].tilt_target += denom; - if (fabs(set[i].tilt_target/denom - 1.0 - current) < - fabs(set[i].tilt_target/denom - current)) - set[i].tilt_target -= denom; - } - } - if (varflag) modify->addstep_compute(update->ntimestep + nevery); // if any tilt ratios exceed 0.5, set flip = 1 and compute new tilt values @@ -1006,7 +970,7 @@ void FixDeform::end_of_step() int *mask = atom->mask; int nlocal = atom->nlocal; - for (i = 0; i < nlocal; i++) + for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) domain->x2lamda(x[i],x[i]); @@ -1045,7 +1009,7 @@ void FixDeform::end_of_step() int *mask = atom->mask; int nlocal = atom->nlocal; - for (i = 0; i < nlocal; i++) + for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) domain->lamda2x(x[i],x[i]); @@ -1079,6 +1043,9 @@ void FixDeform::set_strain() // for VARIABLE, set target directly via variable eval, also set h_rate // for others except VOLUME, target is linear value between start and stop + double delta = update->ntimestep - update->beginstep; + if (delta != 0.0) delta /= update->endstep - update->beginstep; + for (int i = 0; i < 3; i++) { if (set[i].style == NONE) { set[i].lo_target = domain->boxlo[i]; @@ -1113,6 +1080,62 @@ void FixDeform::set_strain() delta*(set[i].hi_stop - set[i].hi_start); } } + + + // for triclinic, set new box shape + // for NONE, target is current tilt + // for TRATE, set target directly based on current time. also set h_rate + // for WIGGLE, set target directly based on current time. also set h_rate + // for VARIABLE, set target directly via variable eval. also set h_rate + // for other styles, target is linear value between start and stop values + + if (triclinic) { + double *h = domain->h; + + for (int i = 3; i < 6; i++) { + if (set[i].style == NONE) { + if (i == 5) set[i].tilt_target = domain->xy; + else if (i == 4) set[i].tilt_target = domain->xz; + else if (i == 3) set[i].tilt_target = domain->yz; + } else if (set[i].style == TRATE) { + double delt = (update->ntimestep - update->beginstep) * update->dt; + set[i].tilt_target = set[i].tilt_start * exp(set[i].rate*delt); + h_rate[i] = set[i].rate * domain->h[i]; + } else if (set[i].style == WIGGLE) { + double delt = (update->ntimestep - update->beginstep) * update->dt; + set[i].tilt_target = set[i].tilt_start + + set[i].amplitude * sin(TWOPI*delt/set[i].tperiod); + h_rate[i] = TWOPI/set[i].tperiod * set[i].amplitude * + cos(TWOPI*delt/set[i].tperiod); + } else if (set[i].style == VARIABLE) { + double delta_tilt = input->variable->compute_equal(set[i].hvar); + set[i].tilt_target = set[i].tilt_start + delta_tilt; + h_rate[i] = input->variable->compute_equal(set[i].hratevar); + } else { + set[i].tilt_target = set[i].tilt_start + + delta*(set[i].tilt_stop - set[i].tilt_start); + } + + // tilt_target can be large positive or large negative value + // add/subtract box lengths until tilt_target is closest to current value + + int idenom = 0; + if (i == 5) idenom = 0; + else if (i == 4) idenom = 0; + else if (i == 3) idenom = 1; + double denom = set[idenom].hi_target - set[idenom].lo_target; + + double current = h[i]/h[idenom]; + + while (set[i].tilt_target/denom - current > 0.0) + set[i].tilt_target -= denom; + while (set[i].tilt_target/denom - current < 0.0) + set[i].tilt_target += denom; + if (fabs(set[i].tilt_target/denom - 1.0 - current) < + fabs(set[i].tilt_target/denom - current)) + set[i].tilt_target -= denom; + } + } } /* ---------------------------------------------------------------------- @@ -1121,13 +1144,59 @@ void FixDeform::set_strain() void FixDeform::set_pressure() { - for (int i = 0; i < 3; i++) { + // If variable pressure, calculate current target + for (int i = 0; i < 6; i++) + if (set[i].style == PRESSURE) + if (set[i].pvar_flag) + set[i].ptarget = input->variable->compute_equal(set[i].pvar); + // Find current (possibly coupled/hydrostatic) pressure for X, Y, Z + double *tensor = pressure->vector; + double scalar = pressure->scalar; + double p_current[3]; + if (pcouple == XYZ) { + double ave = 1.0/3.0 * (tensor[0] + tensor[1] + tensor[2]); + p_current[0] = p_current[1] = p_current[2] = ave; + } else if (pcouple == XY) { + double ave = 0.5 * (tensor[0] + tensor[1]); + p_current[0] = p_current[1] = ave; + p_current[2] = tensor[2]; + } else if (pcouple == YZ) { + double ave = 0.5 * (tensor[1] + tensor[2]); + p_current[1] = p_current[2] = ave; + p_current[0] = tensor[0]; + } else if (pcouple == XZ) { + double ave = 0.5 * (tensor[0] + tensor[2]); + p_current[0] = p_current[2] = ave; + p_current[1] = tensor[1]; + } else { + if (set[0].style == PRESSURE) p_current[0] = tensor[0]; + else if (set[0].style == PISOTROPIC) p_current[0] = scalar; + + if (set[1].style == PRESSURE) p_current[1] = tensor[1]; + else if (set[1].style == PISOTROPIC) p_current[1] = scalar; + + if (set[2].style == PRESSURE) p_current[2] = tensor[2]; + else if (set[2].style == PISOTROPIC) p_current[2] = scalar; } - // must define hi+lo target - // + for (int i = 0; i < 3; i++) { + if (set[i].style != PRESSURE && set[i].style != PISOTROPIC) continue; + double dilation = 1.0 - update->dt * set[i].pgain * (set[i].ptarget - p_current[i]); + double center_start = 0.5 * (set[i].lo_start + set[i].hi_start); + double offset = 0.5 * set[i].box_length * dilation; + //printf("ptarget %g vs %g, dilation %g cs %g ofset %g box %g\n", set[i].ptarget, p_current[i], dilation, center_start, offset, set[i].box_length); + set[i].lo_target = center_start - offset; + set[i].hi_target = center_start + offset; + } + + for (int i = 3; i < 6; i++) { + double shift = update->dt * set[i].pgain * (set[i].ptarget - tensor[i]); + if (i == 3) set[i].tilt_target = domain->xy + shift * domain->xprd; + else if (i == 4) set[i].tilt_target = domain->xz + shift * domain->xprd; + else set[i].tilt_target = domain->yz + shift * domain->yprd; + } } /* ---------------------------------------------------------------------- @@ -1136,6 +1205,9 @@ void FixDeform::set_pressure() void FixDeform::set_volume() { + double e1, e2; + int linked_pressure = 0; + for (int i = 0; i < 3; i++) { if (set[i].style != VOLUME) continue; @@ -1150,28 +1222,46 @@ void FixDeform::set_volume() } else if (set[i].substyle == ONE_FROM_TWO) { offset = 0.5 * (v0 / (set[set[i].dynamic1].hi_target - set[set[i].dynamic1].lo_target) / - (set[set[i].dynamic2].hi_target - set[set[i].dynamic2].lo_target)) + (set[set[i].dynamic2].hi_target - set[set[i].dynamic2].lo_target)); } else if (set[i].substyle == TWO_FROM_ONE) { if (!set[i].coupled_flag) { offset = 0.5 * sqrt(v0 * (set[i].hi_start - set[i].lo_start) / (set[set[i].dynamic1].hi_target - set[set[i].dynamic1].lo_target) / - (set[set[i].fixed].hi_start - set[set[i].fixed].lo_start)) + (set[set[i].fixed].hi_start - set[set[i].fixed].lo_start)); } else { + double dt = update->dt; + double e1i = set[i].prior_rate; + double e2i = set[set[i].fixed].prior_rate; + double L3 = (set[set[i].dynamic1].hi_target - set[set[i].dynamic1].lo_target); + double e3 = (L3 / set[set[i].dynamic1].box_length - 1.0) / dt; double p1 = pressure->vector[i]; double p2 = pressure->vector[set[i].fixed]; double p1i = set[i].prior_pressure; double p2i = set[set[i].fixed].prior_pressure; - double e1i = set[i].prior_rate; - double e2i = set[set[i].fixed].prior_rate; - double L3 = (set[set[i].dynamic1].hi_target - set[set[i].dynamic1].lo_target); - double dt = update->dt; - double e3 = (L3 / set[set[i].dynamic1].box_length - 1.0) / dt; - double e1 = -e3 * dt / (1 + e3 * dt) * (p2 - p2i) - e2i * (p1 - p2); - e1 /= (p2 - p2i + (p1 - p1i) / e1p * e2p); - double e2 = (1.0 - (1 + e3 * dt) * (1 + e1 * dt)); - e2 /= (1 + e3 * dt) * (1 + e1 * dt) * dt; - offset = 0.5 * set[i].box_length * (1.0 - e3 * dt); + if (e3 == 0) { + e1 = 0.0; + e2 = 0.0; + offset = 0.5 * set[i].box_length; + } else if (e1i == 0 || e2i == 0 || (p2 == p2i && p1 == p1i)) { + // If no prior strain or no change in pressure (initial step) just scale offset by relative box lengths + offset = 0.5 * sqrt(v0 * set[i].box_length / L3 / set[set[i].fixed].box_length); + } else { + if (! linked_pressure) { + // Calculate first strain rate by expanding stress to linear order in strain to achieve p1(t+dt) = p2(t+dt) + e1 = -e3 / (1 + e3 * dt) * (p2 - p2i) - e2i * (p1 - p2); + e1 /= (p2 - p2i + (p1 - p1i) / e1i * e2i); + + // Calculate second strain rate to preserve volume + e2 = (1.0 - (1 + e3 * dt) * (1 + e1 * dt)); + e2 /= (1 + e3 * dt) * (1 + e1 * dt) * dt; + + offset = 0.5 * set[i].box_length * (1.0 + e1 * dt); + linked_pressure = 1; + } else { + offset = 0.5 * set[i].box_length * (1.0 + e2 * dt); + } + } } } set[i].lo_target = center_start - offset; @@ -1229,33 +1319,32 @@ void FixDeform::options(int narg, char **arg) int iarg = 0; while (iarg < narg) { if (strcmp(arg[iarg],"remap") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix deform command"); + if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "fix deform remap", error); if (strcmp(arg[iarg+1],"x") == 0) remapflag = Domain::X_REMAP; else if (strcmp(arg[iarg+1],"v") == 0) remapflag = Domain::V_REMAP; else if (strcmp(arg[iarg+1],"none") == 0) remapflag = Domain::NO_REMAP; - else error->all(FLERR,"Illegal fix deform command"); + else error->all(FLERR,"Illegal fix deform remap command: {}", arg[iarg+1]); iarg += 2; } else if (strcmp(arg[iarg],"units") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix deform command"); + if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "fix deform units", error); if (strcmp(arg[iarg+1],"box") == 0) scaleflag = 0; else if (strcmp(arg[iarg+1],"lattice") == 0) scaleflag = 1; - else error->all(FLERR,"Illegal fix deform command"); + else error->all(FLERR,"Illegal fix deform units command: {}", arg[iarg+1]); iarg += 2; } else if (strcmp(arg[iarg],"flip") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix deform command"); + if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "fix deform flip", error); flipflag = utils::logical(FLERR,arg[iarg+1],false,lmp); iarg += 2; } else if (strcmp(arg[iarg],"couple") == 0) { - if (iarg+2 > narg) - error->all(FLERR,"Illegal fix fix deform command"); + if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "fix deform couple", error); if (strcmp(arg[iarg+1],"xyz") == 0) pcouple = XYZ; else if (strcmp(arg[iarg+1],"xy") == 0) pcouple = XY; else if (strcmp(arg[iarg+1],"yz") == 0) pcouple = YZ; else if (strcmp(arg[iarg+1],"xz") == 0) pcouple = XZ; else if (strcmp(arg[iarg+1],"none") == 0) pcouple = NOCOUPLE; - else error->all(FLERR,"Illegal fix fix deform command"); + else error->all(FLERR,"Illegal fix fix deform couple command: {}", arg[iarg+1]); iarg += 2; - } else error->all(FLERR,"Illegal fix deform command"); + } else error->all(FLERR,"Illegal fix deform command: {}", arg[iarg]); } if (dimension == 2) @@ -1281,7 +1370,7 @@ int FixDeform::modify_param(int narg, char **arg) { if (!pressure_flag) error->all(FLERR,"Cannot modify fix deform without a pressure control"); if (strcmp(arg[0],"temp") == 0) { - if (narg < 2) error->all(FLERR,"Illegal fix_modify command"); + if (narg < 2) utils::missing_cmd_args(FLERR, "fix_modify deform", error); if (tflag) { modify->delete_compute(id_temp); tflag = 0; diff --git a/src/fix_deform.h b/src/fix_deform.h index 7297874258..9ecb9a577d 100644 --- a/src/fix_deform.h +++ b/src/fix_deform.h @@ -77,12 +77,14 @@ class FixDeform : public Fix { int fixed, dynamic1, dynamic2; char *hstr, *hratestr, *pstr; int hvar, hratevar; - int pvar; + int pvar, pvar_flag; int coupled_flag; }; Set *set; void options(int, char **); + void set_strain(); + void set_pressure(); void set_volume(); void couple(); }; From 173e2382b3c3752439b601e7fa88f1a7523b50ce Mon Sep 17 00:00:00 2001 From: jtclemm Date: Sat, 22 Oct 2022 19:03:52 -0600 Subject: [PATCH 003/448] Adding documentation, various updates --- doc/src/fix_deform.rst | 169 ++++++++++++++- src/fix_deform.cpp | 482 ++++++++++++++++++++++------------------- src/fix_deform.h | 7 +- 3 files changed, 425 insertions(+), 233 deletions(-) diff --git a/doc/src/fix_deform.rst b/doc/src/fix_deform.rst index 805bd84382..d46f1204f5 100644 --- a/doc/src/fix_deform.rst +++ b/doc/src/fix_deform.rst @@ -34,6 +34,12 @@ Syntax effectively an engineering strain rate *erate* value = R R = engineering strain rate (1/time units) + *pressure* values = target gain + target = target pressure (pressure units) + gain = proportional gain constant (1/(time * pressure) or 1/time units) + *pressure/mean* values = target gain + target = target pressure (pressure units) + gain = proportional gain constant (1/(time * pressure) or 1/time units) *trate* value = R R = true strain rate (1/time units) *volume* value = none = adjust this dim to preserve volume of system @@ -54,6 +60,9 @@ Syntax effectively an engineering shear strain rate *erate* value = R R = engineering shear strain rate (1/time units) + *pressure* values = target gain + target = target pressure (pressure units) + gain = proportional gain constant (1/(time * pressure) or 1/time units) *trate* value = R R = true shear strain rate (1/time units) *wiggle* values = A Tp @@ -64,7 +73,7 @@ Syntax v_name2 = variable with name2 for change rate as function of time * zero or more keyword/value pairs may be appended -* keyword = *remap* or *flip* or *units* +* keyword = *remap* or *flip* or *units* or *couple* or *vol/balance/p* or *max/rate* or *normalize/pressure* .. parsed-literal:: @@ -77,6 +86,14 @@ Syntax *units* value = *lattice* or *box* lattice = distances are defined in lattice units box = distances are defined in simulation box units + *couple* value = *none* or *xyz* or *xy* or *yz* or *xz* + couple pressure values of various dimensions + *vol/balance/p* = *yes* or *no* + Modifies the behavior of the *volume* option to try and balance pressures + *max/rate* value = *rate* + rate = maximum strain rate for pressure control + *normalize/pressure* value = *yes* or *no* + determine whether pressure deviation is normalized by target pressure Examples """""""" @@ -87,6 +104,9 @@ Examples fix 1 all deform 1 x trate 0.1 y volume z volume fix 1 all deform 1 xy erate 0.001 remap v fix 1 all deform 10 y delta -0.5 0.5 xz vel 1.0 + fix 1 all deform 1 x pressure 2.0 0.1 normalize/pressure yes max/rate 0.001 + fix 1 all deform 1 x trate 0.1 y volume z volume vol/balance/p yes + fix 1 all deform 1 x trate 0.1 y pressure/mean 0.0 1.0 z pressure/mean 0.0 1.0 Description """"""""""" @@ -230,7 +250,11 @@ the product of x,z lengths constant. If "x scale 1.1 y volume z volume" is specified, then both the y,z box lengths will shrink as x increases to keep the volume constant (product of x,y,z lengths). In this case, the y,z box lengths shrink so as to keep their relative -aspect ratio constant. +aspect ratio constant. When maintaining a constant volume using two +separate dimensions, one can alternatively allow the two dimensions +to adjust their aspect ratio to attempt to maintain equivalent +pressures along the two dimensions. See the +:ref:`vol/balance/p ` option for more details. For solids or liquids, note that when one dimension of the box is expanded via fix deform (i.e. tensile strain), it may be physically @@ -292,6 +316,38 @@ For the *scale*, *vel*, *erate*, *trate*, *volume*, *wiggle*, and *variable* styles, the box length is expanded or compressed around its mid point. +The *pressure* style adjusts a dimensions's box length to control that +component of the pressure tensor. This option attempts to maintain a +specified target value using a linear controller where the box length L +evolves according to the equation + +.. parsed-literal:: + + \frac{d L(t)}{dt} = L(t) k (P_t - P) + +where :math:`k` is a proportional gain constant, :math:`P_t` is the target +pressure, and :math:`P` is the current pressure along that dimension. This +approach is similar to the method used to control the pressure by +:doc:`fix press/berendsen `. The target pressure +accepts either a constant numeric value or a LAMMPS :ref:`variable `. +Notably, this variable can be a function of time or other components of +the pressure tensor. By default, :math:`k` has units of 1/(time * pressure) +although this will change if the *normalize/pessure* option is set as +:ref:`discussed below `. There is no proven method +to choosing an appropriate value of :math:`k` as it will depend on the +specific details of a simulation and testing different values is +recommended. One can also apply a maximum limit to the magnitude of the +applied strain using the :ref:`max/rate ` option and couple +pressures in different dimensions using the :ref:`couple ` +option. + +The *pressure/mean* style is changes a dimension in order to maintain +a constant mean pressure defined as the trace of the pressure tensor. +This option is therefore very similar to the *presssure* style with +identical arguments except the current and target pressures refer to the +mean trace of the pressure tensor. The same options also apply except +for the :ref:`couple ` option. + ---------- For the *xy*, *xz*, and *yz* parameters, this is the meaning of their @@ -433,6 +489,27 @@ assume that the current timestep = 0. variable rate equal "2*PI*v_A/v_Tp * cos(2*PI * step*dt/v_Tp)" fix 2 all deform 1 xy variable v_displace v_rate remap v +The *pressure* style adjusts a tilt factor to control the corresponding +off-diagonal component of the pressure tensor. This option attempts to +maintain a specified target value using a linear controller where the +tilt factor T evolves according to the equation + +.. parsed-literal:: + + \frac{d T(t)}{dt} = L(t) k (P - P_t) + +where :math:`k` is a proportional gain constant, :math:`P_t` is the target +pressure, :math:`P` is the current pressure, and :math:`L` is the perpendicular +box length. The target pressure accepts either a constant numeric value or a +LAMMPS :ref:`variable `. Notably, this variable can be a function +of time or other components of the pressure tensor. By default, :math:`k` +has units of 1/(time * pressure) although this will change if the +*normalize/pessure* option is set as :ref:`discussed below `. +There is no proven method to choosing an appropriate value of :math:`k` as it +will depend on thespecific details of a simulation and testing different +values isrecommended. One can also apply a maximum limit to the magnitude +of the applied strain using the :ref:`max/rate ` option. + ---------- All of the tilt styles change the xy, xz, yz tilt factors during a @@ -561,6 +638,73 @@ does not affect the *variable* style. You should use the *xlat*, *ylat*, *zlat* keywords of the :doc:`thermo_style ` command if you want to include lattice spacings in a variable formula. +.. _deform_normalize: + +The *normalize/pressure* keyword changes how box dimensions evolve when +using the *pressure* or *pressure/mean* deformation options. If the +*deform/normalize* value is set to *yes*, then the deviation from the +target pressure is normalized by the absolute value of the target +pressure such that the proportional gain constant scales a percentage +error and has units of 1/time. If the target pressure is ever zero, this +will produce an error unless the *max/rate* keyword is defined, +described below, which will cap the divergence. + +.. _deform_max_rate: + +The *max/rate* keyword sets an upper threshold, *rate*, that limits the +maximum magnitude of the strain rate applied in any dimension. This keyword +only applies to the *pressure* and *pressure/mean* options. + +.. _deform_couple: + +The *couple* keyword allows two or three of the diagonal components of +the pressure tensor to be "coupled" together for the *pressure* option. +The value specified with the keyword determines which are coupled. For +example, *xz* means the *Pxx* and *Pzz* components of the stress tensor +are coupled. *Xyz* means all 3 diagonal components are coupled. Coupling +means two things: the instantaneous stress will be computed as an average +of the corresponding diagonal components, and the coupled box dimensions +will be changed together in lockstep, meaning coupled dimensions will be +dilated or contracted by the same percentage every timestep. The target +pressures and gain constants for any coupled dimensions must be identical. +*Couple xyz* can be used for a 2d simulation; the *z* dimension is simply +ignored. + +.. _deform_balance: + +The *vol/balance/p* keyword modifies the behavior of *volume* when two +dimensions are used to maintain a fixed volume. Instead of straining +the two dimensions in lockstep, the two dimensions are allowed to +separately dilate or contract in a manner to maintain a constant +volume while simultaneously trying to keep the pressure along each +dimension equal using a method described in :ref:`(Huang2014) `. + +---------- + +If any pressure controls are used, this fix computes a temperature and +pressure each timestep. To do this, the fix creates its own computes of +style "temp" and "pressure", as if these commands had been issued: + +.. code-block:: LAMMPS + + compute fix-ID_temp group-ID temp + compute fix-ID_press group-ID pressure fix-ID_temp + +See the :doc:`compute temp ` and :doc:`compute pressure ` commands for details. Note that the +IDs of the new computes are the fix-ID + underscore + "temp" or fix_ID ++ underscore + "press", and the group for the new computes is the same +as the fix group. + +Note that these are NOT the computes used by thermodynamic output (see +the :doc:`thermo_style ` command) with ID = *thermo_temp* +and *thermo_press*. This means you can change the attributes of this +fix's temperature or pressure via the +:doc:`compute_modify ` command or print this temperature +or pressure during thermodynamic output via the +:doc:`thermo_style custom ` command using the appropriate +compute-ID. It also means that changing attributes of *thermo_temp* or +*thermo_press* will have no effect on this fix. + ---------- .. include:: accel_styles.rst @@ -574,6 +718,15 @@ command. None of the :doc:`fix_modify ` options are relevant to this fix. No global or per-atom quantities are stored by this fix for access by various :doc:`output commands `. +If any pressure controls are used, the :doc:`fix_modify ` *temp* +and *press* options are supported by this fix. You can use them to assign a +:doc:`compute ` you have defined to this fix which will be used +in its temperature and pressure calculations. If you do this, note +that the kinetic energy derived from the compute temperature should be +consistent with the virial term computed using all atoms for the +pressure. LAMMPS will warn you if you choose to compute temperature +on a subset of atoms. + This fix can perform deformation over multiple runs, using the *start* and *stop* keywords of the :doc:`run ` command. See the :doc:`run ` command for details of how to do this. @@ -597,4 +750,14 @@ Related commands Default """"""" -The option defaults are remap = x, flip = yes, and units = lattice. +The option defaults are remap = x, flip = yes, units = lattice, and +normalize/pressure = no. + +---------- + +.. _Li2014b: + +**(Huang2014)** X. Huang, +"Exploring critical-state behavior using DEM", +Doctoral dissertation, Imperial College. +(2014). https://doi.org/10.25560/25316 diff --git a/src/fix_deform.cpp b/src/fix_deform.cpp index 766d7c22ae..db3fad6a95 100644 --- a/src/fix_deform.cpp +++ b/src/fix_deform.cpp @@ -13,13 +13,9 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Pieter in 't Veld (SNL) + Contributing author: Pieter in 't Veld (SNL), Joel Clemmer (SNL) ------------------------------------------------------------------------- */ -// Save previous state to restart file for derivatives -// define hrate_lo/hi for volume/pressure -// add logic for hi_stop and flip flag - #include "fix_deform.h" #include "atom.h" @@ -44,7 +40,7 @@ using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; -enum{NONE=0,FINAL,DELTA,SCALE,VEL,ERATE,TRATE,VOLUME,WIGGLE,VARIABLE,PRESSURE,PISOTROPIC}; +enum{NONE=0,FINAL,DELTA,SCALE,VEL,ERATE,TRATE,VOLUME,WIGGLE,VARIABLE,PRESSURE,PMEAN}; enum{ONE_FROM_ONE,ONE_FROM_TWO,TWO_FROM_ONE}; enum{NOCOUPLE=0,XYZ,XY,YZ,XZ}; @@ -60,6 +56,9 @@ irregular(nullptr), set(nullptr) pre_exchange_migrate = 1; pcouple = NOCOUPLE; dimension = domain->dimension; + max_h_rate = 0.0; + vol_balance_flag = 0; + normalize_pressure_flag = 0; nevery = utils::inumeric(FLERR,arg[3],false,lmp); if (nevery <= 0) error->all(FLERR,"Illegal fix deform command"); @@ -153,9 +152,9 @@ irregular(nullptr), set(nullptr) if (set[index].pgain <= 0.0) error->all(FLERR,"Illegal fix deform pressure gain, must be positive"); iarg += 4; - } else if (strcmp(arg[iarg+1],"pressure/isotropic") == 0) { - if (iarg+4 > narg) utils::missing_cmd_args(FLERR, "fix deform pressure/isotropic", error); - set[index].style = PISOTROPIC; + } else if (strcmp(arg[iarg+1],"pressure/mean") == 0) { + if (iarg+4 > narg) utils::missing_cmd_args(FLERR, "fix deform pressure/mean", error); + set[index].style = PMEAN; if (strstr(arg[iarg+2],"v_") != arg[iarg+2]) { set[index].ptarget = utils::numeric(FLERR,arg[iarg+2],false,lmp); } else { @@ -266,21 +265,21 @@ irregular(nullptr), set(nullptr) for (int i = 0; i < 3; i++) { if (coupled_indices[i]) { set[i].coupled_flag = 1; - if (set[i].style != VOLUME && set[i].style != PRESSURE && set[i].style != NONE) - error->all(FLERR, "Cannot couple dimensions unless they are controlled using the pressure or volume option in fix deform"); - if (set[i].style == PRESSURE || set[i].style == VOLUME) + if (set[i].style != PRESSURE && set[i].style != NONE) + error->all(FLERR, "Cannot couple non-pressure-controlled dimensions"); + if (set[i].style == PRESSURE) j = i; } } if (j == -1) - error->all(FLERR, "Must specify pressure style for a coupled dimension in fix deform"); + error->all(FLERR, "Must specify deformation style for at least one coupled dimension"); // Copy or compare data for each coupled dimension for (int i = 0; i < 3; i++) { if (coupled_indices[i]) { // Copy coupling information if dimension style is undefined - if (set[j].style == PRESSURE && set[i].style == NONE) { + if (set[i].style == NONE) { set[i].style = PRESSURE; set[i].pgain = set[j].pgain; if (set[j].pvar_flag) { @@ -289,32 +288,21 @@ irregular(nullptr), set(nullptr) } else { set[i].ptarget = set[j].ptarget; } - } else if (set[j].style == VOLUME && set[i].style == NONE) { - set[i].style = VOLUME; - if (domain->dimension == 2) - error->all(FLERR, "Cannot couple pressure with constant volume in two dimensions"); + } else { + // Check for incompatibilities in style + if (set[j].style != set[i].style && set[i].style != NONE) + error->all(FLERR, "Cannot couple dimensions with different control options"); + if (set[j].style != PRESSURE) continue; + + // If pressure controlled, check for incompatibilities in parameters + if (set[i].pgain != set[j].pgain || set[i].pvar_flag != set[j].pvar_flag || + set[i].ptarget != set[j].ptarget) + error->all(FLERR, "Coupled dimensions must have identical gain parameters"); + + if (set[j].pvar_flag) + if (strcmp(set[i].pstr, set[j].pstr) != 0) + error->all(FLERR, "Coupled dimensions must have the same target pressure"); } - - // Check for incompatibilities in style - if (set[j].style != set[i].style && set[i].style != NONE) - error->all(FLERR, "Cannot couple dimensions with different control options"); - if (set[j].style != PRESSURE) continue; - - // If pressure controlled, check for incompatibilities in parameters - if (set[i].pgain != set[j].pgain) - error->all(FLERR, "Coupled dimensions must have identical gain parameters\n"); - - if (set[i].pvar_flag != set[j].pvar_flag) - error->all(FLERR, "Coupled dimensions must have the same target pressure\n"); - if (set[j].pvar_flag) - if (strcmp(set[i].pstr, set[j].pstr) != 0) - error->all(FLERR, "Coupled dimensions must have the same target pressure\n"); - if (set[i].ptarget != set[j].ptarget) - error->all(FLERR, "Coupled dimensions must have the same target pressure\n"); - - } else { - if (set[i].style == VOLUME && set[j].style == VOLUME) - error->all(FLERR, "Dimensions used to maintain constant volume must either all be coupled or uncoupled"); } } } @@ -335,27 +323,18 @@ irregular(nullptr), set(nullptr) // no tensile deformation on shrink-wrapped dims // b/c shrink wrap will change box-length - if (set[0].style && - (domain->boundary[0][0] >= 2 || domain->boundary[0][1] >= 2)) - error->all(FLERR,"Cannot use fix deform on a shrink-wrapped boundary"); - if (set[1].style && - (domain->boundary[1][0] >= 2 || domain->boundary[1][1] >= 2)) - error->all(FLERR,"Cannot use fix deform on a shrink-wrapped boundary"); - if (set[2].style && - (domain->boundary[2][0] >= 2 || domain->boundary[2][1] >= 2)) + for (int i = 0; i < 3; i++) + if (set[i].style && (domain->boundary[i][0] >= 2 || domain->boundary[i][1] >= 2)) error->all(FLERR,"Cannot use fix deform on a shrink-wrapped boundary"); // no tilt deformation on shrink-wrapped 2nd dim // b/c shrink wrap will change tilt factor in domain::reset_box() - if (set[3].style && - (domain->boundary[2][0] >= 2 || domain->boundary[2][1] >= 2)) + if (set[3].style && (domain->boundary[2][0] >= 2 || domain->boundary[2][1] >= 2)) error->all(FLERR,"Cannot use fix deform tilt on a shrink-wrapped 2nd dim"); - if (set[4].style && - (domain->boundary[2][0] >= 2 || domain->boundary[2][1] >= 2)) + if (set[4].style && (domain->boundary[2][0] >= 2 || domain->boundary[2][1] >= 2)) error->all(FLERR,"Cannot use fix deform tilt on a shrink-wrapped 2nd dim"); - if (set[5].style && - (domain->boundary[1][0] >= 2 || domain->boundary[1][1] >= 2)) + if (set[5].style && (domain->boundary[1][0] >= 2 || domain->boundary[1][1] >= 2)) error->all(FLERR,"Cannot use fix deform tilt on a shrink-wrapped 2nd dim"); // apply scaling to FINAL,DELTA,VEL,WIGGLE since they have dist/vel units @@ -436,6 +415,9 @@ irregular(nullptr), set(nullptr) set[i].dynamic1 = other1; set[i].dynamic2 = other2; } + + if (vol_balance_flag && set[i].substyle != TWO_FROM_ONE) + error->all(FLERR, "Two dimensions must maintain constant volume to use the vol/balance/p option"); } // set strain_flag @@ -443,30 +425,41 @@ irregular(nullptr), set(nullptr) strain_flag = 0; for (int i = 0; i < 6; i++) if (set[i].style != NONE && set[i].style != VOLUME && - set[i].style != PRESSURE && set[i].style != PISOTROPIC) + set[i].style != PRESSURE && set[i].style != PMEAN) strain_flag = 1; // set varflag varflag = 0; - for (int i = 0; i < 6; i++) + for (int i = 0; i < 6; i++) { if (set[i].style == VARIABLE) varflag = 1; + if (set[i].pvar_flag) varflag = 1; + } // set pressure_flag pressure_flag = 0; for (int i = 0; i < 6; i++) { - if (set[i].style == PRESSURE || set[i].style == PISOTROPIC) pressure_flag = 1; + if (set[i].style == PRESSURE || set[i].style == PMEAN) pressure_flag = 1; if (set[i].coupled_flag) pressure_flag = 1; } + if (vol_balance_flag) pressure_flag = 1; // check conflict between constant volume/pressure if (volume_flag) for (int i = 0; i < 6; i++) - if (set[i].style == PISOTROPIC) + if (set[i].style == PMEAN) error->all(FLERR, "Cannot use fix deform to assign constant volume and pressure"); + // check pressure used for max rate and normalize error flag + + if (!pressure_flag && max_h_rate != 0) + error->all(FLERR, "Can only assign a maximum strain rate using pressure-controlled dimensions"); + + if (!pressure_flag && normalize_pressure_flag) + error->all(FLERR, "Can only normalize error using pressure-controlled dimensions"); + // set initial values at time fix deform is issued for (int i = 0; i < 3; i++) { @@ -489,8 +482,6 @@ irregular(nullptr), set(nullptr) if (force_reneighbor) irregular = new Irregular(lmp); else irregular = nullptr; - TWOPI = 2.0*MY_PI; - // Create pressure compute, if needed pflag = 0; @@ -592,7 +583,7 @@ void FixDeform::init() error->all(FLERR,"Variable {} for fix deform is invalid style", set[i].hratestr); } - // check optional variables for PRESSURE or PISOTROPIC style + // check optional variables for PRESSURE or PMEAN style for (int i = 0; i < 6; i++) { if (!set[i].pvar_flag) continue; @@ -627,30 +618,26 @@ void FixDeform::init() set[i].lo_stop = set[i].lo_start + set[i].dlo; set[i].hi_stop = set[i].hi_start + set[i].dhi; } else if (set[i].style == SCALE) { - set[i].lo_stop = 0.5*(set[i].lo_start+set[i].hi_start) - - 0.5*set[i].scale*(set[i].hi_start-set[i].lo_start); - set[i].hi_stop = 0.5*(set[i].lo_start+set[i].hi_start) + - 0.5*set[i].scale*(set[i].hi_start-set[i].lo_start); + double shift = 0.5 * set[i].scale * (set[i].hi_start - set[i].lo_start); + set[i].lo_stop = 0.5 * (set[i].lo_start + set[i].hi_start) - shift; + set[i].hi_stop = 0.5 * (set[i].lo_start + set[i].hi_start) + shift; } else if (set[i].style == VEL) { - set[i].lo_stop = set[i].lo_start - 0.5*delt*set[i].vel; - set[i].hi_stop = set[i].hi_start + 0.5*delt*set[i].vel; + set[i].lo_stop = set[i].lo_start - 0.5 * delt * set[i].vel; + set[i].hi_stop = set[i].hi_start + 0.5 * delt * set[i].vel; } else if (set[i].style == ERATE) { - set[i].lo_stop = set[i].lo_start - - 0.5*delt*set[i].rate * (set[i].hi_start-set[i].lo_start); - set[i].hi_stop = set[i].hi_start + - 0.5*delt*set[i].rate * (set[i].hi_start-set[i].lo_start); + double shift = 0.5 * delt * set[i].rate * (set[i].hi_start - set[i].lo_start); + set[i].lo_stop = set[i].lo_start - shift; + set[i].hi_stop = set[i].hi_start + shift; if (set[i].hi_stop <= set[i].lo_stop) error->all(FLERR,"Final box dimension due to fix deform is < 0.0"); } else if (set[i].style == TRATE) { - set[i].lo_stop = 0.5*(set[i].lo_start+set[i].hi_start) - - 0.5*((set[i].hi_start-set[i].lo_start) * exp(set[i].rate*delt)); - set[i].hi_stop = 0.5*(set[i].lo_start+set[i].hi_start) + - 0.5*((set[i].hi_start-set[i].lo_start) * exp(set[i].rate*delt)); + double shift = 0.5 * ((set[i].hi_start - set[i].lo_start) * exp(set[i].rate * delt)); + set[i].lo_stop = 0.5 * (set[i].lo_start + set[i].hi_start) - shift; + set[i].hi_stop = 0.5 * (set[i].lo_start + set[i].hi_start) + shift; } else if (set[i].style == WIGGLE) { - set[i].lo_stop = set[i].lo_start - - 0.5*set[i].amplitude * sin(TWOPI*delt/set[i].tperiod); - set[i].hi_stop = set[i].hi_start + - 0.5*set[i].amplitude * sin(TWOPI*delt/set[i].tperiod); + double shift = 0.5 * set[i].amplitude * sin(MY_2PI * delt / set[i].tperiod); + set[i].lo_stop = set[i].lo_start - shift; + set[i].hi_stop = set[i].hi_start + shift; } } @@ -666,50 +653,46 @@ void FixDeform::init() } else if (set[i].style == DELTA) { set[i].tilt_stop = set[i].tilt_start + set[i].dtilt; } else if (set[i].style == VEL) { - set[i].tilt_stop = set[i].tilt_start + delt*set[i].vel; + set[i].tilt_stop = set[i].tilt_start + delt * set[i].vel; } else if (set[i].style == ERATE) { if (i == 3) set[i].tilt_stop = set[i].tilt_start + - delt*set[i].rate * (set[2].hi_start-set[2].lo_start); + delt * set[i].rate * (set[2].hi_start - set[2].lo_start); if (i == 4) set[i].tilt_stop = set[i].tilt_start + - delt*set[i].rate * (set[2].hi_start-set[2].lo_start); + delt * set[i].rate * (set[2].hi_start - set[2].lo_start); if (i == 5) set[i].tilt_stop = set[i].tilt_start + - delt*set[i].rate * (set[1].hi_start-set[1].lo_start); + delt * set[i].rate * (set[1].hi_start - set[1].lo_start); } else if (set[i].style == TRATE) { - set[i].tilt_stop = set[i].tilt_start * exp(set[i].rate*delt); + set[i].tilt_stop = set[i].tilt_start * exp(set[i].rate * delt); } else if (set[i].style == WIGGLE) { - set[i].tilt_stop = set[i].tilt_start + - set[i].amplitude * sin(TWOPI*delt/set[i].tperiod); + double shift = set[i].amplitude * sin(MY_2PI * delt / set[i].tperiod); + set[i].tilt_stop = set[i].tilt_start + shift; // compute min/max for WIGGLE = extrema tilt factor will ever reach if (set[i].amplitude >= 0.0) { - if (delt < 0.25*set[i].tperiod) { + if (delt < 0.25 * set[i].tperiod) { set[i].tilt_min = set[i].tilt_start; - set[i].tilt_max = set[i].tilt_start + - set[i].amplitude*sin(TWOPI*delt/set[i].tperiod); - } else if (delt < 0.5*set[i].tperiod) { + set[i].tilt_max = set[i].tilt_start + shift; + } else if (delt < 0.5 * set[i].tperiod) { set[i].tilt_min = set[i].tilt_start; set[i].tilt_max = set[i].tilt_start + set[i].amplitude; - } else if (delt < 0.75*set[i].tperiod) { - set[i].tilt_min = set[i].tilt_start - - set[i].amplitude*sin(TWOPI*delt/set[i].tperiod); + } else if (delt < 0.75 * set[i].tperiod) { + set[i].tilt_min = set[i].tilt_start - shift; set[i].tilt_max = set[i].tilt_start + set[i].amplitude; } else { set[i].tilt_min = set[i].tilt_start - set[i].amplitude; set[i].tilt_max = set[i].tilt_start + set[i].amplitude; } } else { - if (delt < 0.25*set[i].tperiod) { - set[i].tilt_min = set[i].tilt_start - - set[i].amplitude*sin(TWOPI*delt/set[i].tperiod); + if (delt < 0.25 * set[i].tperiod) { + set[i].tilt_min = set[i].tilt_start - shift; set[i].tilt_max = set[i].tilt_start; - } else if (delt < 0.5*set[i].tperiod) { + } else if (delt < 0.5 * set[i].tperiod) { set[i].tilt_min = set[i].tilt_start - set[i].amplitude; set[i].tilt_max = set[i].tilt_start; - } else if (delt < 0.75*set[i].tperiod) { + } else if (delt < 0.75 * set[i].tperiod) { set[i].tilt_min = set[i].tilt_start - set[i].amplitude; - set[i].tilt_max = set[i].tilt_start + - set[i].amplitude*sin(TWOPI*delt/set[i].tperiod); + set[i].tilt_max = set[i].tilt_start + shift; } else { set[i].tilt_min = set[i].tilt_start - set[i].amplitude; set[i].tilt_max = set[i].tilt_start + set[i].amplitude; @@ -729,25 +712,25 @@ void FixDeform::init() // this is b/c the flips would induce continuous changes in xz // in order to keep the edge vectors of the flipped shape matrix // an integer combination of the edge vectors of the unflipped shape matrix - // VARIABLE for yz is error, since no way to calculate if box flip occurs + // VARIABLE or PRESSURE for yz is error, since no way to calculate if box flip occurs // WIGGLE lo/hi flip test is on min/max oscillation limit, not tilt_stop // only trigger actual errors if flipflag is set if (set[3].style && set[5].style) { int flag = 0; double lo,hi; - if (flipflag && set[3].style == VARIABLE) - error->all(FLERR,"Fix deform cannot use yz variable with xy"); + if (flipflag && (set[3].style == VARIABLE || set[3].style == PRESSURE)) + error->all(FLERR,"Fix deform cannot use yz variable or pressure with xy"); if (set[3].style == WIGGLE) { lo = set[3].tilt_min; hi = set[3].tilt_max; } else lo = hi = set[3].tilt_stop; if (flipflag) { - if (lo/(set[1].hi_start-set[1].lo_start) < -0.5 || - hi/(set[1].hi_start-set[1].lo_start) > 0.5) flag = 1; + if (lo / (set[1].hi_start - set[1].lo_start) < -0.5 || + hi / (set[1].hi_start - set[1].lo_start) > 0.5) flag = 1; if (set[1].style) { - if (lo/(set[1].hi_stop-set[1].lo_stop) < -0.5 || - hi/(set[1].hi_stop-set[1].lo_stop) > 0.5) flag = 1; + if (lo / (set[1].hi_stop - set[1].lo_stop) < -0.5 || + hi / (set[1].hi_stop - set[1].lo_stop) > 0.5) flag = 1; } if (flag) error->all(FLERR,"Fix deform is changing yz too much with xy"); @@ -798,13 +781,11 @@ void FixDeform::init() if (pressure_flag) { int icompute = modify->find_compute(id_temp); - if (icompute < 0) - error->all(FLERR,"Temperature ID for fix deform does not exist"); + if (icompute < 0) error->all(FLERR,"Temperature ID for fix deform does not exist"); temperature = modify->compute[icompute]; icompute = modify->find_compute(id_press); - if (icompute < 0) - error->all(FLERR,"Pressure ID for fix deform does not exist"); + if (icompute < 0) error->all(FLERR,"Pressure ID for fix deform does not exist"); pressure = modify->compute[icompute]; } } @@ -816,7 +797,6 @@ void FixDeform::init() void FixDeform::setup(int /*vflag*/) { // trigger virial computation on next timestep - if (pressure_flag) pressure->addstep(update->ntimestep+1); } @@ -872,13 +852,10 @@ void FixDeform::end_of_step() temperature->compute_vector(); pressure->compute_vector(); for (int i = 0; i < 3; i++) { - if (! set[i].saved) { + if (!set[i].saved) { set[i].saved = 1; set[i].prior_rate = 0.0; set[i].prior_pressure = pressure->vector[i]; - if (i == 0) set[i].box_length = domain->xprd; - else if (i == 1) set[i].box_length = domain->yprd; - else set[i].box_length = domain->zprd; } } set_pressure(); @@ -892,15 +869,39 @@ void FixDeform::end_of_step() // Save pressure/strain rate if required if (pressure_flag) { - double dt_inv = 1.0 / update->dt; for (int i = 0; i < 3; i++) { set[i].prior_pressure = pressure->vector[i]; - set[i].prior_rate = ((set[i].hi_target - set[i].lo_target) / set[i].box_length - 1.0) * dt_inv; + set[i].prior_rate = ((set[i].hi_target - set[i].lo_target) / + (domain->boxhi[i] - domain->boxlo[i]) - 1.0) / update->dt; } } if (varflag) modify->addstep_compute(update->ntimestep + nevery); + // tilt_target can be large positive or large negative value + // add/subtract box lengths until tilt_target is closest to current value + + if (triclinic) { + double *h = domain->h; + for (int i = 3; i < 6; i++) { + int idenom = 0; + if (i == 5) idenom = 0; + else if (i == 4) idenom = 0; + else if (i == 3) idenom = 1; + double denom = set[idenom].hi_target - set[idenom].lo_target; + + double current = h[i] / h[idenom]; + + while (set[i].tilt_target / denom - current > 0.0) + set[i].tilt_target -= denom; + while (set[i].tilt_target / denom - current < 0.0) + set[i].tilt_target += denom; + if (fabs(set[i].tilt_target / denom - 1.0 - current) < + fabs(set[i].tilt_target / denom - current)) + set[i].tilt_target -= denom; + } + } + // if any tilt ratios exceed 0.5, set flip = 1 and compute new tilt values // do not flip in x or y if non-periodic (can tilt but not flip) // this is b/c the box length would be changed (dramatically) by flip @@ -915,12 +916,12 @@ void FixDeform::end_of_step() double yprd = set[1].hi_target - set[1].lo_target; double xprdinv = 1.0 / xprd; double yprdinv = 1.0 / yprd; - if (set[3].tilt_target*yprdinv < -0.5 || - set[3].tilt_target*yprdinv > 0.5 || - set[4].tilt_target*xprdinv < -0.5 || - set[4].tilt_target*xprdinv > 0.5 || - set[5].tilt_target*xprdinv < -0.5 || - set[5].tilt_target*xprdinv > 0.5) { + if (set[3].tilt_target * yprdinv < -0.5 || + set[3].tilt_target * yprdinv > 0.5 || + set[4].tilt_target * xprdinv < -0.5 || + set[4].tilt_target * xprdinv > 0.5 || + set[5].tilt_target * xprdinv < -0.5 || + set[5].tilt_target * xprdinv > 0.5) { set[3].tilt_flip = set[3].tilt_target; set[4].tilt_flip = set[4].tilt_target; set[5].tilt_flip = set[5].tilt_target; @@ -928,30 +929,30 @@ void FixDeform::end_of_step() flipxy = flipxz = flipyz = 0; if (domain->yperiodic) { - if (set[3].tilt_flip*yprdinv < -0.5) { + if (set[3].tilt_flip * yprdinv < -0.5) { set[3].tilt_flip += yprd; set[4].tilt_flip += set[5].tilt_flip; flipyz = 1; - } else if (set[3].tilt_flip*yprdinv > 0.5) { + } else if (set[3].tilt_flip * yprdinv > 0.5) { set[3].tilt_flip -= yprd; set[4].tilt_flip -= set[5].tilt_flip; flipyz = -1; } } if (domain->xperiodic) { - if (set[4].tilt_flip*xprdinv < -0.5) { + if (set[4].tilt_flip * xprdinv < -0.5) { set[4].tilt_flip += xprd; flipxz = 1; } - if (set[4].tilt_flip*xprdinv > 0.5) { + if (set[4].tilt_flip * xprdinv > 0.5) { set[4].tilt_flip -= xprd; flipxz = -1; } - if (set[5].tilt_flip*xprdinv < -0.5) { + if (set[5].tilt_flip * xprdinv < -0.5) { set[5].tilt_flip += xprd; flipxy = 1; } - if (set[5].tilt_flip*xprdinv > 0.5) { + if (set[5].tilt_flip * xprdinv > 0.5) { set[5].tilt_flip -= xprd; flipxy = -1; } @@ -1023,12 +1024,8 @@ void FixDeform::end_of_step() // trigger virial computation, if needed, on next timestep - if (pressure_flag) { + if (pressure_flag) pressure->addstep(update->ntimestep+1); - set[0].box_length = domain->xprd; - set[1].box_length = domain->yprd; - set[2].box_length = domain->zprd; - } } /* ---------------------------------------------------------------------- @@ -1052,36 +1049,31 @@ void FixDeform::set_strain() set[i].hi_target = domain->boxhi[i]; } else if (set[i].style == TRATE) { double delt = (update->ntimestep - update->beginstep) * update->dt; - set[i].lo_target = 0.5*(set[i].lo_start+set[i].hi_start) - - 0.5*((set[i].hi_start-set[i].lo_start) * exp(set[i].rate*delt)); - set[i].hi_target = 0.5*(set[i].lo_start+set[i].hi_start) + - 0.5*((set[i].hi_start-set[i].lo_start) * exp(set[i].rate*delt)); + double shift = 0.5 * ((set[i].hi_start - set[i].lo_start) * exp(set[i].rate * delt)); + set[i].lo_target = 0.5 * (set[i].lo_start + set[i].hi_start) - shift; + set[i].hi_target = 0.5 * (set[i].lo_start + set[i].hi_start) + shift; h_rate[i] = set[i].rate * domain->h[i]; - h_ratelo[i] = -0.5*h_rate[i]; + h_ratelo[i] = -0.5 * h_rate[i]; } else if (set[i].style == WIGGLE) { double delt = (update->ntimestep - update->beginstep) * update->dt; - set[i].lo_target = set[i].lo_start - - 0.5*set[i].amplitude * sin(TWOPI*delt/set[i].tperiod); - set[i].hi_target = set[i].hi_start + - 0.5*set[i].amplitude * sin(TWOPI*delt/set[i].tperiod); - h_rate[i] = TWOPI/set[i].tperiod * set[i].amplitude * - cos(TWOPI*delt/set[i].tperiod); - h_ratelo[i] = -0.5*h_rate[i]; + double shift = 0.5 * set[i].amplitude * sin(MY_2PI * delt / set[i].tperiod); + set[i].lo_target = set[i].lo_start - shift; + set[i].hi_target = set[i].hi_start + shift; + h_rate[i] = MY_2PI / set[i].tperiod * set[i].amplitude * + cos(MY_2PI * delt / set[i].tperiod); + h_ratelo[i] = -0.5 * h_rate[i]; } else if (set[i].style == VARIABLE) { double del = input->variable->compute_equal(set[i].hvar); - set[i].lo_target = set[i].lo_start - 0.5*del; - set[i].hi_target = set[i].hi_start + 0.5*del; + set[i].lo_target = set[i].lo_start - 0.5 * del; + set[i].hi_target = set[i].hi_start + 0.5 * del; h_rate[i] = input->variable->compute_equal(set[i].hratevar); - h_ratelo[i] = -0.5*h_rate[i]; + h_ratelo[i] = -0.5 * h_rate[i]; } else if (set[i].style != VOLUME) { - set[i].lo_target = set[i].lo_start + - delta*(set[i].lo_stop - set[i].lo_start); - set[i].hi_target = set[i].hi_start + - delta*(set[i].hi_stop - set[i].hi_start); + set[i].lo_target = set[i].lo_start + delta * (set[i].lo_stop - set[i].lo_start); + set[i].hi_target = set[i].hi_start + delta * (set[i].hi_stop - set[i].hi_start); } } - // for triclinic, set new box shape // for NONE, target is current tilt // for TRATE, set target directly based on current time. also set h_rate @@ -1099,41 +1091,21 @@ void FixDeform::set_strain() else if (i == 3) set[i].tilt_target = domain->yz; } else if (set[i].style == TRATE) { double delt = (update->ntimestep - update->beginstep) * update->dt; - set[i].tilt_target = set[i].tilt_start * exp(set[i].rate*delt); + set[i].tilt_target = set[i].tilt_start * exp(set[i].rate * delt); h_rate[i] = set[i].rate * domain->h[i]; } else if (set[i].style == WIGGLE) { double delt = (update->ntimestep - update->beginstep) * update->dt; set[i].tilt_target = set[i].tilt_start + - set[i].amplitude * sin(TWOPI*delt/set[i].tperiod); - h_rate[i] = TWOPI/set[i].tperiod * set[i].amplitude * - cos(TWOPI*delt/set[i].tperiod); + set[i].amplitude * sin(MY_2PI * delt / set[i].tperiod); + h_rate[i] = MY_2PI / set[i].tperiod * set[i].amplitude * + cos(MY_2PI * delt / set[i].tperiod); } else if (set[i].style == VARIABLE) { double delta_tilt = input->variable->compute_equal(set[i].hvar); set[i].tilt_target = set[i].tilt_start + delta_tilt; h_rate[i] = input->variable->compute_equal(set[i].hratevar); } else { - set[i].tilt_target = set[i].tilt_start + - delta*(set[i].tilt_stop - set[i].tilt_start); + set[i].tilt_target = set[i].tilt_start + delta * (set[i].tilt_stop - set[i].tilt_start); } - - // tilt_target can be large positive or large negative value - // add/subtract box lengths until tilt_target is closest to current value - - int idenom = 0; - if (i == 5) idenom = 0; - else if (i == 4) idenom = 0; - else if (i == 3) idenom = 1; - double denom = set[idenom].hi_target - set[idenom].lo_target; - - double current = h[i]/h[idenom]; - - while (set[i].tilt_target/denom - current > 0.0) - set[i].tilt_target -= denom; - while (set[i].tilt_target/denom - current < 0.0) - set[i].tilt_target += denom; - if (fabs(set[i].tilt_target/denom - 1.0 - current) < - fabs(set[i].tilt_target/denom - current)) - set[i].tilt_target -= denom; } } } @@ -1156,7 +1128,7 @@ void FixDeform::set_pressure() double p_current[3]; if (pcouple == XYZ) { - double ave = 1.0/3.0 * (tensor[0] + tensor[1] + tensor[2]); + double ave = THIRD * (tensor[0] + tensor[1] + tensor[2]); p_current[0] = p_current[1] = p_current[2] = ave; } else if (pcouple == XY) { double ave = 0.5 * (tensor[0] + tensor[1]); @@ -1172,30 +1144,68 @@ void FixDeform::set_pressure() p_current[1] = tensor[1]; } else { if (set[0].style == PRESSURE) p_current[0] = tensor[0]; - else if (set[0].style == PISOTROPIC) p_current[0] = scalar; + else if (set[0].style == PMEAN) p_current[0] = scalar; if (set[1].style == PRESSURE) p_current[1] = tensor[1]; - else if (set[1].style == PISOTROPIC) p_current[1] = scalar; + else if (set[1].style == PMEAN) p_current[1] = scalar; if (set[2].style == PRESSURE) p_current[2] = tensor[2]; - else if (set[2].style == PISOTROPIC) p_current[2] = scalar; + else if (set[2].style == PMEAN) p_current[2] = scalar; } for (int i = 0; i < 3; i++) { - if (set[i].style != PRESSURE && set[i].style != PISOTROPIC) continue; - double dilation = 1.0 - update->dt * set[i].pgain * (set[i].ptarget - p_current[i]); - double center_start = 0.5 * (set[i].lo_start + set[i].hi_start); - double offset = 0.5 * set[i].box_length * dilation; - //printf("ptarget %g vs %g, dilation %g cs %g ofset %g box %g\n", set[i].ptarget, p_current[i], dilation, center_start, offset, set[i].box_length); - set[i].lo_target = center_start - offset; - set[i].hi_target = center_start + offset; + if (set[i].style != PRESSURE && set[i].style != PMEAN) continue; + + h_rate[i] = set[i].pgain * (p_current[i] - set[i].ptarget); + if (normalize_pressure_flag) { + if (set[i].ptarget == 0) { + if (max_h_rate == 0) { + error->all(FLERR, "Cannot normalize error for zero pressure without defining a max rate"); + } else h_rate[i] = max_h_rate * h_rate[i] / fabs(h_rate[i]); + } else h_rate[i] /= fabs(set[i].ptarget); + } + + if (max_h_rate != 0) + if (fabs(set[i].ptarget) > max_h_rate) + h_rate[i] = max_h_rate * h_rate[i] / fabs(h_rate[i]); + + double offset = 0.5 * (domain->boxhi[i] - domain->boxlo[i]) * (1.0 + update->dt * h_rate[i]); + set[i].lo_target = 0.5 * (set[i].lo_start + set[i].hi_start) - offset; + set[i].hi_target = 0.5 * (set[i].lo_start + set[i].hi_start) + offset; } for (int i = 3; i < 6; i++) { - double shift = update->dt * set[i].pgain * (set[i].ptarget - tensor[i]); - if (i == 3) set[i].tilt_target = domain->xy + shift * domain->xprd; - else if (i == 4) set[i].tilt_target = domain->xz + shift * domain->xprd; - else set[i].tilt_target = domain->yz + shift * domain->yprd; + if (set[i].style != PRESSURE) continue; + + double L, tilt, pcurrent; + if (i == 3) { + L = domain->zprd; + tilt = domain->yz; + pcurrent = tensor[5]; + } else if (i == 4) { + L = domain->zprd; + tilt = domain->xz + update->dt; + pcurrent = tensor[4]; + } else { + L = domain->yprd; + tilt = domain->xy; + pcurrent = tensor[3]; + } + + h_rate[i] = L * set[i].pgain * (pcurrent - set[i].ptarget); + if (normalize_pressure_flag) { + if (set[i].ptarget == 0) { + if (max_h_rate == 0) { + error->all(FLERR, "Cannot normalize error for zero pressure without defining a max rate"); + } else h_rate[i] = max_h_rate * h_rate[i] / fabs(h_rate[i]); + } else h_rate[i] /= fabs(set[i].ptarget); + } + + if (max_h_rate != 0) + if (fabs(h_rate[i]) > max_h_rate) + h_rate[i] = max_h_rate * h_rate[i] / fabs(h_rate[i]); + + set[i].tilt_target = tilt + update->dt * h_rate[i]; } } @@ -1211,61 +1221,65 @@ void FixDeform::set_volume() for (int i = 0; i < 3; i++) { if (set[i].style != VOLUME) continue; + int dynamic1 = set[i].dynamic1; + int dynamic2 = set[i].dynamic2; + int fixed = set[i].fixed; double v0 = set[i].vol_start; - double center_start = 0.5 * (set[i].lo_start + set[i].hi_start); - double offset; + double shift; if (set[i].substyle == ONE_FROM_ONE) { - offset = 0.5 * (v0 / - (set[set[i].dynamic1].hi_target - set[set[i].dynamic1].lo_target) / - (set[set[i].fixed].hi_start-set[set[i].fixed].lo_start)); + shift = 0.5 * (v0 / (set[dynamic1].hi_target - set[dynamic1].lo_target) / + (set[fixed].hi_start-set[fixed].lo_start)); } else if (set[i].substyle == ONE_FROM_TWO) { - offset = 0.5 * (v0 / - (set[set[i].dynamic1].hi_target - set[set[i].dynamic1].lo_target) / - (set[set[i].dynamic2].hi_target - set[set[i].dynamic2].lo_target)); + shift = 0.5 * (v0 / (set[dynamic1].hi_target - set[dynamic1].lo_target) / + (set[dynamic2].hi_target - set[dynamic2].lo_target)); } else if (set[i].substyle == TWO_FROM_ONE) { - if (!set[i].coupled_flag) { - offset = 0.5 * sqrt(v0 * (set[i].hi_start - set[i].lo_start) / - (set[set[i].dynamic1].hi_target - set[set[i].dynamic1].lo_target) / - (set[set[i].fixed].hi_start - set[set[i].fixed].lo_start)); + if (!vol_balance_flag) { + shift = 0.5 * sqrt(v0 * (set[i].hi_start - set[i].lo_start) / + (set[dynamic1].hi_target - set[dynamic1].lo_target) / + (set[fixed].hi_start - set[fixed].lo_start)); } else { double dt = update->dt; double e1i = set[i].prior_rate; - double e2i = set[set[i].fixed].prior_rate; - double L3 = (set[set[i].dynamic1].hi_target - set[set[i].dynamic1].lo_target); - double e3 = (L3 / set[set[i].dynamic1].box_length - 1.0) / dt; + double e2i = set[fixed].prior_rate; + double L1i = domain->boxhi[i] - domain->boxlo[i]; + double L2i = domain->boxhi[fixed] - domain->boxlo[fixed]; + double L3i = domain->boxhi[dynamic1] - domain->boxlo[dynamic1]; + double L3 = (set[dynamic1].hi_target - set[dynamic1].lo_target); + double e3 = (L3 / L3i - 1.0) / dt; double p1 = pressure->vector[i]; - double p2 = pressure->vector[set[i].fixed]; + double p2 = pressure->vector[fixed]; double p1i = set[i].prior_pressure; - double p2i = set[set[i].fixed].prior_pressure; + double p2i = set[fixed].prior_pressure; if (e3 == 0) { e1 = 0.0; e2 = 0.0; - offset = 0.5 * set[i].box_length; + shift = 0.5 * L1i; } else if (e1i == 0 || e2i == 0 || (p2 == p2i && p1 == p1i)) { - // If no prior strain or no change in pressure (initial step) just scale offset by relative box lengths - offset = 0.5 * sqrt(v0 * set[i].box_length / L3 / set[set[i].fixed].box_length); + // If no prior strain or no change in pressure (initial step) just scale shift by relative box lengths + shift = 0.5 * sqrt(v0 * L1i / L3 / L2i); } else { - if (! linked_pressure) { - // Calculate first strain rate by expanding stress to linear order in strain to achieve p1(t+dt) = p2(t+dt) - e1 = -e3 / (1 + e3 * dt) * (p2 - p2i) - e2i * (p1 - p2); - e1 /= (p2 - p2i + (p1 - p1i) / e1i * e2i); - + if (!linked_pressure) { + // Calculate first strain rate by expanding stress to linear order, p1(t+dt) = p2(t+dt) // Calculate second strain rate to preserve volume - e2 = (1.0 - (1 + e3 * dt) * (1 + e1 * dt)); - e2 /= (1 + e3 * dt) * (1 + e1 * dt) * dt; + e1 = -e3 / (1 + e3 * dt) * (p2 - p2i) - e2i * (p1 - p2) / (p2 - p2i + (p1 - p1i) / e1i * e2i); + e2 = (1.0 - (1 + e3 * dt) * (1 + e1 * dt)) / ((1 + e3 * dt) * (1 + e1 * dt) * dt); - offset = 0.5 * set[i].box_length * (1.0 + e1 * dt); + shift = 0.5 * L1i * (1.0 + e1 * dt); linked_pressure = 1; } else { - offset = 0.5 * set[i].box_length * (1.0 + e2 * dt); + // Already calculated value of e2 + shift = 0.5 * L1i * (1.0 + e2 * dt); } } } } - set[i].lo_target = center_start - offset; - set[i].hi_target = center_start + offset; + + h_rate[i] = (2.0 * shift / (domain->boxhi[i] - domain->boxlo[i]) - 1.0) / update->dt; + + set[i].lo_target = 0.5 * (set[i].lo_start + set[i].hi_start) - shift; + set[i].hi_target = 0.5 * (set[i].lo_start + set[i].hi_start) + shift; } } @@ -1296,6 +1310,9 @@ void FixDeform::restart(char *buf) set[i].hi_initial = set_restart[i].hi_initial; set[i].vol_initial = set_restart[i].vol_initial; set[i].tilt_initial = set_restart[i].tilt_initial; + set[i].saved = set_restart[i].saved; + set[i].prior_rate = set_restart[i].prior_rate; + set[i].prior_pressure = set_restart[i].prior_pressure; // check if style settings are consistent (should do the whole set?) if (set[i].style != set_restart[i].style) samestyle = 0; @@ -1344,6 +1361,20 @@ void FixDeform::options(int narg, char **arg) else if (strcmp(arg[iarg+1],"none") == 0) pcouple = NOCOUPLE; else error->all(FLERR,"Illegal fix fix deform couple command: {}", arg[iarg+1]); iarg += 2; + } else if (strcmp(arg[iarg],"max/rate") == 0) { + if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "fix deform max/rate", error); + max_h_rate = utils::numeric(FLERR,arg[iarg+1],false,lmp); + if (max_h_rate <= 0.0) + error->all(FLERR,"Maximum strain rate must be a positive, non-zero value"); + iarg += 2; + } else if (strcmp(arg[iarg],"normalize/pressure") == 0) { + if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "fix deform normalize/pressure", error); + normalize_pressure_flag = utils::logical(FLERR,arg[iarg+1],false,lmp); + iarg += 2; + } else if (strcmp(arg[iarg],"vol/balance/p") == 0) { + if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "fix deform vol/balance/p", error); + vol_balance_flag = utils::logical(FLERR,arg[iarg+1],false,lmp); + iarg += 2; } else error->all(FLERR,"Illegal fix deform command: {}", arg[iarg]); } @@ -1363,7 +1394,6 @@ double FixDeform::memory_usage() return bytes; } - /* ---------------------------------------------------------------------- */ int FixDeform::modify_param(int narg, char **arg) diff --git a/src/fix_deform.h b/src/fix_deform.h index 9ecb9a577d..eda97f7c90 100644 --- a/src/fix_deform.h +++ b/src/fix_deform.h @@ -44,17 +44,17 @@ class FixDeform : public Fix { protected: int dimension, triclinic, scaleflag, flipflag, pcouple; int flip, flipxy, flipxz, flipyz; - double *h_rate, *h_ratelo; + double *h_rate, *h_ratelo, max_h_rate; int varflag; // 1 if VARIABLE option is used, 0 if not int strain_flag; // 1 if strain-based option is used, 0 if not int volume_flag; // 1 if VOLUME option is used, 0 if not int pressure_flag; // 1 if pressure tensor used, 0 if not int kspace_flag; // 1 if KSpace invoked, 0 if not + int normalize_pressure_flag; // 1 if normalize pressure deviation by target + int vol_balance_flag; // 1 if pressures balanced when maintaining const vol std::vector rfix; // pointers to rigid fixes class Irregular *irregular; // for migrating atoms after box flips - double TWOPI; - char *id_temp, *id_press; class Compute *temperature, *pressure; int tflag, pflag; @@ -72,7 +72,6 @@ class FixDeform : public Fix { double vol_initial, vol_start; double ptarget, pgain; double prior_pressure, prior_rate; - double box_length; int saved; int fixed, dynamic1, dynamic2; char *hstr, *hratestr, *pstr; From 63618d249056b6008d34486301d841fa7bc47ccf Mon Sep 17 00:00:00 2001 From: jtclemm Date: Mon, 24 Oct 2022 10:59:22 -0600 Subject: [PATCH 004/448] Adding scalar pressure compute --- src/fix_deform.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/fix_deform.cpp b/src/fix_deform.cpp index db3fad6a95..29c286ed02 100644 --- a/src/fix_deform.cpp +++ b/src/fix_deform.cpp @@ -851,6 +851,7 @@ void FixDeform::end_of_step() if (pressure_flag) { temperature->compute_vector(); pressure->compute_vector(); + pressure->compute_scalar(); for (int i = 0; i < 3; i++) { if (!set[i].saved) { set[i].saved = 1; @@ -1157,6 +1158,7 @@ void FixDeform::set_pressure() if (set[i].style != PRESSURE && set[i].style != PMEAN) continue; h_rate[i] = set[i].pgain * (p_current[i] - set[i].ptarget); + if (normalize_pressure_flag) { if (set[i].ptarget == 0) { if (max_h_rate == 0) { From 7160594902f74dd8cb5d88e85a96c1930df67cd0 Mon Sep 17 00:00:00 2001 From: Ludwig Ahrens-Iwers Date: Tue, 31 Jan 2023 07:39:31 +0000 Subject: [PATCH 005/448] Warning for mobile electrode atoms --- src/ELECTRODE/electrode_math.h | 1 + src/ELECTRODE/fix_electrode_conp.cpp | 26 ++- unittest/force-styles/tests/in.conp | 1 + .../tests/kspace-ewald_conp_charge.yaml | 159 +++++++-------- .../tests/kspace-pppm_conp_charge.yaml | 185 +++++++++--------- 5 files changed, 200 insertions(+), 172 deletions(-) diff --git a/src/ELECTRODE/electrode_math.h b/src/ELECTRODE/electrode_math.h index 5992df2289..08689a5adc 100644 --- a/src/ELECTRODE/electrode_math.h +++ b/src/ELECTRODE/electrode_math.h @@ -18,6 +18,7 @@ #ifndef LMP_ELECTRODE_MATH_H #define LMP_ELECTRODE_MATH_H +#include "math.h" #include "math_const.h" namespace LAMMPS_NS { diff --git a/src/ELECTRODE/fix_electrode_conp.cpp b/src/ELECTRODE/fix_electrode_conp.cpp index dbb508a033..33632f9099 100644 --- a/src/ELECTRODE/fix_electrode_conp.cpp +++ b/src/ELECTRODE/fix_electrode_conp.cpp @@ -377,6 +377,30 @@ void FixElectrodeConp::init() if (strncmp(modify->fix[i]->style, "electrode", 9) == 0) count++; if (count > 1) error->all(FLERR, "More than one fix electrode"); + // make sure electrode atoms are not integrated if a matrix is used for electrode-electrode interaction + int const nlocal = atom->nlocal; + int *mask = atom->mask; + Fix **fix = modify->fix; + if (matrix_algo) { + std::vector integrate_ids = std::vector(); + for (int i = 0; i < modify->nfix; i++) { + if (fix[i]->time_integrate == 0) continue; + int electrode_mover = 0; + int fix_groupbit = fix[i]->groupbit; + for (int j = 0; j < nlocal; j++) + if ((mask[j] & fix_groupbit) && (mask[j] & groupbit)) electrode_mover = 1; + MPI_Allreduce(MPI_IN_PLACE, &electrode_mover, 1, MPI_INT, MPI_SUM, world); + if (electrode_mover && comm->me == 0) integrate_ids.push_back(fix[i]->id); + } + if (comm->me == 0) + for (char *fix_id : integrate_ids) + error->warning( + FLERR, + "Electrode atoms are integrated by fix {}, but fix electrode is using a matrix method. For " + "mobile electrodes use the conjugate gradient algorithm without matrix ('algo cg').", + fix_id); + } + // check for package intel if (etypes_neighlists) request_etypes_neighlists(); @@ -863,7 +887,7 @@ void FixElectrodeConp::update_charges() update_time += MPI_Wtime() - start; } -std::vector FixElectrodeConp::ele_ele_interaction(const std::vector& q_local) +std::vector FixElectrodeConp::ele_ele_interaction(const std::vector &q_local) { assert(q_local.size() == nlocalele); assert(algo == Algo::CG || algo == Algo::MATRIX_CG); diff --git a/unittest/force-styles/tests/in.conp b/unittest/force-styles/tests/in.conp index 92d2f63cd1..08673ec20b 100644 --- a/unittest/force-styles/tests/in.conp +++ b/unittest/force-styles/tests/in.conp @@ -22,3 +22,4 @@ angle_coeff * group bot type 1 group top type 2 +group ele type 1 2 diff --git a/unittest/force-styles/tests/kspace-ewald_conp_charge.yaml b/unittest/force-styles/tests/kspace-ewald_conp_charge.yaml index 73fb252226..9a09a933a4 100644 --- a/unittest/force-styles/tests/kspace-ewald_conp_charge.yaml +++ b/unittest/force-styles/tests/kspace-ewald_conp_charge.yaml @@ -1,7 +1,7 @@ --- -lammps_version: 23 Jun 2022 +lammps_version: 3 Nov 2022 tags: generated -date_generated: Wed Sep 21 13:52:53 2022 +date_generated: Fri Dec 16 10:49:39 2022 epsilon: 1e-12 skip_tests: gpu kokkos_omp omp prerequisites: ! | @@ -17,6 +17,7 @@ post_commands: ! | kspace_modify gewald 0.23118 kspace_modify slab ew2d fix fxcpm bot electrode/conp -1.0 1.805 couple top 1.0 symm on + fix fxforce ele setforce 0 0 0 input_file: in.conp pair_style: coul/long 15.0 pair_coeff: ! | @@ -40,85 +41,85 @@ init_forces: ! |2 10 -3.3526564930579039e-02 -2.3802275431282884e-03 -1.5617801011657175e+00 11 -5.1236396351794389e-02 -4.9531100598979201e-04 -1.5779995894034005e+00 12 -3.3740693032952060e-02 -1.0210406243572182e-03 -1.5630986537874150e+00 - 13 -1.1437102611353016e-03 -4.6454866413029015e-05 5.4282837980149448e-03 - 14 2.3914999373115431e-03 -1.6478680244651469e-04 2.9802178734319239e-02 - 15 3.9287193302652786e-05 -2.5715673267285659e-05 2.8944525105129479e-03 - 16 2.0458480716482328e-03 -1.2119161321908735e-04 3.3689550843809452e-02 - 17 -2.7146073277767471e-03 -8.2376243258224663e-04 2.6564130941474612e-02 - 18 1.3669692885198135e-03 -4.2357196145489820e-04 3.2396141113926739e-02 - 19 3.0143371860819995e-04 -8.6218593339583785e-04 2.6284521141350669e-02 - 20 1.1542435168435056e-03 -2.7252318260838826e-04 3.4237916528138110e-02 - 21 -1.2350056952573553e-03 4.8655691135364269e-04 5.9284283442393631e-03 - 22 2.3656743884722890e-03 9.6575340844312705e-04 2.9811074931784823e-02 - 23 4.6754986244969657e-05 3.0149464050350903e-04 3.4630785686112129e-03 - 24 2.0301227080749633e-03 6.3879578068684812e-04 3.3653437189053413e-02 - 25 -2.3656211013513076e-03 -8.0454594828768334e-04 2.8476980555362911e-02 - 26 1.1566723797447039e-03 -3.9614599888570504e-04 3.2873323713155905e-02 - 27 2.8784994028036400e-04 -8.3661697184444898e-04 2.8317655886021253e-02 - 28 9.3882364605486020e-04 -2.3327601777843495e-04 3.4334676606415648e-02 - 29 -4.7969977052124917e-04 -1.2933334305373028e-04 -1.2336987392568071e-02 - 30 6.4733118786851766e-05 -1.3190918849005797e-04 -1.2737933567178844e-02 - 31 2.4269094157913586e-04 -1.3093943526788584e-04 -1.2136133260085013e-02 - 32 1.7452552740941527e-04 -1.1792779046242341e-04 -1.4181538324619835e-02 - 33 -3.8366266481516803e-04 -7.1061854758754556e-05 -1.3699106365426135e-02 - 34 2.8849004082563746e-05 -5.7838605310673531e-05 -1.3764181266896890e-02 - 35 2.2648059665862587e-04 -7.2851385190891320e-05 -1.3537361892926607e-02 - 36 1.2929221129083645e-04 -4.2862960950045859e-05 -1.4926105930886896e-02 - 37 -4.7698025941707008e-04 2.9971529466656788e-04 -1.2393604822896313e-02 - 38 6.4231095731188766e-05 2.7548977518460050e-04 -1.2789498345723021e-02 - 39 2.4169204779864826e-04 3.0552093685810269e-04 -1.2193908285665961e-02 - 40 1.7324998349441456e-04 2.2898000918153004e-04 -1.4225267020837207e-02 - 41 -3.4345772150395188e-04 -9.8012060153887415e-05 -1.4482722052972283e-02 - 42 2.0345466940577010e-05 -8.5250083485342566e-05 -1.4497101004472062e-02 - 43 2.0917627239292995e-04 -1.0051271468149899e-04 -1.4335313646556430e-02 - 44 1.1456796622437295e-04 -6.7553675788598551e-05 -1.5543196158604005e-02 + 13 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 14 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 15 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 16 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 17 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 18 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 19 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 20 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 21 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 22 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 23 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 24 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 25 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 26 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 27 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 28 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 29 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 30 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 31 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 32 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 33 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 34 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 35 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 36 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 37 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 38 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 39 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 40 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 41 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 42 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 43 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 44 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 run_vdwl: 0 -run_coul: 6.662694629990089 +run_coul: 6.662694556930399 run_stress: ! |2- 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 run_forces: ! |2 - 1 2.4590612609445102e-04 1.9614041218568861e-03 3.0874291949281147e+00 - 2 -1.6646393233505193e-02 2.1136941574790400e-03 3.0751132981100078e+00 - 3 6.3535217476586373e-04 5.1012487117746350e-03 3.0897301317927290e+00 - 4 -1.6598677148313409e-02 6.0535154567108685e-03 3.0778495346381409e+00 - 5 4.2257888534896988e-02 -2.3028533365965051e-03 -1.5593100596807521e+00 - 6 5.5690508027606708e-02 -2.4104721624763235e-03 -1.5626897542440843e+00 - 7 4.2243973130370149e-02 -7.6153220413259775e-04 -1.5604618260035832e+00 - 8 5.5862703939049158e-02 -9.9825803703216718e-04 -1.5640031014325448e+00 - 9 -5.1059409954744304e-02 -2.3195553026588347e-03 -1.5701677851024036e+00 - 10 -3.3824298857146967e-02 -2.3375522139358631e-03 -1.5551647619109401e+00 - 11 -5.1334079184640377e-02 -5.0583705005136689e-04 -1.5712298444761112e+00 - 12 -3.4037363466305925e-02 -1.0275978089057873e-03 -1.5564691026885336e+00 - 13 -1.1767076011504501e-03 -4.7681963272732406e-05 5.6177800544716262e-03 - 14 2.3826294437743331e-03 -1.6349140124449633e-04 2.9836275824428962e-02 - 15 4.1635367214843796e-05 -2.7287283914685102e-05 3.0967012748694773e-03 - 16 2.0334138778396313e-03 -1.1986039772787527e-04 3.3679727731055195e-02 - 17 -2.7026084826568797e-03 -8.1815079485725360e-04 2.6574868248546435e-02 - 18 1.3603406762441243e-03 -4.1902595052936860e-04 3.2373613783594497e-02 - 19 2.9940467686436986e-04 -8.5646794759970863e-04 2.6296909514905095e-02 - 20 1.1469475577225402e-03 -2.6907093945665336e-04 3.4197589258157073e-02 - 21 -1.2662578686531134e-03 4.9770031968890231e-04 6.1141650873547037e-03 - 22 2.3568409810395020e-03 9.5671841592085381e-04 2.9844352492872490e-02 - 23 4.8965507374742117e-05 3.1616579858329929e-04 3.6613180489820005e-03 - 24 2.0177821554069170e-03 6.3083810187911310e-04 3.3643353017422439e-02 - 25 -2.3537455003017457e-03 -7.9846295760147956e-04 2.8468250829639500e-02 - 26 1.1507655048236000e-03 -3.9159985067612060e-04 3.2839870487003708e-02 - 27 2.8582562554448814e-04 -8.3038492818152999e-04 2.8309777443009273e-02 - 28 9.3274285761092680e-04 -2.2997823984283208e-04 3.4287630335266286e-02 - 29 -4.7502414048888327e-04 -1.2847214455389489e-04 -1.2453998829891042e-02 - 30 6.3675154563755000e-05 -1.3104204562344653e-04 -1.2848240218511071e-02 - 31 2.4068203429808906e-04 -1.3007692195448562e-04 -1.2254443488117142e-02 - 32 1.7286880375665112e-04 -1.1713944254614034e-04 -1.4279748536149278e-02 - 33 -3.7975304094097439e-04 -7.0481503989778179e-05 -1.3805675914786045e-02 - 34 2.8172580422574018e-05 -5.7414812953860394e-05 -1.3867744309035916e-02 - 35 2.2448042926710853e-04 -7.2258368796568220e-05 -1.3645037267085249e-02 - 36 1.2804302797390986e-04 -4.2547023166907131e-05 -1.5019384166440140e-02 - 37 -4.7231656130619122e-04 2.9759414220405656e-04 -1.2510308090752414e-02 - 38 6.3181133759042352e-05 2.7366531591578288e-04 -1.2899551129187765e-02 - 39 2.3968383978790597e-04 3.0338116638639894e-04 -1.2311919409216509e-02 - 40 1.7159804905743131e-04 2.2744508340904917e-04 -1.4323246783684936e-02 - 41 -3.3986046011853923e-04 -9.7274166717457145e-05 -1.4583591616566860e-02 - 42 1.9775995840422978e-05 -8.4675117269364898e-05 -1.4595729099929700e-02 - 43 2.0725822207231137e-04 -9.9764702521265135e-05 -1.4437236974797743e-02 - 44 1.1345006523014360e-04 -6.7103771021504086e-05 -1.5632251527466189e-02 + 1 2.4590683616589461e-04 1.9614031577863586e-03 3.0874292323214023e+00 + 2 -1.6646391056109371e-02 2.1136931488457949e-03 3.0751133344119812e+00 + 3 6.3535284261970041e-04 5.1012486499622143e-03 3.0897301693141319e+00 + 4 -1.6598674977089525e-02 6.0535154273403009e-03 3.0778495712965137e+00 + 5 4.2257890248609424e-02 -2.3028528771735519e-03 -1.5593100743454233e+00 + 6 5.5690508840794913e-02 -2.4104716736065191e-03 -1.5626897684026875e+00 + 7 4.2243974848895624e-02 -7.6153214117135259e-04 -1.5604618407191599e+00 + 8 5.5862704771247232e-02 -9.9825804058303641e-04 -1.5640031157582626e+00 + 9 -5.1059412079591339e-02 -2.3195548367148278e-03 -1.5701677996288432e+00 + 10 -3.3824301875619064e-02 -2.3375517362836449e-03 -1.5551647759943841e+00 + 11 -5.1334081278836839e-02 -5.0583707575588808e-04 -1.5712298590599834e+00 + 12 -3.4037366496628307e-02 -1.0275977984217531e-03 -1.5564691169384570e+00 + 13 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 14 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 15 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 16 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 17 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 18 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 19 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 20 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 21 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 22 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 23 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 24 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 25 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 26 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 27 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 28 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 29 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 30 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 31 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 32 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 33 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 34 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 35 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 36 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 37 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 38 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 39 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 40 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 41 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 42 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 43 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 44 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 ... diff --git a/unittest/force-styles/tests/kspace-pppm_conp_charge.yaml b/unittest/force-styles/tests/kspace-pppm_conp_charge.yaml index 84148fc816..9322cae321 100644 --- a/unittest/force-styles/tests/kspace-pppm_conp_charge.yaml +++ b/unittest/force-styles/tests/kspace-pppm_conp_charge.yaml @@ -1,7 +1,7 @@ --- -lammps_version: 23 Jun 2022 +lammps_version: 3 Nov 2022 tags: generated -date_generated: Wed Sep 21 13:52:39 2022 +date_generated: Fri Dec 16 10:53:11 2022 epsilon: 3e-12 skip_tests: gpu kokkos_omp omp prerequisites: ! | @@ -17,6 +17,7 @@ post_commands: ! | kspace_modify gewald 0.23118 kspace_modify slab 3.0 fix fxcpm bot electrode/conp -1.0 1.805 couple top 1.0 symm on + fix fxforce ele setforce 0 0 0 input_file: in.conp pair_style: coul/long 15.0 pair_coeff: ! | @@ -24,101 +25,101 @@ pair_coeff: ! | extract: ! "" natoms: 44 init_vdwl: 0 -init_coul: 2.2156402256727614 +init_coul: 2.2156402256727743 init_stress: ! |2- 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 init_forces: ! |2 - 1 2.0996096688279944e-04 1.9837586784580306e-03 3.1004822661058822e+00 - 2 -1.6783332510617883e-02 2.1368843599407611e-03 3.0880130470329230e+00 - 3 6.0300296042517466e-04 5.1688381279905342e-03 3.1028182137891114e+00 - 4 -1.6735061532950901e-02 6.1290626039690339e-03 3.0907879891042755e+00 - 5 4.2014131860757913e-02 -2.3478381081742388e-03 -1.5658874682481487e+00 - 6 5.5659823770659422e-02 -2.4566144388410410e-03 -1.5693278833316506e+00 - 7 4.1999624791768865e-02 -7.5066218795240259e-04 -1.5670569809441617e+00 - 8 5.5832732887661884e-02 -9.8883264742169940e-04 -1.5706605160409139e+00 - 9 -5.0976953599115804e-02 -2.3653810185280950e-03 -1.5768945194236066e+00 - 10 -3.3513771125456657e-02 -2.3824712764543426e-03 -1.5616806812004886e+00 - 11 -5.1253442064492741e-02 -4.9304425051529275e-04 -1.5779738349804424e+00 - 12 -3.3727836471637192e-02 -1.0188844490582761e-03 -1.5630041309277038e+00 - 13 -1.1453068449918257e-03 -4.7335833322794788e-05 5.4292779404649470e-03 - 14 2.3900993287279790e-03 -1.6878550058260119e-04 2.9808528147740175e-02 - 15 4.0078428215627730e-05 -2.6184607051201481e-05 2.8941780231019881e-03 - 16 2.0473353699190459e-03 -1.2552209515766962e-04 3.3684989110502959e-02 - 17 -2.7210216747431955e-03 -8.2349543008294359e-04 2.6567504438257068e-02 - 18 1.3656002828979516e-03 -4.2323438710338486e-04 3.2404938547366383e-02 - 19 3.0785575286292939e-04 -8.6186674263511191e-04 2.6288541663855129e-02 - 20 1.1555469330548321e-03 -2.7230960410720359e-04 3.4235148032534163e-02 - 21 -1.2368093613861506e-03 4.8760847861882366e-04 5.9296798954256557e-03 - 22 2.3643140421916085e-03 9.6975102599399746e-04 2.9817231402721564e-02 - 23 4.7705653522709085e-05 3.0203836842154655e-04 3.4631818106649337e-03 - 24 2.0316297431160258e-03 6.4335031755788927e-04 3.3648629802522749e-02 - 25 -2.3728144718995455e-03 -8.0497592536520339e-04 2.8474707915345274e-02 - 26 1.1555985481661916e-03 -3.9649433660109970e-04 3.2876098209196375e-02 - 27 2.9459292459149998e-04 -8.3700881746301306e-04 2.8316136079038545e-02 - 28 9.4027352090446912e-04 -2.3371025598546553e-04 3.4325153603153732e-02 - 29 -5.2133856931286127e-04 -1.4498587872629142e-04 -1.2345168780426297e-02 - 30 7.0344538924238829e-05 -1.4805754895657979e-04 -1.2765142487049358e-02 - 31 2.7857644686035687e-04 -1.4667349483298643e-04 -1.2140095836769501e-02 - 32 1.7479865631996218e-04 -1.3335074368636031e-04 -1.4152171307753206e-02 - 33 -4.2607366742485959e-04 -7.2661709209033136e-05 -1.3713642029394900e-02 - 34 3.4224570995904750e-05 -5.9352423088202727e-05 -1.3797063100154012e-02 - 35 2.6332638434852483e-04 -7.4573909050170201e-05 -1.3548481212572565e-02 - 36 1.2956589882656260e-04 -4.4469640559387641e-05 -1.4903750420442119e-02 - 37 -5.1855202137555164e-04 3.1768672664149645e-04 -1.2402413876588990e-02 - 38 6.9744637010106164e-05 2.9379923413403201e-04 -1.2817238930047764e-02 - 39 2.7753815671547851e-04 3.2364879063853462e-04 -1.2198687630220518e-02 - 40 1.7359211286247436e-04 2.4661766514410969e-04 -1.4196607980261094e-02 - 41 -3.8501793941197521e-04 -9.8818538537990245e-05 -1.4472863167957050e-02 - 42 2.5779944975443997e-05 -8.5975255466014692e-05 -1.4504933662725301e-02 - 43 2.4535595442148292e-04 -1.0127263490049206e-04 -1.4321758087878972e-02 - 44 1.1457678622968817e-04 -6.8200688092186871e-05 -1.5499407046729242e-02 + 1 2.0996096688278619e-04 1.9837586784579742e-03 3.1004822661058862e+00 + 2 -1.6783332510618001e-02 2.1368843599407208e-03 3.0880130470329266e+00 + 3 6.0300296042516458e-04 5.1688381279906756e-03 3.1028182137891149e+00 + 4 -1.6735061532951023e-02 6.1290626039691648e-03 3.0907879891042778e+00 + 5 4.2014131860757892e-02 -2.3478381081742106e-03 -1.5658874682481501e+00 + 6 5.5659823770659464e-02 -2.4566144388410275e-03 -1.5693278833316524e+00 + 7 4.1999624791768830e-02 -7.5066218795246862e-04 -1.5670569809441637e+00 + 8 5.5832732887661912e-02 -9.8883264742175101e-04 -1.5706605160409157e+00 + 9 -5.0976953599115776e-02 -2.3653810185280764e-03 -1.5768945194236081e+00 + 10 -3.3513771125456580e-02 -2.3824712764543249e-03 -1.5616806812004898e+00 + 11 -5.1253442064492706e-02 -4.9304425051534804e-04 -1.5779738349804435e+00 + 12 -3.3727836471637108e-02 -1.0188844490583379e-03 -1.5630041309277052e+00 + 13 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 14 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 15 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 16 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 17 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 18 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 19 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 20 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 21 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 22 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 23 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 24 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 25 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 26 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 27 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 28 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 29 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 30 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 31 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 32 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 33 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 34 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 35 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 36 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 37 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 38 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 39 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 40 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 41 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 42 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 43 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 44 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 run_vdwl: 0 -run_coul: 6.662844717848837 +run_coul: 6.662844644802018 run_stress: ! |2- 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 run_forces: ! |2 - 1 2.4838374656870440e-04 1.9503798034564181e-03 3.0873204052231675e+00 - 2 -1.6652792550963507e-02 2.1026197438206527e-03 3.0749612313228378e+00 - 3 6.3785681925848106e-04 5.1125747842690368e-03 3.0896293583134606e+00 - 4 -1.6605065971975488e-02 6.0649203428150876e-03 3.0777057441315305e+00 - 5 4.2242720963296274e-02 -2.3050719334346786e-03 -1.5592361839228894e+00 - 6 5.5714272244614366e-02 -2.4127241272198356e-03 -1.5626351053810128e+00 - 7 4.2228744507856318e-02 -7.5938680980830215e-04 -1.5603926173148708e+00 - 8 5.5886400463161408e-02 -9.9610508394880446e-04 -1.5639532943562677e+00 - 9 -5.1076260774663269e-02 -2.3218376339398856e-03 -1.5701372580807029e+00 - 10 -3.3811558405906099e-02 -2.3397301017438945e-03 -1.5550660295253020e+00 - 11 -5.1350900750795785e-02 -5.0363945411724615e-04 -1.5712039970018119e+00 - 12 -3.4024561045577786e-02 -1.0255079139096881e-03 -1.5563752017113657e+00 - 13 -1.1783355827329363e-03 -4.8584425213699084e-05 5.6187810005819084e-03 - 14 2.3812433011153062e-03 -1.6745821939573657e-04 2.9842604329757397e-02 - 15 4.2472634867315832e-05 -2.7784504406519571e-05 3.0964622176844602e-03 - 16 2.0348857368272344e-03 -1.2415016403438035e-04 3.3675205082768264e-02 - 17 -2.7089567432503573e-03 -8.1788662401764114e-04 2.6578231998707000e-02 - 18 1.3589859834844708e-03 -4.1869284075205220e-04 3.2382362625908642e-02 - 19 3.0575998684987288e-04 -8.5615261685325662e-04 2.6300918365183709e-02 - 20 1.1482382726252934e-03 -2.6885864047674810e-04 3.4194853807722227e-02 - 21 -1.2680891310638792e-03 4.9877122346546224e-04 6.1154164862940522e-03 - 22 2.3554944751877239e-03 9.6068685589809240e-04 2.9850487598267021e-02 - 23 4.9959016556344255e-05 3.1673714227676154e-04 3.6614479955528112e-03 - 24 2.0192733259603224e-03 6.3534849028298586e-04 3.3638585635455770e-02 - 25 -2.3608563352120440e-03 -7.9888874314704573e-04 2.8465994834953420e-02 - 26 1.1497012824588045e-03 -3.9194609517719160e-04 3.2842627585052867e-02 - 27 2.9249325092285189e-04 -8.3077344407106490e-04 2.8308269755484061e-02 - 28 9.3417705968603434e-04 -2.3040781495046246e-04 3.4278191255128133e-02 - 29 -5.1652528928022433e-04 -1.4404353042683099e-04 -1.2462150875064196e-02 - 30 6.9288940488795056e-05 -1.4710788156542239e-04 -1.2875332550432134e-02 - 31 2.7644608312646314e-04 -1.4573040212804145e-04 -1.2258366148270420e-02 - 32 1.7312052380602741e-04 -1.3246895843787418e-04 -1.4250473428514749e-02 - 33 -4.2201273060055557e-04 -7.2075937985441126e-05 -1.3820153961237609e-02 - 34 3.3549241776445280e-05 -5.8924381188893540e-05 -1.3900492261623465e-02 - 35 2.6119063773180797e-04 -7.3975274656908229e-05 -1.3656086631368676e-02 - 36 1.2829798186810219e-04 -4.4146305074977061e-05 -1.4997087695938056e-02 - 37 -5.1375024459660446e-04 3.1547360096919003e-04 -1.2519087424517190e-02 - 38 6.8697062203298403e-05 2.9188411177715840e-04 -1.2927175328444108e-02 - 39 2.7540798890579723e-04 3.2141722701930868e-04 -1.2316658300695350e-02 - 40 1.7191884579607650e-04 2.4497453169432579e-04 -1.4294679585099376e-02 - 41 -3.8125358429239129e-04 -9.8073404256391242e-05 -1.4573806404289218e-02 - 42 2.5208712883704254e-05 -8.5394829999053783e-05 -1.4603558816163261e-02 - 43 2.4329125095209453e-04 -1.0051714757303402e-04 -1.4423743772053488e-02 - 44 1.1343880007511943e-04 -6.7742613833467660e-05 -1.5588639087563531e-02 + 1 2.4838446863948484e-04 1.9503788466162767e-03 3.0873204426106526e+00 + 2 -1.6652790386576983e-02 2.1026187422225470e-03 3.0749612676149027e+00 + 3 6.3785749905503929e-04 5.1125747155211223e-03 3.0896293958296615e+00 + 4 -1.6605063813844967e-02 6.0649203065202420e-03 3.0777057807809554e+00 + 5 4.2242722672783783e-02 -2.3050714764837449e-03 -1.5592361985876177e+00 + 6 5.5714273062644280e-02 -2.4127236407636868e-03 -1.5626351195306318e+00 + 7 4.2228746222139874e-02 -7.5938674438442175e-04 -1.5603926320308752e+00 + 8 5.5886401300220823e-02 -9.9610508505007457e-04 -1.5639533086735800e+00 + 9 -5.1076262902731109e-02 -2.3218371704875071e-03 -1.5701372726008265e+00 + 10 -3.3811561421032664e-02 -2.3397296264742753e-03 -1.5550660436064545e+00 + 11 -5.1350902848165472e-02 -5.0363947736730796e-04 -1.5712040115797643e+00 + 12 -3.4024564072498598e-02 -1.0255079009805080e-03 -1.5563752159595892e+00 + 13 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 14 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 15 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 16 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 17 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 18 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 19 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 20 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 21 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 22 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 23 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 24 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 25 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 26 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 27 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 28 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 29 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 30 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 31 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 32 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 33 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 34 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 35 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 36 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 37 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 38 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 39 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 40 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 41 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 42 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 43 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 44 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 ... From 7d8866891a67878c001e292a21527bb750f7d964 Mon Sep 17 00:00:00 2001 From: Ludwig Ahrens Date: Wed, 1 Feb 2023 10:15:11 +0100 Subject: [PATCH 006/448] Call particle_map in compute of pppm/electrode --- src/ELECTRODE/pppm_electrode.cpp | 137 +++++++++++++++---------------- 1 file changed, 68 insertions(+), 69 deletions(-) diff --git a/src/ELECTRODE/pppm_electrode.cpp b/src/ELECTRODE/pppm_electrode.cpp index 7418a052e6..3bc1364c99 100644 --- a/src/ELECTRODE/pppm_electrode.cpp +++ b/src/ELECTRODE/pppm_electrode.cpp @@ -445,6 +445,7 @@ void PPPMElectrode::compute(int eflag, int vflag) start_compute(); + /* if (compute_vector_called && last_invert_source) { // electrolyte_density_brick is filled, so we can grab only electrode atoms. // Does not work for direct cg algorithm because electrode charges change after compute_vector. @@ -460,15 +461,17 @@ void PPPMElectrode::compute(int eflag, int vflag) density_brick[nz][ny][nx] += electrolyte_density_brick[nz][ny][nx]; } } else { - make_rho(); + */ + particle_map(); + make_rho(); - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks - // remap from 3d decomposition to FFT decomposition + // all procs communicate density values from their ghost cells + // to fully sum contribution in their 3d bricks + // remap from 3d decomposition to FFT decomposition - gc->reverse_comm(Grid3d::KSPACE, this, REVERSE_RHO, 1, sizeof(FFT_SCALAR), gc_buf1, gc_buf2, - MPI_FFT_SCALAR); - } + gc->reverse_comm(Grid3d::KSPACE, this, REVERSE_RHO, 1, sizeof(FFT_SCALAR), gc_buf1, gc_buf2, + MPI_FFT_SCALAR); + //} brick2fft(); @@ -591,6 +594,7 @@ void PPPMElectrode::compute_vector(double *vec, int sensor_grpbit, int source_gr // electrolyte density (without writing an additional function) FFT_SCALAR ***density_brick_real = density_brick; FFT_SCALAR *density_fft_real = density_fft; + particle_map(); make_rho_in_brick(source_grpbit, electrolyte_density_brick, invert_source); density_brick = electrolyte_density_brick; density_fft = electrolyte_density_fft; @@ -674,7 +678,8 @@ void PPPMElectrode::compute_matrix(bigint *imat, double **matrix, bool timer_fla // fft green's function k -> r (double) double *greens_real; memory->create(greens_real, nz_pppm * ny_pppm * nx_pppm, "pppm/electrode:greens_real"); - memset(greens_real, 0, (std::size_t)nz_pppm * (std::size_t)ny_pppm * (std::size_t)nx_pppm * sizeof(double)); + memset(greens_real, 0, + (std::size_t) nz_pppm * (std::size_t) ny_pppm * (std::size_t) nx_pppm * sizeof(double)); for (int i = 0, n = 0; i < nfft; i++) { work2[n++] = greensfn[i]; work2[n++] = ZEROF; @@ -867,7 +872,7 @@ void PPPMElectrode::two_step_multiplication(bigint *imat, double *greens_real, d double **gw; memory->create(gw, nmat, nxyz, "pppm/electrode:gw"); - memset(&(gw[0][0]), 0, (std::size_t)nmat * (std::size_t)nxyz * sizeof(double)); + memset(&(gw[0][0]), 0, (std::size_t) nmat * (std::size_t) nxyz * sizeof(double)); auto fmod = [](int x, int n) { // fast unsigned mod int r = abs(x); @@ -986,17 +991,18 @@ void PPPMElectrode::allocate() // returns local owned and ghost grid bounds // setup communication patterns and buffers - gc = new Grid3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out); + gc = new Grid3d(lmp, world, nx_pppm, ny_pppm, nz_pppm, nxlo_in, nxhi_in, nylo_in, nyhi_in, + nzlo_in, nzhi_in, nxlo_out, nxhi_out, nylo_out, nyhi_out, nzlo_out, nzhi_out); - gc->setup_comm(ngc_buf1,ngc_buf2); + gc->setup_comm(ngc_buf1, ngc_buf2); - if (differentiation_flag) npergrid = 1; - else npergrid = 3; + if (differentiation_flag) + npergrid = 1; + else + npergrid = 3; - memory->create(gc_buf1,npergrid*ngc_buf1,"pppm:gc_buf1"); - memory->create(gc_buf2,npergrid*ngc_buf2,"pppm:gc_buf2"); + memory->create(gc_buf1, npergrid * ngc_buf1, "pppm:gc_buf1"); + memory->create(gc_buf2, npergrid * ngc_buf2, "pppm:gc_buf2"); // tally local grid sizes // ngrid = count of owned+ghost grid cells on this proc @@ -1005,67 +1011,63 @@ void PPPMElectrode::allocate() // nfft = FFT points in x-pencil FFT decomposition on this proc // nfft_both = greater of nfft and nfft_brick - ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); + ngrid = (nxhi_out - nxlo_out + 1) * (nyhi_out - nylo_out + 1) * (nzhi_out - nzlo_out + 1); - nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * - (nzhi_in-nzlo_in+1); + nfft_brick = (nxhi_in - nxlo_in + 1) * (nyhi_in - nylo_in + 1) * (nzhi_in - nzlo_in + 1); - nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * - (nzhi_fft-nzlo_fft+1); + nfft = (nxhi_fft - nxlo_fft + 1) * (nyhi_fft - nylo_fft + 1) * (nzhi_fft - nzlo_fft + 1); - nfft_both = MAX(nfft,nfft_brick); + nfft_both = MAX(nfft, nfft_brick); // allocate distributed grid data - memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_brick"); + memory->create3d_offset(density_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out, + "pppm:density_brick"); - memory->create(density_fft,nfft_both,"pppm:density_fft"); - memory->create(greensfn,nfft_both,"pppm:greensfn"); - memory->create(work1,2*nfft_both,"pppm:work1"); - memory->create(work2,2*nfft_both,"pppm:work2"); - memory->create(vg,nfft_both,6,"pppm:vg"); + memory->create(density_fft, nfft_both, "pppm:density_fft"); + memory->create(greensfn, nfft_both, "pppm:greensfn"); + memory->create(work1, 2 * nfft_both, "pppm:work1"); + memory->create(work2, 2 * nfft_both, "pppm:work2"); + memory->create(vg, nfft_both, 6, "pppm:vg"); if (triclinic == 0) { - memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx"); - memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky"); - memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz"); + memory->create1d_offset(fkx, nxlo_fft, nxhi_fft, "pppm:fkx"); + memory->create1d_offset(fky, nylo_fft, nyhi_fft, "pppm:fky"); + memory->create1d_offset(fkz, nzlo_fft, nzhi_fft, "pppm:fkz"); } else { - memory->create(fkx,nfft_both,"pppm:fkx"); - memory->create(fky,nfft_both,"pppm:fky"); - memory->create(fkz,nfft_both,"pppm:fkz"); + memory->create(fkx, nfft_both, "pppm:fkx"); + memory->create(fky, nfft_both, "pppm:fky"); + memory->create(fkz, nfft_both, "pppm:fkz"); } if (differentiation_flag == 1) { - memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:u_brick"); + memory->create3d_offset(u_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out, + "pppm:u_brick"); - memory->create(sf_precoeff1,nfft_both,"pppm:sf_precoeff1"); - memory->create(sf_precoeff2,nfft_both,"pppm:sf_precoeff2"); - memory->create(sf_precoeff3,nfft_both,"pppm:sf_precoeff3"); - memory->create(sf_precoeff4,nfft_both,"pppm:sf_precoeff4"); - memory->create(sf_precoeff5,nfft_both,"pppm:sf_precoeff5"); - memory->create(sf_precoeff6,nfft_both,"pppm:sf_precoeff6"); + memory->create(sf_precoeff1, nfft_both, "pppm:sf_precoeff1"); + memory->create(sf_precoeff2, nfft_both, "pppm:sf_precoeff2"); + memory->create(sf_precoeff3, nfft_both, "pppm:sf_precoeff3"); + memory->create(sf_precoeff4, nfft_both, "pppm:sf_precoeff4"); + memory->create(sf_precoeff5, nfft_both, "pppm:sf_precoeff5"); + memory->create(sf_precoeff6, nfft_both, "pppm:sf_precoeff6"); } else { - memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdx_brick"); - memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdy_brick"); - memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdz_brick"); + memory->create3d_offset(vdx_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out, + "pppm:vdx_brick"); + memory->create3d_offset(vdy_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out, + "pppm:vdy_brick"); + memory->create3d_offset(vdz_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out, + "pppm:vdz_brick"); } // summation coeffs order_allocated = order; - if (!stagger_flag) memory->create(gf_b,order,"pppm:gf_b"); - memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); - memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm:drho1d"); - memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); - memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2, - "pppm:drho_coeff"); + if (!stagger_flag) memory->create(gf_b, order, "pppm:gf_b"); + memory->create2d_offset(rho1d, 3, -order / 2, order / 2, "pppm:rho1d"); + memory->create2d_offset(drho1d, 3, -order / 2, order / 2, "pppm:drho1d"); + memory->create2d_offset(rho_coeff, order, (1 - order) / 2, order / 2, "pppm:rho_coeff"); + memory->create2d_offset(drho_coeff, order, (1 - order) / 2, order / 2, "pppm:drho_coeff"); // create 2 FFTs and a Remap // 1st FFT keeps data in FFT decomposition @@ -1074,20 +1076,17 @@ void PPPMElectrode::allocate() int tmp; - fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 0,0,&tmp,collective_flag); + fft1 = new FFT3d(lmp, world, nx_pppm, ny_pppm, nz_pppm, nxlo_fft, nxhi_fft, nylo_fft, nyhi_fft, + nzlo_fft, nzhi_fft, nxlo_fft, nxhi_fft, nylo_fft, nyhi_fft, nzlo_fft, nzhi_fft, + 0, 0, &tmp, collective_flag); - fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - 0,0,&tmp,collective_flag); + fft2 = new FFT3d(lmp, world, nx_pppm, ny_pppm, nz_pppm, nxlo_fft, nxhi_fft, nylo_fft, nyhi_fft, + nzlo_fft, nzhi_fft, nxlo_in, nxhi_in, nylo_in, nyhi_in, nzlo_in, nzhi_in, 0, 0, + &tmp, collective_flag); - remap = new Remap(lmp,world, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,FFT_PRECISION,collective_flag); + remap = new Remap(lmp, world, nxlo_in, nxhi_in, nylo_in, nyhi_in, nzlo_in, nzhi_in, nxlo_fft, + nxhi_fft, nylo_fft, nyhi_fft, nzlo_fft, nzhi_fft, 1, 0, 0, FFT_PRECISION, + collective_flag); // ELECTRODE specific allocations From 59bdb136dcada2c62d7c8be7ea298eb3fa26ce00 Mon Sep 17 00:00:00 2001 From: Shern Tee Date: Wed, 31 May 2023 14:50:55 +0000 Subject: [PATCH 007/448] added qtotal keyword --- doc/src/fix_electrode.rst | 17 +++- .../PACKAGES/electrode/graph-il/in.nonneut | 31 +++++++ .../PACKAGES/electrode/graph-il/in.nonneut2 | 33 +++++++ src/ELECTRODE/fix_electrode_conp.cpp | 86 ++++++++++++++++--- src/ELECTRODE/fix_electrode_conp.h | 10 ++- src/ELECTRODE/fix_electrode_thermo.cpp | 5 +- 6 files changed, 168 insertions(+), 14 deletions(-) create mode 100644 examples/PACKAGES/electrode/graph-il/in.nonneut create mode 100644 examples/PACKAGES/electrode/graph-il/in.nonneut2 diff --git a/doc/src/fix_electrode.rst b/doc/src/fix_electrode.rst index 3d543f08d2..5685482a71 100644 --- a/doc/src/fix_electrode.rst +++ b/doc/src/fix_electrode.rst @@ -45,7 +45,7 @@ Syntax rng_v = integer used to initialize random number generator * zero or more keyword/value pairs may be appended -* keyword = *algo* or *symm* or *couple* or *etypes* or *ffield* or *write_mat* or *write_inv* or *read_mat* or *read_inv* +* keyword = *algo* or *symm* or *couple* or *etypes* or *ffield* or *write_mat* or *write_inv* or *read_mat* or *read_inv* or *qtotal* .. parsed-literal:: @@ -68,6 +68,8 @@ Syntax filename = file from which to read elastance matrix *read_inv* value = filename filename = file from which to read inverted matrix + *qtotal* value = number or *v_* equal-style variable + add overall potential so that all electrode charges add up to *qtotal* Examples """""""" @@ -249,6 +251,19 @@ be enabled if any electrode particle has the same type as any electrolyte particle (which would be unusual in a typical simulation) and the fix will issue an error in that case. +.. versionchanged:: qtotal + +The keyword *qtotal* causes *fix electrode/conp* and *fix electrode/thermo* +to add an overall potential to all electrodes so that the total charge on +the electrodes is a specified amount (which may be an equal-style variable). +For example, if a user wanted to simulate a solution of excess cations +such that the total electrolyte charge is +2, setting *qtotal -2* would cause +the total electrode charge to be -2, so that the simulation box remains overall +electroneutral. Since *fix electrode/conq* constrains the total charges of +individual electrodes, and since *symm on* constrains the total charge of all +electrodes to be zero, either option is incompatible with the *qtotal* keyword +(even if *qtotal* is set to zero). + Restart, fix_modify, output, run start/stop, minimize info """"""""""""""""""""""""""""""""""""""""""""""""""""""""""" diff --git a/examples/PACKAGES/electrode/graph-il/in.nonneut b/examples/PACKAGES/electrode/graph-il/in.nonneut new file mode 100644 index 0000000000..3027faf0cc --- /dev/null +++ b/examples/PACKAGES/electrode/graph-il/in.nonneut @@ -0,0 +1,31 @@ +# electrodes, overall non-neutral +# for graphene-ionic liquid supercapacitor + +boundary p p f # slab calculation +include settings.mod # styles, groups, computes and fixes +kspace_modify slab 3.0 + +# make an extra anion +variable qmodify index 1 +variable vdiff index 1 +# these values can be changed, e.g. ${LAMMPS_EXECUTABLE} -i in.nonneut -var qmodify 0.9 -var vdiff -2 +# fix electrode/* needs equal style variables: +variable qmodify_equal equal v_qmodify +variable vdiff_equal equal v_vdiff + +create_atoms 4 single 16.1 17.2 0 +set atom 3777 charge $(-v_qmodify) + +fix c top electrode/conp v_vdiff_equal 1.979 couple bot 0 etypes on qtotal v_qmodify_equal +# to test electrode/thermo: +# variable vbot equal 0 +# fix c top electrode/thermo v_vdiff_equal 1.979 couple bot v_vbot etypes on qtotal v_qmodify_equal temp 310 100 12309 # symm on + +variable dv equal f_c[1]-f_c[2] + +variable qelec equal c_qbot+c_qtop +compute qall all reduce sum v_q # total system charge + +thermo 50 +thermo_style custom step temp c_ctemp epair etotal c_qbot c_qtop f_c[1] f_c[2] v_dv v_qelec c_qall +run 500 diff --git a/examples/PACKAGES/electrode/graph-il/in.nonneut2 b/examples/PACKAGES/electrode/graph-il/in.nonneut2 new file mode 100644 index 0000000000..c73eee5db3 --- /dev/null +++ b/examples/PACKAGES/electrode/graph-il/in.nonneut2 @@ -0,0 +1,33 @@ +# electrodes, overall non-neutral +# for graphene-ionic liquid supercapacitor + +boundary p p f # slab calculation +include settings.mod # styles, groups, computes and fixes +kspace_modify slab 3.0 + +# make an extra anion +variable qmodify index 1 +variable vdiff index 1 +# these values can be changed, e.g. ${LAMMPS_EXECUTABLE} -i in.nonneut -var qmodify 0.9 -var vdiff -2 +create_atoms 4 single 16.1 17.2 0 +set atom 3777 charge $(-v_qmodify) + +fix c top electrode/conp v_vtop 1.979 couple bot v_vbot etypes on # symm on +# to test electrode/thermo: +#fix c top electrode/thermo v_vtop 1.979 couple bot v_vbot etypes on temp 310 100 12309 symm off +#but it looks like update_psi() is hardwired to impose electroneutrality + +variable csum equal f_c[1][2]+f_c[1][3]+f_c[2][2]+f_c[2][3] +variable cdiff equal 0.5*(f_c[1][2]-f_c[2][3]) +variable qdeficit equal v_qmodify-f_c[1][1]-f_c[2][1]-v_cdiff*v_vdiff +variable vphi equal v_qdeficit/v_csum +variable vtop equal v_vphi+0.5*v_vdiff +variable vbot equal v_vphi-0.5*v_vdiff +variable dv equal v_vtop-v_vbot + +variable qelec equal c_qbot+c_qtop +compute qall all reduce sum v_q # total system charge + +thermo 50 +thermo_style custom step temp c_ctemp epair etotal c_qbot c_qtop f_c[1] f_c[2] v_dv v_qelec c_qall +run 500 diff --git a/src/ELECTRODE/fix_electrode_conp.cpp b/src/ELECTRODE/fix_electrode_conp.cpp index 33632f9099..a7e6cbe922 100644 --- a/src/ELECTRODE/fix_electrode_conp.cpp +++ b/src/ELECTRODE/fix_electrode_conp.cpp @@ -103,20 +103,23 @@ FixElectrodeConp::FixElectrodeConp(LAMMPS *lmp, int narg, char **arg) : mult_time = 0; n_call = n_cg_step = 0; + qtotal = 0.; + qtotal_var_style = VarStyle::UNSET; + // read fix command fixname = std::string(arg[0]); groups = std::vector(1, igroup); group_bits = std::vector(1, groupbit); group_psi_var_names = std::vector(1); group_psi_var_styles = std::vector(1, VarStyle::CONST); - group_psi = std::vector(1); + group_psi_const = std::vector(1); etypes_neighlists = false; if (strstr(arg[3], "v_") == arg[3]) { std::string vname = arg[3]; group_psi_var_names[0] = vname.substr(2); group_psi_var_styles[0] = VarStyle::EQUAL; } else - group_psi[0] = utils::numeric(FLERR, arg[3], false, lmp); + group_psi_const[0] = utils::numeric(FLERR, arg[3], false, lmp); char *eta_str = arg[4]; eta = utils::numeric(FLERR, eta_str, false, lmp); int iarg = 5; @@ -132,12 +135,12 @@ FixElectrodeConp::FixElectrodeConp(LAMMPS *lmp, int narg, char **arg) : std::string vname = arg[iarg]; group_psi_var_names.push_back(vname.substr(2)); group_psi_var_styles.push_back(VarStyle::EQUAL); - group_psi.push_back(0.); + group_psi_const.push_back(0.); } else { std::string null; group_psi_var_names.push_back(null); group_psi_var_styles.push_back(VarStyle::CONST); - group_psi.push_back(utils::numeric(FLERR, arg[iarg], false, lmp)); + group_psi_const.push_back(utils::numeric(FLERR, arg[iarg], false, lmp)); } } else if ((strcmp(arg[iarg], "algo") == 0)) { if (!default_algo) error->one(FLERR, fmt::format("Algorithm can be set once, only")); @@ -195,6 +198,19 @@ FixElectrodeConp::FixElectrodeConp(LAMMPS *lmp, int narg, char **arg) : thermo_temp = force->boltz / force->qe2f * utils::numeric(FLERR, arg[++iarg], false, lmp); thermo_time = utils::numeric(FLERR, arg[++iarg], false, lmp); thermo_init = utils::inumeric(FLERR, arg[++iarg], false, lmp); + } else if ((strcmp(arg[iarg], "qtotal") == 0)) { + if (iarg + 2 > narg) error->all(FLERR, "Need one argument after qtotal keyword"); + if (strcmp(this->style, "electrode/conq") == 0) + error->all(FLERR, "qtotal keyword not available for electrode/conq"); + ++iarg; + if (strstr(arg[iarg], "v_") == arg[iarg]) { + std::string vname = arg[iarg]; + qtotal_var_name = vname.substr(2); + qtotal_var_style = VarStyle::EQUAL; + } else { + qtotal = utils::numeric(FLERR, arg[iarg], false, lmp); + qtotal_var_style = VarStyle::CONST; + } // toggle parameters } else if ((strcmp(arg[iarg], "etypes") == 0)) { etypes_neighlists = utils::logical(FLERR, arg[++iarg], false, lmp); @@ -208,6 +224,12 @@ FixElectrodeConp::FixElectrodeConp(LAMMPS *lmp, int narg, char **arg) : iarg++; } + if (qtotal_var_style != VarStyle::UNSET) { + if (symm) error->all(FLERR, "{} cannot use qtotal keyword with symm on", this->style); + } + + // computatonal potential + group_psi = std::vector(groups.size()); // union of all coupled groups std::string union_group = "conp_group"; std::string group_cmd = union_group + " union"; @@ -225,6 +247,7 @@ FixElectrodeConp::FixElectrodeConp(LAMMPS *lmp, int narg, char **arg) : if (need_elec_vector) elec_vector = new ElectrodeVector(lmp, igroup, igroup, eta, false); assert(groups.size() == group_bits.size()); assert(groups.size() == group_psi.size()); + assert(groups.size() == group_psi_const.size()); assert(groups.size() == group_psi_var_styles.size()); assert(groups.size() == group_psi_var_names.size()); assert(igroup == elyt_vector->igroup); @@ -396,7 +419,8 @@ void FixElectrodeConp::init() for (char *fix_id : integrate_ids) error->warning( FLERR, - "Electrode atoms are integrated by fix {}, but fix electrode is using a matrix method. For " + "Electrode atoms are integrated by fix {}, but fix electrode is using a matrix method. " + "For " "mobile electrodes use the conjugate gradient algorithm without matrix ('algo cg').", fix_id); } @@ -475,6 +499,7 @@ void FixElectrodeConp::setup_post_neighbor() // get equal-style variable ids: group_psi_var_ids = std::vector(num_of_groups, -1); for (int g = 0; g < num_of_groups; g++) { + assert(group_psi_var_styles[g] != VarStyle::UNSET); if (group_psi_var_styles[g] == VarStyle::CONST) continue; const char *var_name = group_psi_var_names[g].c_str(); int var_id = input->variable->find(var_name); @@ -485,6 +510,16 @@ void FixElectrodeConp::setup_post_neighbor() fmt::format("Variable '{}' for fix electrode is not equal-style", var_name)); group_psi_var_ids[g] = var_id; } + if (qtotal_var_style == VarStyle::EQUAL) { + const char *var_name = qtotal_var_name.c_str(); + int var_id = input->variable->find(var_name); + if (var_id < 0) + error->all(FLERR, fmt::format("Variable '{}' for fix electrode does not exist", var_name)); + if (!input->variable->equalstyle(var_id)) + error->all(FLERR, + fmt::format("Variable '{}' for fix electrode is not equal-style", var_name)); + qtotal_var_id = var_id; + } // pair and list setups: @@ -834,6 +869,8 @@ void FixElectrodeConp::update_charges() } MPI_Allreduce(MPI_IN_PLACE, &sb_charges.front(), num_of_groups, MPI_DOUBLE, MPI_SUM, world); update_psi(); // use for equal-style and conq + if (qtotal_var_style != VarStyle::UNSET) + update_psi_qtotal(); // use for qtotal; same for thermo for (int g = 0; g < num_of_groups; g++) for (int j = 0; j < nlocalele; j++) q_local[j] += sd_vectors[g][list_iele[j]] * group_psi[g]; MPI_Barrier(world); @@ -936,12 +973,22 @@ std::vector FixElectrodeConp::gather_ngroup(std::vector x_local) } /* ---------------------------------------------------------------------- - ensure total electrode charge is 0 if symm + ensure total electrode charge is 0 if symm and qtotal if qtotal is used ------------------------------------------------------------------------- */ std::vector FixElectrodeConp::constraint_correction(std::vector x) { - return constraint_projection(x); + if (symm || qtotal_var_style != VarStyle::UNSET) { + if (qtotal_var_style == VarStyle::EQUAL) qtotal = input->variable->compute_equal(qtotal_var_id); + double sum = 0.; + for (double xi : x) sum += xi; + MPI_Allreduce(MPI_IN_PLACE, &sum, 1, MPI_DOUBLE, MPI_SUM, world); + if (qtotal_var_style != VarStyle::UNSET) sum -= qtotal; + sum /= ngroup; + for (double &xi : x) xi -= sum; + return x; + } + return x; } /* ---------------------------------------------------------------------- @@ -950,7 +997,7 @@ std::vector FixElectrodeConp::constraint_correction(std::vector std::vector FixElectrodeConp::constraint_projection(std::vector x) { - if (symm) { + if (symm || qtotal_var_style != VarStyle::UNSET) { double sum = 0.; for (double xi : x) sum += xi; MPI_Allreduce(MPI_IN_PLACE, &sum, 1, MPI_DOUBLE, MPI_SUM, world); @@ -1008,13 +1055,28 @@ std::vector FixElectrodeConp::times_elastance(std::vector x) void FixElectrodeConp::update_psi() { for (int g = 0; g < num_of_groups; g++) { - if (group_psi_var_styles[g] == VarStyle::CONST) continue; - group_psi[g] = input->variable->compute_equal(group_psi_var_ids[g]); + if (group_psi_var_styles[g] == VarStyle::CONST) + group_psi[g] = group_psi_const[g]; + else + group_psi[g] = input->variable->compute_equal(group_psi_var_ids[g]); } } /* ---------------------------------------------------------------------- */ +void FixElectrodeConp::update_psi_qtotal() +{ + if (qtotal_var_style == VarStyle::EQUAL) qtotal = input->variable->compute_equal(qtotal_var_id); + double q_current = 0.; + for (int i = 0; i < num_of_groups; i++) { + q_current += sb_charges[i]; + for (int j = 0; j < num_of_groups; j++) q_current += macro_capacitance[i][j] * group_psi[j]; + } + double add_psi = (qtotal - q_current) / macro_capacitance_sum; + for (int i = 0; i < num_of_groups; i++) group_psi[i] += add_psi; +} +/* ---------------------------------------------------------------------- */ + void FixElectrodeConp::compute_macro_matrices() { assert(algo == Algo::MATRIX_INV); @@ -1070,6 +1132,10 @@ void FixElectrodeConp::compute_macro_matrices() } } } + + macro_capacitance_sum = 0.; + for (int i = 0; i < num_of_groups; i++) + for (int j = 0; j < num_of_groups; j++) macro_capacitance_sum += macro_capacitance[i][j]; } /* ---------------------------------------------------------------------- */ diff --git a/src/ELECTRODE/fix_electrode_conp.h b/src/ELECTRODE/fix_electrode_conp.h index 1289d96281..689a44053c 100644 --- a/src/ELECTRODE/fix_electrode_conp.h +++ b/src/ELECTRODE/fix_electrode_conp.h @@ -71,7 +71,7 @@ class FixElectrodeConp : public Fix { protected: enum class Algo { MATRIX_INV, MATRIX_CG, CG }; - enum class VarStyle { CONST, EQUAL }; + enum class VarStyle { CONST, EQUAL, UNSET }; virtual void update_psi(); virtual void pre_update(){}; virtual void recompute_potential(std::vector, std::vector){}; @@ -80,6 +80,7 @@ class FixElectrodeConp : public Fix { virtual void compute_macro_matrices(); std::vector ele_ele_interaction(const std::vector &); std::vector group_psi; + std::vector group_psi_const; // needed to undo qtotal psi updates std::vector group_bits; std::vector groups; int num_of_groups; @@ -101,6 +102,10 @@ class FixElectrodeConp : public Fix { std::string fixname; // used by electrode/ffield to set up internal efield bool intelflag; inline virtual void intel_pack_buffers() {} + double qtotal; + std::string qtotal_var_name; + int qtotal_var_id; + VarStyle qtotal_var_style; private: std::string output_file_inv, output_file_mat, output_file_vec; @@ -143,6 +148,9 @@ class FixElectrodeConp : public Fix { std::vector gather_ngroup(std::vector); std::vector gather_elevec_local(ElectrodeVector *); void set_charges(std::vector); + // qtotal + double macro_capacitance_sum; + void update_psi_qtotal(); // fix-specific electrode ID storage system: diff --git a/src/ELECTRODE/fix_electrode_thermo.cpp b/src/ELECTRODE/fix_electrode_thermo.cpp index 52c0a3ce4c..3883aa40a2 100644 --- a/src/ELECTRODE/fix_electrode_thermo.cpp +++ b/src/ELECTRODE/fix_electrode_thermo.cpp @@ -47,7 +47,8 @@ FixElectrodeThermo::FixElectrodeThermo(LAMMPS *lmp, int narg, char **arg) : if (thermo_time < SMALL) error->all(FLERR, "Keyword temp not set or zero in electrode/thermo"); thermo_random = new RanMars(lmp, thermo_init); - if (group_psi_var_styles[0] == VarStyle::CONST) delta_psi_0 = group_psi[1] - group_psi[0]; + if (group_psi_var_styles[0] == VarStyle::CONST) + delta_psi_0 = group_psi_const[1] - group_psi_const[0]; } /* ----------------------------------------------------------------------- */ @@ -102,7 +103,7 @@ void FixElectrodeThermo::update_psi() double const delta_psi = group_psi_old[1] - group_psi_old[0]; // target potential difference from input parameters - if (group_psi_var_styles[0] != VarStyle::CONST) { + if (group_psi_var_styles[0] == VarStyle::EQUAL) { delta_psi_0 = input->variable->compute_equal(group_psi_var_ids[1]) - input->variable->compute_equal(group_psi_var_ids[0]); } From 6de50fbd33a130e8a2ecac07a5bc31ac5467969d Mon Sep 17 00:00:00 2001 From: jtclemm Date: Wed, 14 Jun 2023 13:39:24 -0600 Subject: [PATCH 008/448] Adding iso options to fix deform --- doc/src/fix_deform.rst | 46 ++++++++++++--- src/fix_deform.cpp | 129 ++++++++++++++++++++++++++++++++++++----- src/fix_deform.h | 2 + 3 files changed, 155 insertions(+), 22 deletions(-) diff --git a/doc/src/fix_deform.rst b/doc/src/fix_deform.rst index d46f1204f5..7a17f14fed 100644 --- a/doc/src/fix_deform.rst +++ b/doc/src/fix_deform.rst @@ -20,7 +20,7 @@ Syntax .. parsed-literal:: - parameter = *x* or *y* or *z* or *xy* or *xz* or *yz* + parameter = *x* or *y* or *z* or *xy* or *xz* or *yz* or *iso* *x*, *y*, *z* args = style value(s) style = *final* or *delta* or *scale* or *vel* or *erate* or *trate* or *volume* or *wiggle* or *variable* *final* values = lo hi @@ -71,6 +71,12 @@ Syntax *variable* values = v_name1 v_name2 v_name1 = variable with name1 for tilt change as function of time v_name2 = variable with name2 for change rate as function of time + *iso* = style value + style = *volume* or *pressure* + *volume* value = none = isotropically adjust system to preserve volume of system + *pressure* values = target gain + target = target mean pressure (pressure units) + gain = proportional gain constant (1/(time * pressure) or 1/time units) * zero or more keyword/value pairs may be appended * keyword = *remap* or *flip* or *units* or *couple* or *vol/balance/p* or *max/rate* or *normalize/pressure* @@ -88,12 +94,10 @@ Syntax box = distances are defined in simulation box units *couple* value = *none* or *xyz* or *xy* or *yz* or *xz* couple pressure values of various dimensions - *vol/balance/p* = *yes* or *no* + *vol/balance/p* value = *yes* or *no* Modifies the behavior of the *volume* option to try and balance pressures *max/rate* value = *rate* rate = maximum strain rate for pressure control - *normalize/pressure* value = *yes* or *no* - determine whether pressure deviation is normalized by target pressure Examples """""""" @@ -107,6 +111,7 @@ Examples fix 1 all deform 1 x pressure 2.0 0.1 normalize/pressure yes max/rate 0.001 fix 1 all deform 1 x trate 0.1 y volume z volume vol/balance/p yes fix 1 all deform 1 x trate 0.1 y pressure/mean 0.0 1.0 z pressure/mean 0.0 1.0 + fix 1 all deform 1 x trate 0.1 y trate -0.1 overlay/pressure/mean 1.0 0.1 Description """"""""""" @@ -318,8 +323,8 @@ mid point. The *pressure* style adjusts a dimensions's box length to control that component of the pressure tensor. This option attempts to maintain a -specified target value using a linear controller where the box length L -evolves according to the equation +specified target value using a linear controller where the box length +:math:`L` evolves according to the equation .. parsed-literal:: @@ -343,7 +348,7 @@ option. The *pressure/mean* style is changes a dimension in order to maintain a constant mean pressure defined as the trace of the pressure tensor. -This option is therefore very similar to the *presssure* style with +This option is therefore very similar to the *pressure* style with identical arguments except the current and target pressures refer to the mean trace of the pressure tensor. The same options also apply except for the :ref:`couple ` option. @@ -512,6 +517,25 @@ of the applied strain using the :ref:`max/rate ` option. ---------- +The *iso* parameter provides an additonal control over the x, y, +and z box lengths. This parameter can only be used in combination with +the *x*, *y*, or *z* comamnds: *vel*, *erate*, *trate*, *pressure*, or +*wiggle*. Note that this parameter will change the overall strain rate in +the *x*, *y*, or *z* dimensions. This is the meaning of its styles and values. + +The *volume* style isotropically scales box lengths to maintain a constant +box volume in response to deformation from other parameters. + +The *pressure* style controls the box volume to maintain the mean pressure +of the system. This is accomplished by isotropically scaling all box +lengths :math:`L` by an additional factor of :math:`k (P_t - P_m)` where +:math:`k` is the proportional gain constant, :math:`P_t` is the target +pressure, and :math:`P_m` is the current mean pressure (the trace of the +pressure tensor). This style allows one to control the deviatoric strain +tensor while maintaining a fixed mean pressure. + +---------- + All of the tilt styles change the xy, xz, yz tilt factors during a simulation. In LAMMPS, tilt factors (xy,xz,yz) for triclinic boxes are normally bounded by half the distance of the parallel box length. @@ -652,8 +676,12 @@ described below, which will cap the divergence. .. _deform_max_rate: The *max/rate* keyword sets an upper threshold, *rate*, that limits the -maximum magnitude of the strain rate applied in any dimension. This keyword -only applies to the *pressure* and *pressure/mean* options. +maximum magnitude of the instantaneous strain rate applied in any dimension. +This keyword only applies to the *pressure* and *pressure/mean* options. If +a pressure-controlled rate is used for both *iso* and either *x*, *y*, or +*z*, then this threshold will apply separately to each individual controller +such that the cumulative strain rate on a box dimension may be up to twice +the value of *rate*. .. _deform_couple: diff --git a/src/fix_deform.cpp b/src/fix_deform.cpp index 29c286ed02..e76b34f302 100644 --- a/src/fix_deform.cpp +++ b/src/fix_deform.cpp @@ -65,8 +65,8 @@ irregular(nullptr), set(nullptr) // set defaults - set = new Set[6]; - memset(set,0,6*sizeof(Set)); + set = new Set[7]; + memset(set,0,7*sizeof(Set)); // parse arguments @@ -237,6 +237,25 @@ irregular(nullptr), set(nullptr) error->all(FLERR,"Illegal fix deform pressure gain, must be positive"); iarg += 4; } else error->all(FLERR,"Illegal fix deform command: {}", arg[iarg+1]); + } else if (strcmp(arg[iarg],"iso") == 0) { + index = 6; + if (strcmp(arg[iarg+1],"volume") == 0) { + set[index].style = VOLUME; + iarg += 2; + } else if (strcmp(arg[iarg+1],"pressure") == 0) { + if (iarg+4 > narg) utils::missing_cmd_args(FLERR, "fix deform pressure", error); + set[index].style = PRESSURE; + if (strstr(arg[iarg+2],"v_") != arg[iarg+2]) { + set[index].ptarget = utils::numeric(FLERR,arg[iarg+2],false,lmp); + } else { + set[index].pstr = utils::strdup(&arg[iarg+2][2]); + set[index].pvar_flag = 1; + } + set[index].pgain = utils::numeric(FLERR,arg[iarg+3],false,lmp); + if (set[index].pgain <= 0.0) + error->all(FLERR,"Illegal fix deform pressure gain, must be positive"); + iarg += 4; + } } else break; } @@ -313,6 +332,12 @@ irregular(nullptr), set(nullptr) if (set[i].style == NONE) dimflag[i] = 0; else dimflag[i] = 1; + if (set[6].style != NONE) { + dimflag[0] = 1; + dimflag[1] = 1; + dimflag[2] = 1; + } + if (dimflag[0]) box_change |= BOX_CHANGE_X; if (dimflag[1]) box_change |= BOX_CHANGE_Y; if (dimflag[2]) box_change |= BOX_CHANGE_Z; @@ -324,7 +349,7 @@ irregular(nullptr), set(nullptr) // b/c shrink wrap will change box-length for (int i = 0; i < 3; i++) - if (set[i].style && (domain->boundary[i][0] >= 2 || domain->boundary[i][1] >= 2)) + if ((set[i].style || set[6].style) && (domain->boundary[i][0] >= 2 || domain->boundary[i][1] >= 2)) error->all(FLERR,"Cannot use fix deform on a shrink-wrapped boundary"); // no tilt deformation on shrink-wrapped 2nd dim @@ -431,7 +456,7 @@ irregular(nullptr), set(nullptr) // set varflag varflag = 0; - for (int i = 0; i < 6; i++) { + for (int i = 0; i < 7; i++) { if (set[i].style == VARIABLE) varflag = 1; if (set[i].pvar_flag) varflag = 1; } @@ -439,7 +464,7 @@ irregular(nullptr), set(nullptr) // set pressure_flag pressure_flag = 0; - for (int i = 0; i < 6; i++) { + for (int i = 0; i < 7; i++) { if (set[i].style == PRESSURE || set[i].style == PMEAN) pressure_flag = 1; if (set[i].coupled_flag) pressure_flag = 1; } @@ -452,6 +477,14 @@ irregular(nullptr), set(nullptr) if (set[i].style == PMEAN) error->all(FLERR, "Cannot use fix deform to assign constant volume and pressure"); + // check conflicts between x,y,z styles and iso + + if (set[6].style) + for (int i = 0; i < 3; i++) { + if (set[i].style == FINAL || set[i].style == DELTA || set[i].style == SCALE || set[i].style == PMEAN || set[i].style == VARIABLE) + error->all(FLERR, "Cannot use fix deform iso parameter with x, y, or z styles other than vel, erate, trate, pressure, and wiggle"); + } + // check pressure used for max rate and normalize error flag if (!pressure_flag && max_h_rate != 0) @@ -470,6 +503,7 @@ irregular(nullptr), set(nullptr) set[3].tilt_initial = domain->yz; set[4].tilt_initial = domain->xz; set[5].tilt_initial = domain->xy; + set[6].vol_initial = domain->xprd * domain->yprd * domain->zprd; // reneighboring only forced if flips can occur due to shape changes @@ -511,7 +545,7 @@ irregular(nullptr), set(nullptr) FixDeform::~FixDeform() { if (set) { - for (int i = 0; i < 6; i++) { + for (int i = 0; i < 7; i++) { delete[] set[i].hstr; delete[] set[i].hratestr; delete[] set[i].pstr; @@ -569,7 +603,7 @@ void FixDeform::init() // check variables for VARIABLE style - for (int i = 0; i < 6; i++) { + for (int i = 0; i < 7; i++) { if (set[i].style != VARIABLE) continue; set[i].hvar = input->variable->find(set[i].hstr); if (set[i].hvar < 0) @@ -585,7 +619,7 @@ void FixDeform::init() // check optional variables for PRESSURE or PMEAN style - for (int i = 0; i < 6; i++) { + for (int i = 0; i < 7; i++) { if (!set[i].pvar_flag) continue; set[i].pvar = input->variable->find(set[i].pstr); if (set[i].pvar < 0) @@ -701,6 +735,8 @@ void FixDeform::init() } } + set[6].vol_start = domain->xprd * domain->yprd * domain->zprd; + // if using tilt TRATE, then initial tilt must be non-zero for (int i = 3; i < 6; i++) @@ -740,6 +776,7 @@ void FixDeform::init() // set domain->h_rate values for use by domain and other fixes/computes // initialize all rates to 0.0 // cannot set here for TRATE,VOLUME,WIGGLE,VARIABLE,PRESSURE since not constant + // if iso style is used, these will also not be constant h_rate = domain->h_rate; h_ratelo = domain->h_ratelo; @@ -867,6 +904,10 @@ void FixDeform::end_of_step() if (volume_flag) set_volume(); + // apply any final isotropic scalings + + if (set[6].style) set_iso(); + // Save pressure/strain rate if required if (pressure_flag) { @@ -983,15 +1024,15 @@ void FixDeform::end_of_step() // reset global and local box to new size/shape // only if deform fix is controlling the dimension - if (set[0].style) { + if (set[0].style || set[6].style) { domain->boxlo[0] = set[0].lo_target; domain->boxhi[0] = set[0].hi_target; } - if (set[1].style) { + if (set[1].style || set[6].style) { domain->boxlo[1] = set[1].lo_target; domain->boxhi[1] = set[1].hi_target; } - if (set[2].style) { + if (set[2].style || set[6].style) { domain->boxlo[2] = set[2].lo_target; domain->boxhi[2] = set[2].hi_target; } @@ -1285,6 +1326,67 @@ void FixDeform::set_volume() } } +/* ---------------------------------------------------------------------- + apply isotropic controls +------------------------------------------------------------------------- */ + +void FixDeform::set_iso() +{ + int i; + double scale, shift; + double v_rate; + + if (set[6].style == VOLUME) { + double v0 = set[6].vol_start; + double v = 1.0; + for (i = 0; i < 3; i++) + v *= (set[i].hi_target - set[i].lo_target); + + scale = std::pow(v0 / v, THIRD); + for (i = 0; i < 3; i++) { + shift = 0.5 * (set[i].hi_target - set[i].lo_target) * scale; + set[i].lo_target = 0.5 * (set[i].lo_start + set[i].hi_start) - shift; + set[i].hi_target = 0.5 * (set[i].lo_start + set[i].hi_start) + shift; + + // Recalculate h_rate + h_rate[i] = (set[i].hi_target - set[i].lo_target) / (domain->boxhi[i] - domain->boxlo[i]) - 1.0; + h_rate[i] /= update->dt; + } + + } else if (set[6].style == PRESSURE) { + + // If variable pressure, calculate current target + if (set[6].pvar_flag) + set[6].ptarget = input->variable->compute_equal(set[6].pvar); + + v_rate = set[6].pgain * (pressure->scalar- set[6].ptarget); + + if (normalize_pressure_flag) { + if (set[6].ptarget == 0) { + if (max_h_rate == 0) { + error->all(FLERR, "Cannot normalize error for zero pressure without defining a max rate"); + } else v_rate = max_h_rate * v_rate / fabs(v_rate); + } else v_rate /= fabs(set[6].ptarget); + } + + if (max_h_rate != 0) + if (fabs(set[6].ptarget) > max_h_rate) + v_rate = max_h_rate * v_rate / fabs(v_rate); + + set[6].cumulative_strain += update->dt * v_rate; + scale = (1.0 + set[6].cumulative_strain); + for (i = 0; i < 3; i++) { + shift = 0.5 * (set[i].hi_target - set[i].lo_target) * scale; + set[i].lo_target = 0.5 * (set[i].lo_start + set[i].hi_start) - shift; + set[i].hi_target = 0.5 * (set[i].lo_start + set[i].hi_start) + shift; + + // Recalculate h_rate + h_rate[i] = (set[i].hi_target - set[i].lo_target) / (domain->boxhi[i] - domain->boxlo[i]) - 1.0; + h_rate[i] /= update->dt; + } + } +} + /* ---------------------------------------------------------------------- write Set data to restart file ------------------------------------------------------------------------- */ @@ -1294,7 +1396,7 @@ void FixDeform::write_restart(FILE *fp) if (comm->me == 0) { int size = 6 * sizeof(Set); fwrite(&size,sizeof(int),1,fp); - fwrite(set,sizeof(Set),6,fp); + fwrite(set,sizeof(Set),7,fp); } } @@ -1306,7 +1408,7 @@ void FixDeform::restart(char *buf) { int samestyle = 1; Set *set_restart = (Set *) buf; - for (int i=0; i<6; ++i) { + for (int i=0; i<7; ++i) { // restore data from initial state set[i].lo_initial = set_restart[i].lo_initial; set[i].hi_initial = set_restart[i].hi_initial; @@ -1315,6 +1417,7 @@ void FixDeform::restart(char *buf) set[i].saved = set_restart[i].saved; set[i].prior_rate = set_restart[i].prior_rate; set[i].prior_pressure = set_restart[i].prior_pressure; + set[i].cumulative_strain = set_restart[i].cumulative_strain; // check if style settings are consistent (should do the whole set?) if (set[i].style != set_restart[i].style) samestyle = 0; diff --git a/src/fix_deform.h b/src/fix_deform.h index eda97f7c90..e8a4766b12 100644 --- a/src/fix_deform.h +++ b/src/fix_deform.h @@ -72,6 +72,7 @@ class FixDeform : public Fix { double vol_initial, vol_start; double ptarget, pgain; double prior_pressure, prior_rate; + double cumulative_strain; int saved; int fixed, dynamic1, dynamic2; char *hstr, *hratestr, *pstr; @@ -85,6 +86,7 @@ class FixDeform : public Fix { void set_strain(); void set_pressure(); void set_volume(); + void set_iso(); void couple(); }; From f75eda4bf4f5d223eaf469a5f42d7b8377c0547b Mon Sep 17 00:00:00 2001 From: jtclemm Date: Wed, 14 Jun 2023 14:16:46 -0600 Subject: [PATCH 009/448] Updating size of set in restart --- src/fix_deform.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fix_deform.cpp b/src/fix_deform.cpp index e76b34f302..34e73b4e10 100644 --- a/src/fix_deform.cpp +++ b/src/fix_deform.cpp @@ -1394,7 +1394,7 @@ void FixDeform::set_iso() void FixDeform::write_restart(FILE *fp) { if (comm->me == 0) { - int size = 6 * sizeof(Set); + int size = 7 * sizeof(Set); fwrite(&size,sizeof(int),1,fp); fwrite(set,sizeof(Set),7,fp); } From 60173c477d59c770e1bbac40e68111a01f28fd67 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Wed, 14 Jun 2023 15:14:19 -0600 Subject: [PATCH 010/448] Creating persistent h_rate variable --- src/fix_deform.cpp | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/src/fix_deform.cpp b/src/fix_deform.cpp index 34e73b4e10..b595901a05 100644 --- a/src/fix_deform.cpp +++ b/src/fix_deform.cpp @@ -47,7 +47,7 @@ enum{NOCOUPLE=0,XYZ,XY,YZ,XZ}; /* ---------------------------------------------------------------------- */ FixDeform::FixDeform(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), -irregular(nullptr), set(nullptr) +irregular(nullptr), set(nullptr), id_temp(nullptr), id_press(nullptr) { if (narg < 4) error->all(FLERR,"Illegal fix deform command"); @@ -538,6 +538,16 @@ irregular(nullptr), set(nullptr) modify->add_compute(fmt::format("{} all pressure {}",id_press, id_temp)); pflag = 1; } + + // initialize all rates to 0.0 in constructor instead of init so values persist + // across run statements and ghosts have correct velocities until the destructor + h_rate = domain->h_rate; + h_ratelo = domain->h_ratelo; + + for (int i = 0; i < 3; i++) + h_rate[i] = h_ratelo[i] = 0.0; + for (int i = 3; i < 6; i++) + h_rate[i] = 0.0; } /* ---------------------------------------------------------------------- */ @@ -774,15 +784,10 @@ void FixDeform::init() } // set domain->h_rate values for use by domain and other fixes/computes - // initialize all rates to 0.0 // cannot set here for TRATE,VOLUME,WIGGLE,VARIABLE,PRESSURE since not constant // if iso style is used, these will also not be constant - h_rate = domain->h_rate; - h_ratelo = domain->h_ratelo; - for (int i = 0; i < 3; i++) { - h_rate[i] = h_ratelo[i] = 0.0; if (set[i].style == FINAL || set[i].style == DELTA || set[i].style == SCALE || set[i].style == VEL || set[i].style == ERATE) { @@ -797,7 +802,6 @@ void FixDeform::init() } for (int i = 3; i < 6; i++) { - h_rate[i] = 0.0; if (set[i].style == FINAL || set[i].style == DELTA || set[i].style == VEL || set[i].style == ERATE) { if (delt != 0.0) @@ -1394,8 +1398,10 @@ void FixDeform::set_iso() void FixDeform::write_restart(FILE *fp) { if (comm->me == 0) { - int size = 7 * sizeof(Set); + int size = 9 * sizeof(double) + 7 * sizeof(Set); fwrite(&size,sizeof(int),1,fp); + fwrite(h_rate,sizeof(double),6,fp); + fwrite(h_ratelo,sizeof(double),3,fp); fwrite(set,sizeof(Set),7,fp); } } @@ -1406,9 +1412,16 @@ void FixDeform::write_restart(FILE *fp) void FixDeform::restart(char *buf) { + int n = 0; + auto list = (double *) buf; + for (int i = 0; i < 6; i++) + h_rate[i] = list[n++]; + for (int i = 0; i < 3; i++) + h_ratelo[i] = list[n++]; + int samestyle = 1; - Set *set_restart = (Set *) buf; - for (int i=0; i<7; ++i) { + Set *set_restart = (Set *) &buf[n * sizeof(double)]; + for (int i = 0; i < 7; ++i) { // restore data from initial state set[i].lo_initial = set_restart[i].lo_initial; set[i].hi_initial = set_restart[i].hi_initial; From d6532d3550694d3f5ce4a1ae83ff4b57ce1598a6 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Fri, 23 Jun 2023 14:13:32 -0600 Subject: [PATCH 011/448] Adding h_ratelo calculation --- src/fix_deform.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/fix_deform.cpp b/src/fix_deform.cpp index b595901a05..8da904100e 100644 --- a/src/fix_deform.cpp +++ b/src/fix_deform.cpp @@ -1114,7 +1114,8 @@ void FixDeform::set_strain() set[i].hi_target = set[i].hi_start + 0.5 * del; h_rate[i] = input->variable->compute_equal(set[i].hratevar); h_ratelo[i] = -0.5 * h_rate[i]; - } else if (set[i].style != VOLUME) { + } else if (set[i].style == FINAL || set[i].style == DELTA || set[i].style == SCALE || + set[i].style == VEL || set[i].style == ERATE) { set[i].lo_target = set[i].lo_start + delta * (set[i].lo_stop - set[i].lo_start); set[i].hi_target = set[i].hi_start + delta * (set[i].hi_stop - set[i].hi_start); } @@ -1215,6 +1216,7 @@ void FixDeform::set_pressure() if (max_h_rate != 0) if (fabs(set[i].ptarget) > max_h_rate) h_rate[i] = max_h_rate * h_rate[i] / fabs(h_rate[i]); + h_ratelo[i] = -0.5 * h_rate[i]; double offset = 0.5 * (domain->boxhi[i] - domain->boxlo[i]) * (1.0 + update->dt * h_rate[i]); set[i].lo_target = 0.5 * (set[i].lo_start + set[i].hi_start) - offset; @@ -1324,6 +1326,7 @@ void FixDeform::set_volume() } h_rate[i] = (2.0 * shift / (domain->boxhi[i] - domain->boxlo[i]) - 1.0) / update->dt; + h_ratelo[i] = -0.5 * h_rate[i]; set[i].lo_target = 0.5 * (set[i].lo_start + set[i].hi_start) - shift; set[i].hi_target = 0.5 * (set[i].lo_start + set[i].hi_start) + shift; @@ -1355,6 +1358,7 @@ void FixDeform::set_iso() // Recalculate h_rate h_rate[i] = (set[i].hi_target - set[i].lo_target) / (domain->boxhi[i] - domain->boxlo[i]) - 1.0; h_rate[i] /= update->dt; + h_ratelo[i] = -0.5 * h_rate[i]; } } else if (set[6].style == PRESSURE) { @@ -1387,6 +1391,7 @@ void FixDeform::set_iso() // Recalculate h_rate h_rate[i] = (set[i].hi_target - set[i].lo_target) / (domain->boxhi[i] - domain->boxlo[i]) - 1.0; h_rate[i] /= update->dt; + h_ratelo[i] = -0.5 * h_rate[i]; } } } From 2ef326273d703eeee71cbb894a451810ea3055db Mon Sep 17 00:00:00 2001 From: jtclemm Date: Sat, 15 Jul 2023 16:09:53 -0600 Subject: [PATCH 012/448] Fixing bug in max rate threshold --- src/fix_deform.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/fix_deform.cpp b/src/fix_deform.cpp index 8da904100e..ff6caf5a1d 100644 --- a/src/fix_deform.cpp +++ b/src/fix_deform.cpp @@ -1214,8 +1214,9 @@ void FixDeform::set_pressure() } if (max_h_rate != 0) - if (fabs(set[i].ptarget) > max_h_rate) + if (fabs(h_rate[i]) > max_h_rate) h_rate[i] = max_h_rate * h_rate[i] / fabs(h_rate[i]); + h_ratelo[i] = -0.5 * h_rate[i]; double offset = 0.5 * (domain->boxhi[i] - domain->boxlo[i]) * (1.0 + update->dt * h_rate[i]); @@ -1378,7 +1379,7 @@ void FixDeform::set_iso() } if (max_h_rate != 0) - if (fabs(set[6].ptarget) > max_h_rate) + if (fabs(v_rate) > max_h_rate) v_rate = max_h_rate * v_rate / fabs(v_rate); set[6].cumulative_strain += update->dt * v_rate; From ec65fc48adbe1d2edb40db35e1ae6f85f7b5f7fb Mon Sep 17 00:00:00 2001 From: jtclemm Date: Tue, 18 Jul 2023 09:25:22 -0600 Subject: [PATCH 013/448] Cleaning up files, fixing bug in berendsen --- doc/src/fix_deform.rst | 10 ++++++---- src/fix_deform.cpp | 35 +++++++++++++++++------------------ src/fix_press_berendsen.cpp | 2 +- 3 files changed, 24 insertions(+), 23 deletions(-) diff --git a/doc/src/fix_deform.rst b/doc/src/fix_deform.rst index 7a17f14fed..a4d449850e 100644 --- a/doc/src/fix_deform.rst +++ b/doc/src/fix_deform.rst @@ -98,6 +98,8 @@ Syntax Modifies the behavior of the *volume* option to try and balance pressures *max/rate* value = *rate* rate = maximum strain rate for pressure control + *normalize/pressure* value = *yes* or *no* + Modifies pressure controls such that the deviation in pressure is normalized by the target pressure Examples """""""" @@ -111,7 +113,6 @@ Examples fix 1 all deform 1 x pressure 2.0 0.1 normalize/pressure yes max/rate 0.001 fix 1 all deform 1 x trate 0.1 y volume z volume vol/balance/p yes fix 1 all deform 1 x trate 0.1 y pressure/mean 0.0 1.0 z pressure/mean 0.0 1.0 - fix 1 all deform 1 x trate 0.1 y trate -0.1 overlay/pressure/mean 1.0 0.1 Description """"""""""" @@ -346,12 +347,13 @@ applied strain using the :ref:`max/rate ` option and couple pressures in different dimensions using the :ref:`couple ` option. -The *pressure/mean* style is changes a dimension in order to maintain +The *pressure/mean* style changes a dimension's box length to maintain a constant mean pressure defined as the trace of the pressure tensor. This option is therefore very similar to the *pressure* style with identical arguments except the current and target pressures refer to the -mean trace of the pressure tensor. The same options also apply except -for the :ref:`couple ` option. +mean trace of the pressure tensor. All options for the *pressure* style +also apply to the *pressure/mean* style except for the +:ref:`couple ` option. ---------- diff --git a/src/fix_deform.cpp b/src/fix_deform.cpp index ff6caf5a1d..79f9b3a5ba 100644 --- a/src/fix_deform.cpp +++ b/src/fix_deform.cpp @@ -24,6 +24,7 @@ #include "domain.h" #include "error.h" #include "force.h" +#include "group.h" #include "input.h" #include "irregular.h" #include "kspace.h" @@ -1522,31 +1523,31 @@ double FixDeform::memory_usage() int FixDeform::modify_param(int narg, char **arg) { - if (!pressure_flag) error->all(FLERR,"Cannot modify fix deform without a pressure control"); if (strcmp(arg[0],"temp") == 0) { - if (narg < 2) utils::missing_cmd_args(FLERR, "fix_modify deform", error); + if (narg < 2) error->all(FLERR,"Illegal fix_modify command"); if (tflag) { modify->delete_compute(id_temp); tflag = 0; } - delete [] id_temp; + delete[] id_temp; id_temp = utils::strdup(arg[1]); - int icompute = modify->find_compute(arg[1]); - if (icompute < 0) error->all(FLERR,"Could not find fix_modify temperature ID"); - temperature = modify->compute[icompute]; + temperature = modify->get_compute_by_id(arg[1]); + if (!temperature) + error->all(FLERR,"Could not find fix_modify temperature compute ID: ", arg[1]); if (temperature->tempflag == 0) - error->all(FLERR,"Fix_modify temperature ID does not compute temperature"); + error->all(FLERR,"Fix_modify temperature compute {} does not compute temperature", arg[1]); if (temperature->igroup != 0 && comm->me == 0) - error->warning(FLERR,"Temperature for deform is not for group all"); + error->warning(FLERR,"Temperature compute {} for fix {} is not for group all: {}", + arg[1], style, group->names[temperature->igroup]); // reset id_temp of pressure to new temperature ID - icompute = modify->find_compute(id_press); - if (icompute < 0) - error->all(FLERR,"Pressure ID for fix deform does not exist"); - modify->compute[icompute]->reset_extra_compute_fix(id_temp); + auto icompute = modify->get_compute_by_id(id_press); + if (!icompute) + error->all(FLERR,"Pressure compute ID {} for fix {} does not exist", id_press, style); + icompute->reset_extra_compute_fix(id_temp); return 2; @@ -1556,15 +1557,13 @@ int FixDeform::modify_param(int narg, char **arg) modify->delete_compute(id_press); pflag = 0; } - delete [] id_press; + delete[] id_press; id_press = utils::strdup(arg[1]); - int icompute = modify->find_compute(arg[1]); - if (icompute < 0) error->all(FLERR,"Could not find fix_modify pressure ID"); - pressure = modify->compute[icompute]; - + pressure = modify->get_compute_by_id(arg[1]); + if (!pressure) error->all(FLERR,"Could not find fix_modify pressure compute ID: {}", arg[1]); if (pressure->pressflag == 0) - error->all(FLERR,"Fix_modify pressure ID does not compute pressure"); + error->all(FLERR,"Fix_modify pressure compute {} does not compute pressure", arg[1]); return 2; } diff --git a/src/fix_press_berendsen.cpp b/src/fix_press_berendsen.cpp index 67802654ba..ee9a879616 100644 --- a/src/fix_press_berendsen.cpp +++ b/src/fix_press_berendsen.cpp @@ -491,7 +491,7 @@ int FixPressBerendsen::modify_param(int narg, char **arg) id_press = utils::strdup(arg[1]); pressure = modify->get_compute_by_id(arg[1]); - if (pressure) error->all(FLERR,"Could not find fix_modify pressure compute ID: {}", arg[1]); + if (!pressure) error->all(FLERR,"Could not find fix_modify pressure compute ID: {}", arg[1]); if (pressure->pressflag == 0) error->all(FLERR,"Fix_modify pressure compute {} does not compute pressure", arg[1]); return 2; From e5bcbd4213ceaad5297cea609a70b26c7c31ee90 Mon Sep 17 00:00:00 2001 From: Ludwig Ahrens Date: Wed, 2 Aug 2023 13:03:45 +0200 Subject: [PATCH 014/448] Bugfix electrode/conq after changes for qtotal --- src/ELECTRODE/fix_electrode_conq.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ELECTRODE/fix_electrode_conq.cpp b/src/ELECTRODE/fix_electrode_conq.cpp index 0d3d1d2aaf..a6baa1e122 100644 --- a/src/ELECTRODE/fix_electrode_conq.cpp +++ b/src/ELECTRODE/fix_electrode_conq.cpp @@ -30,7 +30,7 @@ FixElectrodeConq::FixElectrodeConq(LAMMPS *lmp, int narg, char **arg) : FixElectrodeConp(lmp, narg, arg) { // copy const-style values across because update_psi will change group_psi - group_q = group_psi; + group_q = group_psi_const; if (symm) { if (num_of_groups == 1) From cd5ebb86c870ebdceb842130aeea6b9bdf067a30 Mon Sep 17 00:00:00 2001 From: jrgissing Date: Fri, 25 Aug 2023 21:37:57 -0400 Subject: [PATCH 015/448] inserting atoms: correct logic for per-atom mass --- src/REACTION/fix_bond_react.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index d124b06dc2..9254c3f5d1 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -3895,7 +3895,8 @@ int FixBondReact::insert_atoms(tagint **my_update_mega_glove, int iupdate) // guess a somewhat reasonable initial velocity based on reaction site // further control is possible using bond_react_MASTER_group // compute |velocity| corresponding to a given temperature t, using specific atom's mass - double vtnorm = sqrt(t / (force->mvv2e / (dimension * force->boltz)) / atom->mass[twomol->type[m]]); + double mymass = atom->rmass ? atom->rmass[n] : atom->mass[twomol->type[m]]; + double vtnorm = sqrt(t / (force->mvv2e / (dimension * force->boltz)) / mymass); v[n][0] = random[rxnID]->uniform(); v[n][1] = random[rxnID]->uniform(); v[n][2] = random[rxnID]->uniform(); From 999c364b83d69195dfbfeaeb47397feb2d6d7dc7 Mon Sep 17 00:00:00 2001 From: jrgissing Date: Fri, 25 Aug 2023 21:44:42 -0400 Subject: [PATCH 016/448] better way to list rxn counts --- .../reaction/create_atoms_polystyrene/in.grow_styrene | 2 +- examples/PACKAGES/reaction/nylon,6-6_melt/in.large_nylon_melt | 4 ++-- .../PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized | 2 +- .../PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized | 2 +- .../tiny_nylon/in.tiny_nylon.stabilized_variable_probability | 2 +- .../PACKAGES/reaction/tiny_nylon/in.tiny_nylon.unstabilized | 2 +- .../reaction/tiny_polystyrene/in.tiny_polystyrene.stabilized | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/PACKAGES/reaction/create_atoms_polystyrene/in.grow_styrene b/examples/PACKAGES/reaction/create_atoms_polystyrene/in.grow_styrene index 7860db4e55..dcca29c026 100644 --- a/examples/PACKAGES/reaction/create_atoms_polystyrene/in.grow_styrene +++ b/examples/PACKAGES/reaction/create_atoms_polystyrene/in.grow_styrene @@ -40,7 +40,7 @@ fix 1 statted_grp_REACT nvt temp $T $T 100 fix 4 bond_react_MASTER_group temp/rescale 1 $T $T 1 1 -thermo_style custom step temp press density f_myrxns[1] +thermo_style custom step temp press density f_myrxns[*] thermo 100 diff --git a/examples/PACKAGES/reaction/nylon,6-6_melt/in.large_nylon_melt b/examples/PACKAGES/reaction/nylon,6-6_melt/in.large_nylon_melt index 9678a714d6..635b2c9750 100644 --- a/examples/PACKAGES/reaction/nylon,6-6_melt/in.large_nylon_melt +++ b/examples/PACKAGES/reaction/nylon,6-6_melt/in.large_nylon_melt @@ -26,7 +26,7 @@ read_data large_nylon_melt.data.gz & extra/angle/per/atom 15 & extra/dihedral/per/atom 15 & extra/improper/per/atom 25 & - extra/special/per/atom 25 + extra/special/per/atom 25 velocity all create 800.0 4928459 dist gaussian @@ -50,7 +50,7 @@ fix 1 statted_grp_REACT nvt temp 800 800 100 # you can use the internally created 'bond_react_MASTER_group', like so: # fix 2 bond_react_MASTER_group temp/rescale 1 800 800 10 1 -thermo_style custom step temp press density f_myrxns[1] f_myrxns[2] # cumulative reaction counts +thermo_style custom step temp press density f_myrxns[*] # cumulative reaction counts # restart 100 restart1 restart2 diff --git a/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized b/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized index 57b03b630f..ea09d06893 100644 --- a/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized +++ b/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized @@ -44,7 +44,7 @@ fix rxns all bond/react stabilization yes statted_grp .03 & fix 1 statted_grp_REACT nvt temp 300 300 100 -thermo_style custom step temp f_rxns[1] f_rxns[2] f_rxns[3] f_rxns[4] +thermo_style custom step temp f_rxns[*] run 2000 diff --git a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized index 95b39033db..853bc45f1e 100644 --- a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized +++ b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized @@ -50,7 +50,7 @@ fix 1 statted_grp_REACT nvt temp 300 300 100 # by using the internally-created 'bond_react_MASTER_group', like so: fix 4 bond_react_MASTER_group temp/rescale 1 300 300 10 1 -thermo_style custom step temp press density f_myrxns[1] f_myrxns[2] +thermo_style custom step temp press density f_myrxns[*] # restart 100 restart1 restart2 diff --git a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized_variable_probability b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized_variable_probability index 88b5a95a41..f3c32f3cbd 100644 --- a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized_variable_probability +++ b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized_variable_probability @@ -54,7 +54,7 @@ fix 1 statted_grp_REACT nvt temp 300 300 100 # by using the internally-created 'bond_react_MASTER_group', like so: fix 4 bond_react_MASTER_group temp/rescale 1 300 300 10 1 -thermo_style custom step temp press density v_prob1 v_prob2 f_myrxns[1] f_myrxns[2] +thermo_style custom step temp press density v_prob1 v_prob2 f_myrxns[*] # restart 100 restart1 restart2 diff --git a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.unstabilized b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.unstabilized index a569e28d43..e5cbaaaf86 100644 --- a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.unstabilized +++ b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.unstabilized @@ -47,7 +47,7 @@ fix myrxns all bond/react stabilization no & fix 1 all nve/limit .03 -thermo_style custom step temp press density f_myrxns[1] f_myrxns[2] +thermo_style custom step temp press density f_myrxns[*] # restart 100 restart1 restart2 diff --git a/examples/PACKAGES/reaction/tiny_polystyrene/in.tiny_polystyrene.stabilized b/examples/PACKAGES/reaction/tiny_polystyrene/in.tiny_polystyrene.stabilized index 4ecc481719..230998fcd3 100644 --- a/examples/PACKAGES/reaction/tiny_polystyrene/in.tiny_polystyrene.stabilized +++ b/examples/PACKAGES/reaction/tiny_polystyrene/in.tiny_polystyrene.stabilized @@ -51,7 +51,7 @@ fix 1 statted_grp_REACT nvt temp $T $T 100 fix 4 bond_react_MASTER_group temp/rescale 1 $T $T 1 1 -thermo_style custom step temp press density f_rxn1[1] f_rxn1[2] f_rxn1[3] +thermo_style custom step temp press density f_rxn1[*] run 10000 From 1039f86037f2870062a0a79108a933ab96063494 Mon Sep 17 00:00:00 2001 From: jrgissing Date: Sat, 26 Aug 2023 14:29:07 -0400 Subject: [PATCH 017/448] remove unnecessary restriction do not check for comm cutoff when initiator atoms are directly bonded --- src/REACTION/fix_bond_react.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index 9254c3f5d1..1da26e32a1 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -827,11 +827,10 @@ void FixBondReact::init() nlevels_respa = (dynamic_cast(update->integrate))->nlevels; // check cutoff for iatomtype,jatomtype - for (int i = 0; i < nreacts; i++) { - if (!utils::strmatch(force->pair_style,"^hybrid")) - if (force->pair == nullptr || cutsq[i][1] > force->pair->cutsq[iatomtype[i]][jatomtype[i]]) + if (!utils::strmatch(force->pair_style,"^hybrid")) + for (int i = 0; i < nreacts; i++) + if (force->pair == nullptr || (closeneigh[i] < 0 && cutsq[i][1] > force->pair->cutsq[iatomtype[i]][jatomtype[i]])) error->all(FLERR,"Fix bond/react: Fix bond/react cutoff is longer than pairwise cutoff"); - } // need a half neighbor list, built every Nevery steps neighbor->add_request(this, NeighConst::REQ_OCCASIONAL); From 77a5fd16dd5309d1955aae573ed41d66bc0caede Mon Sep 17 00:00:00 2001 From: jtclemm Date: Tue, 19 Sep 2023 13:37:47 +0200 Subject: [PATCH 018/448] Fixing bug in link pressure --- src/fix_deform.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/fix_deform.cpp b/src/fix_deform.cpp index 79f9b3a5ba..139f1e0835 100644 --- a/src/fix_deform.cpp +++ b/src/fix_deform.cpp @@ -1302,6 +1302,7 @@ void FixDeform::set_volume() double p2 = pressure->vector[fixed]; double p1i = set[i].prior_pressure; double p2i = set[fixed].prior_pressure; + double denominator; if (e3 == 0) { e1 = 0.0; @@ -1314,7 +1315,12 @@ void FixDeform::set_volume() if (!linked_pressure) { // Calculate first strain rate by expanding stress to linear order, p1(t+dt) = p2(t+dt) // Calculate second strain rate to preserve volume - e1 = -e3 / (1 + e3 * dt) * (p2 - p2i) - e2i * (p1 - p2) / (p2 - p2i + (p1 - p1i) / e1i * e2i); + denominator = (p2 - p2i + (p1 - p1i) / e1i * e2i); + if (denominator != 0.0 && e1i != 0.0) { + e1 = (-e3 / (1 + e3 * dt) * (p2 - p2i) - e2i * (p1 - p2)) / denominator; + } else { + e1 = e2i; + } e2 = (1.0 - (1 + e3 * dt) * (1 + e1 * dt)) / ((1 + e3 * dt) * (1 + e1 * dt) * dt); shift = 0.5 * L1i * (1.0 + e1 * dt); From 5ff16da2725e641c3a4eae7db763dd8f0d2cf8fa Mon Sep 17 00:00:00 2001 From: jtclemm Date: Thu, 21 Sep 2023 12:17:47 +0200 Subject: [PATCH 019/448] Adding rate cap on vol link pressure --- src/fix_deform.cpp | 48 +++++++++++++++++++++++++++++++++++++++++++--- src/fix_deform.h | 1 + 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/src/fix_deform.cpp b/src/fix_deform.cpp index 139f1e0835..3430c26061 100644 --- a/src/fix_deform.cpp +++ b/src/fix_deform.cpp @@ -1297,6 +1297,8 @@ void FixDeform::set_volume() double L2i = domain->boxhi[fixed] - domain->boxlo[fixed]; double L3i = domain->boxhi[dynamic1] - domain->boxlo[dynamic1]; double L3 = (set[dynamic1].hi_target - set[dynamic1].lo_target); + double Vi = L1i * L2i * L3i; + double V = L3 * L1i * L2i; double e3 = (L3 / L3i - 1.0) / dt; double p1 = pressure->vector[i]; double p2 = pressure->vector[fixed]; @@ -1315,13 +1317,22 @@ void FixDeform::set_volume() if (!linked_pressure) { // Calculate first strain rate by expanding stress to linear order, p1(t+dt) = p2(t+dt) // Calculate second strain rate to preserve volume - denominator = (p2 - p2i + (p1 - p1i) / e1i * e2i); + denominator = p2 - p2i + e2i * ((p1 - p1i) / e1i); if (denominator != 0.0 && e1i != 0.0) { - e1 = (-e3 / (1 + e3 * dt) * (p2 - p2i) - e2i * (p1 - p2)) / denominator; + e1 = (((p2 - p2i) * (Vi - V) / (V * dt)) - e2i * (p1 - p2)) / denominator; } else { e1 = e2i; } - e2 = (1.0 - (1 + e3 * dt) * (1 + e1 * dt)) / ((1 + e3 * dt) * (1 + e1 * dt) * dt); + e2 = (Vi - V * (1 + e1 * dt)) / (V * (1 + e1 * dt) * dt); + + // If strain rate exceeds limit in either dimension, cap it at the maximum compatible rate + if (max_h_rate != 0) + if (fabs(e1) > max_h_rate || fabs(e2) > max_h_rate) + if (fabs(e1) > fabs(e2)) + adjust_linked_rates(e1, e2, e3, Vi, V); + else + adjust_linked_rates(e2, e1, e3, Vi, V); + shift = 0.5 * L1i * (1.0 + e1 * dt); linked_pressure = 1; @@ -1341,6 +1352,37 @@ void FixDeform::set_volume() } } + +/* ---------------------------------------------------------------------- + Rescale volume preserving strain rates to enforce max rate +------------------------------------------------------------------------- */ + +void FixDeform::adjust_linked_rates(double &e_larger, double &e_smaller, double e3, double Vi, double V) +{ + double dt = update->dt; + double e_lim_positive = (Vi - V * (1 + max_h_rate * dt)) / (V * (1 + max_h_rate * dt) * dt); + double e_lim_negative = (Vi - V * (1 - max_h_rate * dt)) / (V * (1 - max_h_rate * dt) * dt); + if ((e_larger * e3) >= 0) { + if (e_larger > 0.0) { + // Same sign as primary strain rate, cap third dimension + e_smaller = -max_h_rate; + e_larger = e_lim_negative; + } else { + e_smaller = max_h_rate; + e_larger = e_lim_positive; + } + } else { + // Opposite sign, set to maxrate. + if (e_larger > 0.0) { + e_larger = max_h_rate; + e_smaller = e_lim_positive; + } else { + e_larger = -max_h_rate; + e_smaller = e_lim_negative; + } + } +} + /* ---------------------------------------------------------------------- apply isotropic controls ------------------------------------------------------------------------- */ diff --git a/src/fix_deform.h b/src/fix_deform.h index e8a4766b12..2f0b66aa71 100644 --- a/src/fix_deform.h +++ b/src/fix_deform.h @@ -88,6 +88,7 @@ class FixDeform : public Fix { void set_volume(); void set_iso(); void couple(); + void adjust_linked_rates(double&, double&, double, double, double); }; } // namespace LAMMPS_NS From 857cc5392385f1799b222af2996c6b684ee613da Mon Sep 17 00:00:00 2001 From: Ludwig Ahrens-Iwers Date: Wed, 4 Oct 2023 10:32:50 +0200 Subject: [PATCH 020/448] README file for the ELECTRODE package --- src/ELECTRODE/README | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 src/ELECTRODE/README diff --git a/src/ELECTRODE/README b/src/ELECTRODE/README new file mode 100644 index 0000000000..56894943a0 --- /dev/null +++ b/src/ELECTRODE/README @@ -0,0 +1,15 @@ +This package provides the "fix electrode/*" commands which can be used in a +LAMMPS input script. These fixes implement the constant potential method, which +minimizes the energy of electrodes as a function of atom charges at given +electric potentials or electrode charges. + +See the doc page for the fix electrode/conp command to get started. There are +example scripts for using this package in examples/PACKAGES/electrode. + +This package uses an external library in lib/electrode which must be compiled +before making LAMMPS. See the doc page on "Packages with extra build options" +for more information. + +The primary people who created this package are Ludwig Ahrens-Iwers, Shern Tee +(s.tee@griffith.edu.au) and Robert Meißner (robert.meissner@tuhh.de). Contact +them directly if you have questions. From a651697d2e8477927c04eaf1db5ba0a5ef4b6f5c Mon Sep 17 00:00:00 2001 From: Ludwig Ahrens-Iwers Date: Mon, 13 Nov 2023 14:41:13 +0100 Subject: [PATCH 021/448] Mix eta in Electrode package --- doc/src/fix_electrode.rst | 7 ++ examples/PACKAGES/electrode/madelung/data.eta | 34 +++++ .../PACKAGES/electrode/madelung/data.eta_mix | 34 +++++ examples/PACKAGES/electrode/madelung/eval.py | 4 +- examples/PACKAGES/electrode/madelung/in.eta | 17 +++ .../PACKAGES/electrode/madelung/in.eta_cg | 17 +++ .../PACKAGES/electrode/madelung/in.eta_mix | 17 +++ .../PACKAGES/electrode/madelung/plate_cap.py | 117 +++++++++--------- .../electrode/madelung/settings_eta.mod | 19 +++ examples/PACKAGES/electrode/madelung/test.sh | 16 ++- src/ELECTRODE/electrode_matrix.cpp | 20 ++- src/ELECTRODE/electrode_matrix.h | 3 + src/ELECTRODE/electrode_vector.cpp | 35 ++++-- src/ELECTRODE/electrode_vector.h | 3 + src/ELECTRODE/fix_electrode_conp.cpp | 36 +++++- src/ELECTRODE/fix_electrode_conp.h | 2 + 16 files changed, 303 insertions(+), 78 deletions(-) create mode 100644 examples/PACKAGES/electrode/madelung/data.eta create mode 100644 examples/PACKAGES/electrode/madelung/data.eta_mix create mode 100644 examples/PACKAGES/electrode/madelung/in.eta create mode 100644 examples/PACKAGES/electrode/madelung/in.eta_cg create mode 100644 examples/PACKAGES/electrode/madelung/in.eta_mix create mode 100644 examples/PACKAGES/electrode/madelung/settings_eta.mod diff --git a/doc/src/fix_electrode.rst b/doc/src/fix_electrode.rst index 5685482a71..d807da7fd2 100644 --- a/doc/src/fix_electrode.rst +++ b/doc/src/fix_electrode.rst @@ -70,6 +70,8 @@ Syntax filename = file from which to read inverted matrix *qtotal* value = number or *v_* equal-style variable add overall potential so that all electrode charges add up to *qtotal* + *eta* value = d_propname + d_propname = a custom double vector defined via fix property/atom Examples """""""" @@ -264,6 +266,11 @@ individual electrodes, and since *symm on* constrains the total charge of all electrodes to be zero, either option is incompatible with the *qtotal* keyword (even if *qtotal* is set to zero). +The keyword *eta* takes the name of a custom double vector defined via fix +property/atom. The values will be used instead of the standard eta value. The +property/atom fix must be for vector of double values and use the *ghost on* +option. + Restart, fix_modify, output, run start/stop, minimize info """"""""""""""""""""""""""""""""""""""""""""""""""""""""""" diff --git a/examples/PACKAGES/electrode/madelung/data.eta b/examples/PACKAGES/electrode/madelung/data.eta new file mode 100644 index 0000000000..b05700a4ab --- /dev/null +++ b/examples/PACKAGES/electrode/madelung/data.eta @@ -0,0 +1,34 @@ +LAMMPS data file via write_data, version 24 Dec 2020, timestep = 0 + +4 atoms +3 atom types + +0 1 xlo xhi +0 1 ylo yhi +-10 10 zlo zhi + +Masses + +1 196.966553 +2 196.966553 +3 1.0 + +Pair Coeffs # lj/cut/coul/long + +1 0 0 +2 0 0 +3 0 0 + +Atoms # full + +1 1 1 0.00 0.00 0.00 -2.00 # bottom electrode +2 2 2 0.00 0.00 0.00 2.00 # top electrode +3 3 3 0.50 0.00 0.00 -1.00 # bottom electrolyte +4 3 3 -0.50 0.00 0.00 1.00 # top electrolyte + +ETA + +1 2.0 +2 2.0 +3 0 +4 0 diff --git a/examples/PACKAGES/electrode/madelung/data.eta_mix b/examples/PACKAGES/electrode/madelung/data.eta_mix new file mode 100644 index 0000000000..9322ebd662 --- /dev/null +++ b/examples/PACKAGES/electrode/madelung/data.eta_mix @@ -0,0 +1,34 @@ +LAMMPS data file via write_data, version 24 Dec 2020, timestep = 0 + +4 atoms +3 atom types + +0 1 xlo xhi +0 1 ylo yhi +-10 10 zlo zhi + +Masses + +1 196.966553 +2 196.966553 +3 1.0 + +Pair Coeffs # lj/cut/coul/long + +1 0 0 +2 0 0 +3 0 0 + +Atoms # full + +1 1 1 0.00 0.00 0.00 -2.00 # bottom electrode +2 2 2 0.00 0.00 0.00 2.00 # top electrode +3 3 3 0.50 0.00 0.00 -1.00 # bottom electrolyte +4 3 3 -0.50 0.00 0.00 1.00 # top electrolyte + +ETA + +1 0.5 +2 3.0 +3 0 +4 0 diff --git a/examples/PACKAGES/electrode/madelung/eval.py b/examples/PACKAGES/electrode/madelung/eval.py index 2f5a355d9b..feda0e384e 100644 --- a/examples/PACKAGES/electrode/madelung/eval.py +++ b/examples/PACKAGES/electrode/madelung/eval.py @@ -1,7 +1,7 @@ #!/usr/env/python3 -import sys import os.path as op +import sys def rel_error(out, ref): @@ -49,5 +49,5 @@ for label, ref, out in out_lines: error = rel_error(out, ref) lines.append(f"{label}: {out:.5f}, {error:.5f}\n") -with open("madelung.txt", 'a') as f: +with open("madelung.txt", "a") as f: f.writelines(lines) diff --git a/examples/PACKAGES/electrode/madelung/in.eta b/examples/PACKAGES/electrode/madelung/in.eta new file mode 100644 index 0000000000..d928f8fed0 --- /dev/null +++ b/examples/PACKAGES/electrode/madelung/in.eta @@ -0,0 +1,17 @@ +atom_style full +units real +boundary p p f + +kspace_style ewald/electrode 1.0e-8 +kspace_modify slab 8.0 # ew3dc +pair_style lj/cut/coul/long 12 + +fix feta all property/atom d_eta ghost yes +read_data data.eta fix feta NULL ETA + +include "settings_eta.mod" + +fix conp bot electrode/conp 0 2 couple top 1 symm on eta d_eta write_inv inv.csv write_vec vec.csv + +run 0 + diff --git a/examples/PACKAGES/electrode/madelung/in.eta_cg b/examples/PACKAGES/electrode/madelung/in.eta_cg new file mode 100644 index 0000000000..e04db318d5 --- /dev/null +++ b/examples/PACKAGES/electrode/madelung/in.eta_cg @@ -0,0 +1,17 @@ +atom_style full +units real +boundary p p f + +kspace_style ewald/electrode 1.0e-8 +kspace_modify slab 8.0 # ew3dc +pair_style lj/cut/coul/long 12 + +fix feta all property/atom d_eta ghost yes +read_data data.eta_mix fix feta NULL ETA + +include "settings_eta.mod" + +fix conp bot electrode/conp 0 2 couple top 1 symm on algo cg 1e-6 eta d_eta + +run 0 + diff --git a/examples/PACKAGES/electrode/madelung/in.eta_mix b/examples/PACKAGES/electrode/madelung/in.eta_mix new file mode 100644 index 0000000000..d4bcf71225 --- /dev/null +++ b/examples/PACKAGES/electrode/madelung/in.eta_mix @@ -0,0 +1,17 @@ +atom_style full +units real +boundary p p f + +kspace_style ewald/electrode 1.0e-8 +kspace_modify slab 8.0 # ew3dc +pair_style lj/cut/coul/long 12 + +fix feta all property/atom d_eta ghost on +read_data data.eta_mix fix feta NULL ETA + +include "settings_eta.mod" + +fix conp bot electrode/conp 0 2 couple top 1 symm on eta d_eta write_inv inv.csv write_vec vec.csv + +run 0 + diff --git a/examples/PACKAGES/electrode/madelung/plate_cap.py b/examples/PACKAGES/electrode/madelung/plate_cap.py index 62d52fe102..fcca166869 100755 --- a/examples/PACKAGES/electrode/madelung/plate_cap.py +++ b/examples/PACKAGES/electrode/madelung/plate_cap.py @@ -3,7 +3,6 @@ import numpy as np from scipy.special import erf -ETA = 2 SQRT2 = np.sqrt(2) COULOMB = 332.06371 # Coulomb constant in Lammps 'real' units QE2F = 23.060549 @@ -17,14 +16,14 @@ def lattice(length): return np.array(np.meshgrid(x, y)).T.reshape(-1, 2) -def a_element(r): +def a_element(r, eta): """Coulomb contribution of two Gaussians""" - return erf(ETA / SQRT2 * r) / r + return erf(eta * r) / r -def b_element(r, q): +def b_element(r, q, eta): """Coulomb contribution of a Gaussian with a point charge""" - return q * erf(ETA * r) / r + return q * erf(eta * r) / r a = 1 # nearest neighbor distance i.e. lattice constant / sqrt(2) @@ -36,59 +35,65 @@ v = np.array([-0.5, 0.5]) * (QE2F / COULOMB) # distances to images within electrode and to opposite electrode distances = a * np.linalg.norm(lattice(LENGTH), axis=1) -opposite_distances = np.sqrt(np.square(distances) + distance_plates ** 2) +opposite_distances = np.sqrt(np.square(distances) + distance_plates**2) -# self interaction and within original box -A_11 = np.sqrt(2 / np.pi) * ETA -A_12 = erf(ETA * distance_plates / SQRT2) / distance_plates +for name, eta_elec in [("", [2.0, 2.0]), ("_eta_mix", [0.5, 3.0])]: + eta_mix = np.prod(eta_elec) / np.sqrt(np.sum(np.square(eta_elec))) + # self interaction and within original box + A_11 = np.sqrt(2 / np.pi) * eta_elec[0] + A_22 = np.sqrt(2 / np.pi) * eta_elec[1] + A_12 = erf(eta_mix * distance_plates) / distance_plates -# interaction with periodic images -A_11 += 4 * np.sum(a_element(distances)) -A_12 += 4 * np.sum(a_element(opposite_distances)) -A = np.array([[A_11, A_12], [A_12, A_11]]) -inv = np.linalg.inv(A) -e = np.array([1, 1]) -inv -= np.matmul(inv, np.matmul(np.outer(e, e), inv)) / np.dot(e, np.dot(inv, e)) + # interaction with periodic images + A_11 += 4 * np.sum(a_element(distances, eta_elec[0] / SQRT2)) + A_22 += 4 * np.sum(a_element(distances, eta_elec[1] / SQRT2)) + A_12 += 4 * np.sum(a_element(opposite_distances, eta_mix)) + A = np.array([[A_11, A_12], [A_12, A_22]]) + inv = np.linalg.inv(A) + e = np.array([1, 1]) + inv -= np.matmul(inv, np.matmul(np.outer(e, e), inv)) / np.dot(e, np.dot(inv, e)) -# electrode-electrolyte interaction -b = [] -for x in x_elec: - bi = 0 - for y, q in zip(x_elyt, q_elyt): - d = abs(y - x) - bi += b_element(d, q) - image_distances = np.sqrt(np.square(distances) + d ** 2) - bi += 4 * np.sum(b_element(image_distances, q)) - b.append(bi) -b = np.array(b) + # electrode-electrolyte interaction + b = [] + for x, eta in zip(x_elec, eta_elec): + bi = 0 + for y, q in zip(x_elyt, q_elyt): + d = abs(y - x) + bi += b_element(d, q, eta) + image_distances = np.sqrt(np.square(distances) + d**2) + bi += 4 * np.sum(b_element(image_distances, q, eta)) + b.append(bi) + b = np.array(b) -# electrolyte-electrolyte energy -elyt_11 = 4 * np.sum(1 / distances) -distance_elyt = x_elyt[1] - x_elyt[0] -elyt_12 = 1 / distance_elyt + 4 * np.sum( - 1 / np.sqrt(np.square(distances) + distance_elyt ** 2) -) -elyt = np.array([[elyt_11, elyt_12], [elyt_12, elyt_11]]) -energy_elyt = 0.5 * np.dot(q_elyt, np.dot(elyt, q_elyt)) - -# electrode charges and energy -q = np.dot(inv, v - b) -energy = COULOMB * (0.5 * np.dot(q, np.dot(A, q)) + np.dot(b, q) + energy_elyt) - -print( - "length, energy / kcal/mol, q1 / e, q2 / e, inv11 / A, inv12 / A, b1 / e/A, b2 / e/A" -) -print( - ", ".join( - [ - str(LENGTH), - f"{energy:.8f}", - f"{q[0]:.10f}", - f"{q[1]:.10f}", - f"{inv[0, 0]:.10f}", - f"{inv[0, 1]:.10f}", - f"{b[0]:.8f}", - f"{b[1]:.8f}", - ] + # electrolyte-electrolyte energy + elyt_11 = 4 * np.sum(1 / distances) + distance_elyt = x_elyt[1] - x_elyt[0] + elyt_12 = 1 / distance_elyt + 4 * np.sum( + 1 / np.sqrt(np.square(distances) + distance_elyt**2) ) -) + elyt = np.array([[elyt_11, elyt_12], [elyt_12, elyt_11]]) + energy_elyt = 0.5 * np.dot(q_elyt, np.dot(elyt, q_elyt)) + + # electrode charges and energy + q = np.dot(inv, v - b) + energy = COULOMB * (0.5 * np.dot(q, np.dot(A, q)) + np.dot(b, q) + energy_elyt) + + with open(f"plate_cap{name}.csv", "w") as f: + f.write( + "length, energy / kcal/mol, q1 / e, q2 / e, inv11 / A, inv12 / A, b1 / e/A, b2 / e/A\n" + ) + f.write( + ", ".join( + [ + str(LENGTH), + f"{energy:.8f}", + f"{q[0]:.10f}", + f"{q[1]:.10f}", + f"{inv[0, 0]:.10f}", + f"{inv[0, 1]:.10f}", + f"{b[0]:.8f}", + f"{b[1]:.8f}", + ] + ) + + "\n" + ) diff --git a/examples/PACKAGES/electrode/madelung/settings_eta.mod b/examples/PACKAGES/electrode/madelung/settings_eta.mod new file mode 100644 index 0000000000..aee63bf2e9 --- /dev/null +++ b/examples/PACKAGES/electrode/madelung/settings_eta.mod @@ -0,0 +1,19 @@ + +# distribute electrode atoms among all processors: +if "$(extract_setting(world_size) % 2) == 0" then "processors * * 2" + +group bot type 1 +group top type 2 + +# get electrode charges +variable q atom q +compute qbot bot reduce sum v_q +compute qtop top reduce sum v_q + +compute compute_pe all pe +variable vpe equal c_compute_pe +variable charge equal c_qtop +fix fxprint all print 1 "${vpe}, ${charge}" file "out.csv" + +thermo_style custom step pe c_qbot c_qtop + diff --git a/examples/PACKAGES/electrode/madelung/test.sh b/examples/PACKAGES/electrode/madelung/test.sh index edac04f5b1..a558ee6711 100644 --- a/examples/PACKAGES/electrode/madelung/test.sh +++ b/examples/PACKAGES/electrode/madelung/test.sh @@ -7,17 +7,27 @@ if [ ! -f $lmpbin ]; then fi ref_out="plate_cap.csv" -if [ ! -f $ref_out ]; then +ref_mix_out="plate_cap_eta_mix.csv" +if [ ! -f $ref_out ] || [ ! -f $ref_mix_out ]; then echo "Generating reference data" - python3 plate_cap.py > $ref_out + python3 plate_cap.py fi echo "Running Lammps inputs" +# w/o eta mixing rm -rf madelung.txt && touch madelung.txt -for file in in.*; do +for file in in.eta in.ewald-ew3dc in.ewald-ew2d in.pppm-ew3dc in.cg; do printf "\n$file\n" >> madelung.txt rm -f out.csv inv.csv vec.csv $lmpbin -i $file &> /dev/null python3 eval.py $ref_out out.csv inv.csv vec.csv done + +# with eta mixing +for file in in.eta_mix in.eta_cg; do + printf "\n$file\n" >> madelung.txt + rm -f out.csv inv.csv vec.csv + $lmpbin -i $file &> /dev/null + python3 eval.py $ref_mix_out out.csv inv.csv vec.csv +done cat madelung.txt diff --git a/src/ELECTRODE/electrode_matrix.cpp b/src/ELECTRODE/electrode_matrix.cpp index 7be9119c62..d917fb1f97 100644 --- a/src/ELECTRODE/electrode_matrix.cpp +++ b/src/ELECTRODE/electrode_matrix.cpp @@ -43,6 +43,7 @@ ElectrodeMatrix::ElectrodeMatrix(LAMMPS *lmp, int electrode_group, double eta) : groupbit = group->bitmask[igroup]; ngroup = group->count(igroup); this->eta = eta; + etaflag = false; tfflag = false; } @@ -72,6 +73,14 @@ void ElectrodeMatrix::setup_tf(const std::map &tf_types) /* ---------------------------------------------------------------------- */ +void ElectrodeMatrix::setup_eta(int index) +{ + etaflag = true; + eta_index = index; +} + +/* ---------------------------------------------------------------------- */ + void ElectrodeMatrix::compute_array(double **array, bool timer_flag) { // setting all entries of coulomb matrix to zero @@ -115,8 +124,6 @@ void ElectrodeMatrix::pair_contribution(double **array) int nlocal = atom->nlocal; int newton_pair = force->newton_pair; - double const etaij = eta * eta / sqrt(2.0 * eta * eta); // see mw ewald theory eq. (29)-(30) - // neighbor list will be ready because called from post_neighbor inum = list->inum; ilist = list->ilist; @@ -135,6 +142,7 @@ void ElectrodeMatrix::pair_contribution(double **array) xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; + double const eta_i = etaflag ? atom->dvector[eta_index][i] : eta; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; @@ -152,6 +160,9 @@ void ElectrodeMatrix::pair_contribution(double **array) jtype = type[j]; if (rsq < cutsq[itype][jtype]) { + double const eta_j = etaflag ? atom->dvector[eta_index][j] : eta; + double const etaij = eta_i * eta_j / sqrt(eta_i * eta_i + eta_j * eta_j); + r = sqrt(rsq); rinv = 1.0 / r; aij = rinv; @@ -178,7 +189,10 @@ void ElectrodeMatrix::self_contribution(double **array) const double preta = MY_SQRT2 / MY_PIS; for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) { array[mpos[i]][mpos[i]] += preta * eta - selfint; } + if (mask[i] & groupbit) { + double const eta_i = etaflag ? atom->dvector[eta_index][i] : eta; + array[mpos[i]][mpos[i]] += preta * eta_i - selfint; + } } /* ---------------------------------------------------------------------- */ diff --git a/src/ELECTRODE/electrode_matrix.h b/src/ELECTRODE/electrode_matrix.h index 8499cfdb34..1c64d8a4c4 100644 --- a/src/ELECTRODE/electrode_matrix.h +++ b/src/ELECTRODE/electrode_matrix.h @@ -30,6 +30,7 @@ class ElectrodeMatrix : protected Pointers { ElectrodeMatrix(class LAMMPS *, int, double); void setup(const std::unordered_map &, class Pair *, class NeighList *); void setup_tf(const std::map &); + void setup_eta(int); void compute_array(double **, bool); int igroup; @@ -39,6 +40,8 @@ class ElectrodeMatrix : protected Pointers { double **cutsq; double g_ewald, eta; bool tfflag; + bool etaflag; + int eta_index; std::map tf_types; std::unordered_map tag_to_iele; bool assigned; diff --git a/src/ELECTRODE/electrode_vector.cpp b/src/ELECTRODE/electrode_vector.cpp index 245ba57727..a2eaf784c5 100644 --- a/src/ELECTRODE/electrode_vector.cpp +++ b/src/ELECTRODE/electrode_vector.cpp @@ -29,6 +29,7 @@ #include "neigh_list.h" #include "pair.h" +#include #include #include @@ -47,6 +48,7 @@ ElectrodeVector::ElectrodeVector(LAMMPS *lmp, int sensor_group, int source_group source_grpbit = group->bitmask[source_group]; this->eta = eta; tfflag = false; + etaflag = false; kspace_time_total = 0; pair_time_total = 0; @@ -93,6 +95,14 @@ void ElectrodeVector::setup_tf(const std::map &tf_types) /* ---------------------------------------------------------------------- */ +void ElectrodeVector::setup_eta(int index) +{ + etaflag = true; + eta_index = index; +} + +/* ---------------------------------------------------------------------- */ + void ElectrodeVector::compute_vector(double *vector) { MPI_Barrier(world); @@ -121,7 +131,6 @@ void ElectrodeVector::compute_vector(double *vector) void ElectrodeVector::pair_contribution(double *vector) { - double const etaij = eta * MY_ISQRT2; double **x = atom->x; double *q = atom->q; int *type = atom->type; @@ -142,6 +151,7 @@ void ElectrodeVector::pair_contribution(double *vector) double const xtmp = x[i][0]; double const ytmp = x[i][1]; double const ztmp = x[i][2]; + double const eta_i = etaflag ? atom->dvector[eta_index][i] : eta; int itype = type[i]; int *jlist = firstneigh[i]; int jnum = numneigh[i]; @@ -158,18 +168,22 @@ void ElectrodeVector::pair_contribution(double *vector) double const rsq = delx * delx + dely * dely + delz * delz; int jtype = type[j]; if (rsq >= cutsq[itype][jtype]) continue; + double const eta_j = etaflag ? atom->dvector[eta_index][j] : eta; + double etaij; + if (i_in_sensor && j_in_sensor) + etaij = eta_i * eta_j / sqrt(eta_i * eta_i + eta_j * eta_j); + else if (i_in_sensor) + etaij = eta_i; + else { + assert(j_in_sensor); + etaij = eta_j; + } double const r = sqrt(rsq); double const rinv = 1.0 / r; double aij = rinv; aij *= ElectrodeMath::safe_erfc(g_ewald * r); - if (invert_source) - aij -= ElectrodeMath::safe_erfc(eta * r) * rinv; - else - aij -= ElectrodeMath::safe_erfc(etaij * r) * rinv; - if (i_in_sensor) { - vector[i] += aij * q[j]; - //} else if (j_in_sensor) { - } + aij -= ElectrodeMath::safe_erfc(etaij * r) * rinv; + if (i_in_sensor) { vector[i] += aij * q[j]; } if (j_in_sensor && (!invert_source || !i_in_sensor)) { vector[j] += aij * q[i]; } } } @@ -189,9 +203,10 @@ void ElectrodeVector::self_contribution(double *vector) for (int ii = 0; ii < inum; ii++) { int const i = ilist[ii]; + double const eta_i = etaflag ? atom->dvector[eta_index][i] : eta; bool const i_in_sensor = (mask[i] & groupbit); bool const i_in_source = !!(mask[i] & source_grpbit) != invert_source; - if (i_in_sensor && i_in_source) vector[i] += (preta * eta - selfint) * q[i]; + if (i_in_sensor && i_in_source) vector[i] += (preta * eta_i - selfint) * q[i]; } } diff --git a/src/ELECTRODE/electrode_vector.h b/src/ELECTRODE/electrode_vector.h index e7f637dd2d..a4f274a049 100644 --- a/src/ELECTRODE/electrode_vector.h +++ b/src/ELECTRODE/electrode_vector.h @@ -29,6 +29,7 @@ class ElectrodeVector : protected Pointers { ~ElectrodeVector() override; void setup(class Pair *, class NeighList *, bool); void setup_tf(const std::map &); + void setup_eta(int); void compute_vector(double *); int igroup, source_group; @@ -39,6 +40,8 @@ class ElectrodeVector : protected Pointers { double **cutsq; double g_ewald, eta; bool tfflag; + bool etaflag; + int eta_index; std::map tf_types; class Pair *pair; class NeighList *list; diff --git a/src/ELECTRODE/fix_electrode_conp.cpp b/src/ELECTRODE/fix_electrode_conp.cpp index 13866cd394..a87642182b 100644 --- a/src/ELECTRODE/fix_electrode_conp.cpp +++ b/src/ELECTRODE/fix_electrode_conp.cpp @@ -97,6 +97,7 @@ FixElectrodeConp::FixElectrodeConp(LAMMPS *lmp, int narg, char **arg) : top_group = 0; intelflag = false; tfflag = false; + etaflag = false; timer_flag = false; update_time = 0; @@ -211,6 +212,19 @@ FixElectrodeConp::FixElectrodeConp(LAMMPS *lmp, int narg, char **arg) : qtotal = utils::numeric(FLERR, arg[iarg], false, lmp); qtotal_var_style = VarStyle::CONST; } + } else if ((strcmp(arg[iarg], "eta") == 0)) { + if (iarg + 2 > narg) error->all(FLERR, "Need two arguments after eta command"); + etaflag = true; + int is_double, cols; + eta_index = atom->find_custom(arg[++iarg] + 2, is_double, cols); + if (eta_index == -1) + error->all(FLERR, "eta keyword requires name of previously defined property"); + if (!is_double) error->all(FLERR, "eta keyword requires double-valued property/atom vector"); + if (cols != 0) error->all(FLERR, "eta keyword requires property/atom vector not an array"); + if (!atom->nextra_border) + error->all(FLERR, + "There is no fix with ghost on, but the eta keyword requires a property/atom " + "fix with ghost on"); // toggle parameters } else if ((strcmp(arg[iarg], "etypes") == 0)) { etypes_neighlists = utils::logical(FLERR, arg[++iarg], false, lmp); @@ -520,8 +534,10 @@ void FixElectrodeConp::setup_post_neighbor() evscale = force->qe2f / force->qqrd2e; elyt_vector->setup(pair, vec_neighlist, timer_flag); + if (etaflag) elyt_vector->setup_eta(eta_index); if (need_elec_vector) { elec_vector->setup(pair, mat_neighlist, timer_flag); + if (etaflag) elec_vector->setup_eta(eta_index); if (tfflag) elec_vector->setup_tf(tf_types); } @@ -556,7 +572,8 @@ void FixElectrodeConp::setup_post_neighbor() if (etypes_neighlists) neighbor->build_one(mat_neighlist, 0); auto array_compute = std::unique_ptr(new ElectrodeMatrix(lmp, igroup, eta)); array_compute->setup(tag_to_iele, pair, mat_neighlist); - if (tfflag) { array_compute->setup_tf(tf_types); } + if (etaflag) array_compute->setup_eta(eta_index); + if (tfflag) array_compute->setup_tf(tf_types); array_compute->compute_array(elastance, timer_flag); } // write_mat before proceeding if (comm->me == 0 && write_mat) { @@ -1186,7 +1203,7 @@ double FixElectrodeConp::self_energy(int eflag) // corrections to energy due to self interaction double const qqrd2e = force->qqrd2e; int const nlocal = atom->nlocal; - double const pre = eta / sqrt(MY_2PI) * qqrd2e; + double const pre = 1. / sqrt(MY_2PI) * qqrd2e; int *mask = atom->mask; int *type = atom->type; double *q = atom->q; @@ -1194,7 +1211,8 @@ double FixElectrodeConp::self_energy(int eflag) for (int i = 0; i < nlocal; i++) { if (groupbit & mask[i]) { double const q2 = q[i] * q[i]; - double e = pre * q2; + double ieta = etaflag ? atom->dvector[eta_index][i] : eta; + double e = ieta * pre * q2; if (tfflag && (groupbit & mask[i])) e += 0.5 * qqrd2e * q2 * tf_types[type[i]]; energy += e; if (eflag) { @@ -1234,6 +1252,7 @@ double FixElectrodeConp::gausscorr(int eflag, bool fflag) double xtmp = x[i][0]; double ytmp = x[i][1]; double ztmp = x[i][2]; + double const eta_i = etaflag ? atom->dvector[eta_index][i] : eta; int itype = type[i]; int *jlist = firstneigh[i]; int jnum = numneigh[i]; @@ -1242,7 +1261,6 @@ double FixElectrodeConp::gausscorr(int eflag, bool fflag) int const j = jlist[jj] & NEIGHMASK; bool j_in_ele = groupbit & mask[j]; if (!(i_in_ele || j_in_ele)) continue; - double eta_ij = (i_in_ele && j_in_ele) ? eta / MY_SQRT2 : eta; double delx = xtmp - x[j][0]; double dely = ytmp - x[j][1]; @@ -1251,6 +1269,16 @@ double FixElectrodeConp::gausscorr(int eflag, bool fflag) int jtype = type[j]; if (rsq < force->pair->cutsq[itype][jtype]) { + double const eta_j = etaflag ? atom->dvector[eta_index][j] : eta; + double eta_ij; + if (i_in_ele && j_in_ele) + eta_ij = eta_i * eta_j / sqrt(eta_i * eta_i + eta_j * eta_j); + else if (i_in_ele) + eta_ij = eta_i; + else { + assert(j_in_ele); + eta_ij = eta_j; + } double r2inv = 1.0 / rsq; double r = sqrt(rsq); double erfc_etar = 0.; diff --git a/src/ELECTRODE/fix_electrode_conp.h b/src/ELECTRODE/fix_electrode_conp.h index 689a44053c..fa972c0acc 100644 --- a/src/ELECTRODE/fix_electrode_conp.h +++ b/src/ELECTRODE/fix_electrode_conp.h @@ -138,6 +138,8 @@ class FixElectrodeConp : public Fix { int get_top_group(); // used by ffield int top_group; // used by ffield bool tfflag; + int eta_index; // index of atom property for eta + bool etaflag; // eta specified as atom property bool timer_flag; std::map tf_types; // cg From 61993d16432314055cc263779095f550c7d7a1db Mon Sep 17 00:00:00 2001 From: Leidy Lorena Alzate Vargas - 373576 Date: Wed, 22 Nov 2023 14:36:59 -0700 Subject: [PATCH 022/448] MLIAP Unified fix for multi layer models CPU only --- src/ML-IAP/mliap_unified.cpp | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/src/ML-IAP/mliap_unified.cpp b/src/ML-IAP/mliap_unified.cpp index de1d0bcb7d..6dcdf94c2d 100644 --- a/src/ML-IAP/mliap_unified.cpp +++ b/src/ML-IAP/mliap_unified.cpp @@ -254,10 +254,8 @@ void LAMMPS_NS::update_pair_energy(MLIAPData *data, double *eij) double e = 0.5 * eij[ii]; // must not count any contribution where i is not a local atom - if (i < nlocal) { - data->eatoms[i] += e; - e_total += e; - } + data->eatoms[i] += e; + e_total += e; } data->energy = e_total; } @@ -278,16 +276,14 @@ void LAMMPS_NS::update_pair_forces(MLIAPData *data, double *fij) int j = data->jatoms[ii]; // must not count any contribution where i is not a local atom - if (i < nlocal) { - f[i][0] += fij[ii3]; - f[i][1] += fij[ii3 + 1]; - f[i][2] += fij[ii3 + 2]; - f[j][0] -= fij[ii3]; - f[j][1] -= fij[ii3 + 1]; - f[j][2] -= fij[ii3 + 2]; + f[i][0] += fij[ii3]; + f[i][1] += fij[ii3 + 1]; + f[i][2] += fij[ii3 + 2]; + f[j][0] -= fij[ii3]; + f[j][1] -= fij[ii3 + 1]; + f[j][2] -= fij[ii3 + 2]; - if (data->vflag) data->pairmliap->v_tally(i, j, &fij[ii3], data->rij[ii]); - } + if (data->vflag) data->pairmliap->v_tally(i, j, &fij[ii3], data->rij[ii]); } } From 469358cbf4951c51472777bcd51055de9aaf2ed7 Mon Sep 17 00:00:00 2001 From: Leidy Lorena Alzate Vargas - 373576 Date: Wed, 22 Nov 2023 14:38:13 -0700 Subject: [PATCH 023/448] UPDATE KOKKOS --- src/KOKKOS/mliap_unified_kokkos.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/mliap_unified_kokkos.cpp b/src/KOKKOS/mliap_unified_kokkos.cpp index 4c38e4f1d6..4a8c14d723 100644 --- a/src/KOKKOS/mliap_unified_kokkos.cpp +++ b/src/KOKKOS/mliap_unified_kokkos.cpp @@ -315,7 +315,6 @@ void LAMMPS_NS::update_pair_forces(MLIAPDataKokkosDevice *data, double *fij) int i = pair_i[ii]; int j = j_atoms[ii]; // must not count any contribution where i is not a local atom - if (i < nlocal) { Kokkos::atomic_add(&f[i*3+0], fij[ii3+0]); Kokkos::atomic_add(&f[i*3+1], fij[ii3+1]); Kokkos::atomic_add(&f[i*3+2], fij[ii3+2]); @@ -352,7 +351,6 @@ void LAMMPS_NS::update_pair_forces(MLIAPDataKokkosDevice *data, double *fij) Kokkos::atomic_add(&d_vatom(j,3), 0.5*v[3]); Kokkos::atomic_add(&d_vatom(j,4), 0.5*v[4]); Kokkos::atomic_add(&d_vatom(j,5), 0.5*v[5]); - } } } }); @@ -383,10 +381,8 @@ void LAMMPS_NS::update_atom_energy(MLIAPDataKokkosDevice *data, double *ei) Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(int i, double &local_sum){ double e = ei[i]; // must not count any contribution where i is not a local atom - if (i < nlocal) { - d_eatoms[i] = e; - local_sum += e; - } + d_eatoms[i] = e; + local_sum += e; },*data->energy); } From c57ed87e9a9f73dfbc4e6e62a5f98ac46e083f48 Mon Sep 17 00:00:00 2001 From: Evangelos Voyiatzis Date: Thu, 23 Nov 2023 15:36:27 +0200 Subject: [PATCH 024/448] Addition of conical indenter --- src/fix_indent.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/fix_indent.h b/src/fix_indent.h index 527e9ec277..6f33f6fbb1 100644 --- a/src/fix_indent.h +++ b/src/fix_indent.h @@ -49,6 +49,10 @@ class FixIndent : public Fix { int cdim, varflag; int ilevel_respa; + char *rlostr, *rhistr, *lostr, *histr; + int rlovar, rhivar, lovar, hivar; + double rlovalue, rhivalue, lovalue, hivalue; + void options(int, char **); }; From a90c7b42f99bf61d79a7cdacc69394b56de829b0 Mon Sep 17 00:00:00 2001 From: Evangelos Voyiatzis Date: Thu, 23 Nov 2023 15:40:34 +0200 Subject: [PATCH 025/448] Include code for conical indenter in fix_indent.cpp --- src/fix_indent.cpp | 466 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 398 insertions(+), 68 deletions(-) diff --git a/src/fix_indent.cpp b/src/fix_indent.cpp index 9adb337dd6..d3e80ecd79 100644 --- a/src/fix_indent.cpp +++ b/src/fix_indent.cpp @@ -23,6 +23,7 @@ #include "error.h" #include "input.h" #include "lattice.h" +#include "math_extra.h" #include "modify.h" #include "respa.h" #include "update.h" @@ -34,14 +35,30 @@ using namespace LAMMPS_NS; using namespace FixConst; -enum{NONE,SPHERE,CYLINDER,PLANE}; -enum{INSIDE,OUTSIDE}; +enum{NONE, SPHERE, CYLINDER, PLANE, CONE}; +enum{INSIDE, OUTSIDE}; + +static bool PointInsideCone(int dir, double *center, double lo, + double hi, double rlo, double rhi, double *point); + +static void DistanceExteriorPoint(int dir, double *center, + double lo, double hi, double rlo, double rhi, double &x, + double &y, double &z); + +static void DistanceInteriorPoint(int dir, double *center, + double lo, double hi, double rlo, double rhi, double &x, + double &y, double &z); + +static void point_on_line_segment(double *a, double *b, double *c, double *d); + +static double closest(double *x, double *near, double *nearest, double dsq); /* ---------------------------------------------------------------------- */ FixIndent::FixIndent(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), - xstr(nullptr), ystr(nullptr), zstr(nullptr), rstr(nullptr), pstr(nullptr) + xstr(nullptr), ystr(nullptr), zstr(nullptr), rstr(nullptr), pstr(nullptr), + rlostr(nullptr), rhistr(nullptr), lostr(nullptr), histr(nullptr) { if (narg < 4) utils::missing_cmd_args(FLERR, "fix indent", error); @@ -56,6 +73,7 @@ FixIndent::FixIndent(LAMMPS *lmp, int narg, char **arg) : ilevel_respa = 0; k = utils::numeric(FLERR,arg[3],false,lmp); + if (k < 0.0) error->all(FLERR, "Illegal fix indent force constant: {}", k); k3 = k/3.0; // read options from end of input line @@ -79,6 +97,30 @@ FixIndent::FixIndent(LAMMPS *lmp, int narg, char **arg) : if (!ystr) yvalue *= yscale; if (!zstr) zvalue *= zscale; if (!rstr) rvalue *= xscale; + } else if (istyle == CONE) { + + if (!xstr) xvalue *= xscale; + if (!ystr) yvalue *= yscale; + if (!zstr) zvalue *= zscale; + + double scaling_factor = 1.0; + switch (cdim) { + case 0: + scaling_factor = xscale; + break; + case 1: + scaling_factor = yscale; + break; + case 2: + scaling_factor = zscale; + break; + } + + if (!rlostr) rlovalue *= scaling_factor; + if (!rhistr) rhivalue *= scaling_factor; + if (!lostr) lovalue *= scaling_factor; + if (!histr) hivalue *= scaling_factor; + } else if (istyle == PLANE) { if (cdim == 0 && !pstr) pvalue *= xscale; else if (cdim == 1 && !pstr) pvalue *= yscale; @@ -86,7 +128,7 @@ FixIndent::FixIndent(LAMMPS *lmp, int narg, char **arg) : } else error->all(FLERR,"Unknown fix indent keyword: {}", istyle); varflag = 0; - if (xstr || ystr || zstr || rstr || pstr) varflag = 1; + if (xstr || ystr || zstr || rstr || pstr || rlostr || rhistr || lostr || histr) varflag = 1; indenter_flag = 0; indenter[0] = indenter[1] = indenter[2] = indenter[3] = 0.0; @@ -101,6 +143,10 @@ FixIndent::~FixIndent() delete [] zstr; delete [] rstr; delete [] pstr; + delete [] rlostr; + delete [] rhistr; + delete [] lostr; + delete [] histr; } /* ---------------------------------------------------------------------- */ @@ -154,6 +200,38 @@ void FixIndent::init() error->all(FLERR,"Variable {} for fix indent is invalid style", pstr); } + if (rlostr) { + rlovar = input->variable->find(rlostr); + if (rlovar < 0) + error->all(FLERR,"Variable {} for fix indent does not exist", rlostr); + if (!input->variable->equalstyle(rlovar)) + error->all(FLERR,"Variable {} for fix indent is invalid style", rlostr); + } + + if (rhistr) { + rhivar = input->variable->find(rhistr); + if (rhivar < 0) + error->all(FLERR,"Variable {} for fix indent does not exist", rhistr); + if (!input->variable->equalstyle(rhivar)) + error->all(FLERR,"Variable {} for fix indent is invalid style", rhistr); + } + + if (lostr) { + lovar = input->variable->find(lostr); + if (lovar < 0) + error->all(FLERR,"Variable {} for fix indent does not exist", lostr); + if (!input->variable->equalstyle(lovar)) + error->all(FLERR,"Variable {} for fix indent is invalid style", lostr); + } + + if (histr) { + hivar = input->variable->find(histr); + if (hivar < 0) + error->all(FLERR,"Variable {} for fix indent does not exist", histr); + if (!input->variable->equalstyle(hivar)) + error->all(FLERR,"Variable {} for fix indent is invalid style", histr); + } + if (utils::strmatch(update->integrate_style,"^respa")) { ilevel_respa = (dynamic_cast(update->integrate))->nlevels-1; if (respa_level >= 0) ilevel_respa = MIN(respa_level,ilevel_respa); @@ -192,32 +270,28 @@ void FixIndent::post_force(int /*vflag*/) indenter_flag = 0; indenter[0] = indenter[1] = indenter[2] = indenter[3] = 0.0; + // ctr = current indenter centerz + double ctr[3] {xvalue, yvalue, zvalue}; + if (xstr) ctr[0] = input->variable->compute_equal(xvar); + if (ystr) ctr[1] = input->variable->compute_equal(yvar); + if (zstr) ctr[2] = input->variable->compute_equal(zvar); + + double **x = atom->x; + double **f = atom->f; + int *mask = atom->mask; + int nlocal = atom->nlocal; + + double delx, dely, delz, r, dr, fmag, fx, fy, fz; + // spherical indenter if (istyle == SPHERE) { - // ctr = current indenter center - // remap into periodic box - - double ctr[3]; - if (xstr) ctr[0] = input->variable->compute_equal(xvar); - else ctr[0] = xvalue; - if (ystr) ctr[1] = input->variable->compute_equal(yvar); - else ctr[1] = yvalue; - if (zstr) ctr[2] = input->variable->compute_equal(zvar); - else ctr[2] = zvalue; + // remap indenter center into periodic box domain->remap(ctr); - double radius; - if (rstr) radius = input->variable->compute_equal(rvar); - else radius = rvalue; - - double **x = atom->x; - double **f = atom->f; - int *mask = atom->mask; - int nlocal = atom->nlocal; - - double delx,dely,delz,r,dr,fmag,fx,fy,fz; + double radius { rstr ? input->variable->compute_equal(rvar) : rvalue}; + if (radius < 0.0) error->all(FLERR, "Illegal fix indent sphere radius: {}", radius); for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { @@ -254,38 +328,11 @@ void FixIndent::post_force(int /*vflag*/) // remap into periodic box // 3rd coord is just near box for remap(), since isn't used - double ctr[3]; - if (cdim == 0) { - ctr[0] = domain->boxlo[0]; - if (ystr) ctr[1] = input->variable->compute_equal(yvar); - else ctr[1] = yvalue; - if (zstr) ctr[2] = input->variable->compute_equal(zvar); - else ctr[2] = zvalue; - } else if (cdim == 1) { - if (xstr) ctr[0] = input->variable->compute_equal(xvar); - else ctr[0] = xvalue; - ctr[1] = domain->boxlo[1]; - if (zstr) ctr[2] = input->variable->compute_equal(zvar); - else ctr[2] = zvalue; - } else { - if (xstr) ctr[0] = input->variable->compute_equal(xvar); - else ctr[0] = xvalue; - if (ystr) ctr[1] = input->variable->compute_equal(yvar); - else ctr[1] = yvalue; - ctr[2] = domain->boxlo[2]; - } + ctr[cdim] = domain->boxlo[cdim]; domain->remap(ctr); - double radius; - if (rstr) radius = input->variable->compute_equal(rvar); - else radius = rvalue; - - double **x = atom->x; - double **f = atom->f; - int *mask = atom->mask; - int nlocal = atom->nlocal; - - double delx,dely,delz,r,dr,fmag,fx,fy,fz; + double radius { rstr ? input->variable->compute_equal(rvar) : rvalue}; + if (radius < 0.0) error->all(FLERR, "Illegal fix indent cylinder radius: {}", radius); for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { @@ -324,31 +371,85 @@ void FixIndent::post_force(int /*vflag*/) indenter[3] -= fz; } + // conical indenter + + } else if (istyle == CONE) { + + double radiuslo { rlostr ? input->variable->compute_equal(rlovar) : rlovalue }; + if (radiuslo < 0.0) error->all(FLERR, "Illegal fix indent cone lower radius: {}", radiuslo); + + double radiushi { rhistr ? input->variable->compute_equal(rhivar) : rhivalue }; + if (radiushi < 0.0) error->all(FLERR, "Illegal fix indent cone high radius: {}", radiushi); + + double initial_lo { lostr ? input->variable->compute_equal(lovar) : lovalue }; + double initial_hi { histr ? input->variable->compute_equal(hivar) : hivalue }; + + ctr[cdim] = 0.5 * (initial_hi + initial_lo); + + domain->remap(ctr); + + double hi = ctr[cdim] + 0.5 * (initial_hi - initial_lo); + double lo = ctr[cdim] - 0.5 * (initial_hi - initial_lo); + + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + + delx = x[i][0] - ctr[0]; + dely = x[i][1] - ctr[1]; + delz = x[i][2] - ctr[2]; + domain->minimum_image(delx, dely, delz); + + double x0[3] {delx + ctr[0], dely + ctr[1], delz + ctr[2]}; + r = sqrt(delx * delx + dely * dely + delz * delz); + + // find if the particle is inside or outside the cone + bool point_inside_cone = PointInsideCone(cdim, ctr, lo, hi, radiuslo, radiushi, x0); + + if (side == INSIDE && point_inside_cone) continue; + if (side == OUTSIDE && !point_inside_cone) continue; + + // find the distance between the point and the cone + if (point_inside_cone) { + DistanceInteriorPoint(cdim, ctr, lo, hi, radiuslo, radiushi, x0[0], x0[1], x0[2]); + } else { + DistanceExteriorPoint(cdim, ctr, lo, hi, radiuslo, radiushi, x0[0], x0[1], x0[2]); + } + + // compute the force from the center of the cone - it is different from the approach of fix wall/region + dr = sqrt(x0[0] * x0[0] + x0[1] * x0[1] + x0[2] * x0[2]); + + int force_sign = { point_inside_cone ? 1 : -1 }; + fmag = force_sign * k * dr * dr; + + fx = delx*fmag/r; + fy = dely*fmag/r; + fz = delz*fmag/r; + f[i][0] += fx; + f[i][1] += fy; + f[i][2] += fz; + indenter[0] -= k3 * dr * dr * dr; + indenter[1] -= fx; + indenter[2] -= fy; + indenter[3] -= fz; + } + } + // planar indenter } else { // plane = current plane position - double plane; - if (pstr) plane = input->variable->compute_equal(pvar); - else plane = pvalue; - - double **x = atom->x; - double **f = atom->f; - int *mask = atom->mask; - int nlocal = atom->nlocal; - - double dr,fatom; + double plane { pstr ? input->variable->compute_equal(pvar) : pvalue}; for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { dr = planeside * (plane - x[i][cdim]); if (dr >= 0.0) continue; - fatom = -planeside * k*dr*dr; - f[i][cdim] += fatom; - indenter[0] -= k3 * dr*dr*dr; - indenter[cdim+1] -= fatom; + fmag = -planeside * k * dr * dr; + f[i][cdim] += fmag; + indenter[0] -= k3 * dr * dr * dr; + indenter[cdim+1] -= fmag; } } @@ -487,6 +588,64 @@ void FixIndent::options(int narg, char **arg) istyle = PLANE; iarg += 4; + } else if (strcmp(arg[iarg],"cone") == 0) { + + if (iarg+8 > narg) utils::missing_cmd_args(FLERR, "fix indent cone", error); + + if (strcmp(arg[iarg+1],"x") == 0) { + cdim = 0; + + if (utils::strmatch(arg[iarg+2],"^v_")) { + ystr = utils::strdup(arg[iarg+2]+2); + } else yvalue = utils::numeric(FLERR,arg[iarg+2],false,lmp); + + if (utils::strmatch(arg[iarg+3],"^v_")) { + zstr = utils::strdup(arg[iarg+3]+2); + } else zvalue = utils::numeric(FLERR,arg[iarg+3],false,lmp); + + } else if (strcmp(arg[iarg+1],"y") == 0) { + cdim = 1; + + if (utils::strmatch(arg[iarg+2],"^v_")) { + xstr = utils::strdup(arg[iarg+2]+2); + } else xvalue = utils::numeric(FLERR,arg[iarg+2],false,lmp); + + if (utils::strmatch(arg[iarg+3],"^v_")) { + zstr = utils::strdup(arg[iarg+3]+2); + } else zvalue = utils::numeric(FLERR,arg[iarg+3],false,lmp); + + } else if (strcmp(arg[iarg+1],"z") == 0) { + cdim = 2; + + if (utils::strmatch(arg[iarg+2],"^v_")) { + xstr = utils::strdup(arg[iarg+2]+2); + } else xvalue = utils::numeric(FLERR,arg[iarg+2],false,lmp); + + if (utils::strmatch(arg[iarg+3],"^v_")) { + ystr = utils::strdup(arg[iarg+3]+2); + } else yvalue = utils::numeric(FLERR,arg[iarg+3],false,lmp); + + } else error->all(FLERR,"Unknown fix indent cone argument: {}", arg[iarg+1]); + + if (utils::strmatch(arg[iarg+4],"^v_")) { + rlostr = utils::strdup(arg[iarg+4]+2); + } else rlovalue = utils::numeric(FLERR,arg[iarg+4],false,lmp); + + if (utils::strmatch(arg[iarg+5],"^v_")) { + rhistr = utils::strdup(arg[iarg+5]+2); + } else rhivalue = utils::numeric(FLERR,arg[iarg+5],false,lmp); + + if (utils::strmatch(arg[iarg+6],"^v_")) { + lostr = utils::strdup(arg[iarg+6]+2); + } else lovalue = utils::numeric(FLERR,arg[iarg+6],false,lmp); + + if (utils::strmatch(arg[iarg+7],"^v_")) { + histr = utils::strdup(arg[iarg+7]+2); + } else hivalue = utils::numeric(FLERR,arg[iarg+7],false,lmp); + + istyle = CONE; + iarg += 8; + } else if (strcmp(arg[iarg],"units") == 0) { if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "fix indent units", error); if (strcmp(arg[iarg+1],"box") == 0) scaleflag = 0; @@ -503,3 +662,174 @@ void FixIndent::options(int narg, char **arg) } else error->all(FLERR,"Unknown fix indent argument: {}", arg[iarg]); } } + +/* ---------------------------------------------------------------------- + determines if a point is inside (true) or outside (false) of a cone +------------------------------------------------------------------------- */ + +bool PointInsideCone(int dir, double *center, double lo, + double hi, double rlo, double rhi, double *x) +{ + if ((x[dir] > hi) || (x[dir] < lo)) return false; + + double del[3] {x[0] - center[0], x[1] - center[1], x[2] - center[2]}; + del[dir] = 0.0; + + double dist = sqrt(del[0] * del[0] + del[1] * del[1] + del[2] * del[2]); + double currentradius = rlo + (x[dir] - lo) * (rhi - rlo) / (hi - lo); + + if (dist > currentradius) return false; + + return true; +} + +/* ---------------------------------------------------------------------- + distance between an exterior point and a cone +------------------------------------------------------------------------- */ +void DistanceExteriorPoint(int dir, double *center, double lo, double hi, + double rlo, double rhi, double &x, double &y, double &z) +{ + double xp[3], nearest[3], corner1[3], corner2[3]; + + double point[3] {x, y, z}; + + double del[3] {x - center[0], y - center[1], z - center[2]}; + del[dir] = 0.0; + + double r = sqrt(del[0] * del[0] + del[1] * del[1] + del[2] * del[2]); + + corner1[0] = center[0] + del[0] * rlo / r; + corner1[1] = center[1] + del[1] * rlo / r; + corner1[2] = center[2] + del[2] * rlo / r; + corner1[dir] = lo; + + corner2[0] = center[0] + del[0] * rhi / r; + corner2[1] = center[1] + del[1] * rhi / r; + corner2[2] = center[2] + del[2] * rhi / r; + corner2[dir] = hi; + + double corner3[3] {center[0], center[1], center[2]}; + corner3[dir] = lo; + + double corner4[3] {center[0], center[1], center[2]}; + corner4[dir] = hi; + + // initialize distance to a big number + double distsq = 1.0e20; + + // check the first triangle + point_on_line_segment(corner1, corner2, point, xp); + distsq = closest(point, xp, nearest, distsq); + + // check the second triangle + point_on_line_segment(corner1, corner3, point, xp); + distsq = closest(point, xp, nearest, distsq); + + // check the third triangle + point_on_line_segment(corner2, corner4, point, xp); + distsq = closest(point, xp, nearest, distsq); + + x -= nearest[0]; + y -= nearest[1]; + z -= nearest[2]; + + return; +} + +/* ---------------------------------------------------------------------- + distance between an interior point and a cone +------------------------------------------------------------------------- */ + +void DistanceInteriorPoint(int dir, double *center, + double lo, double hi, double rlo, double rhi, double &x, + double &y, double &z) +{ + double r, dist_disk, dist_surf; + double surflo[3], surfhi[3], xs[3]; + double initial_point[3] {x, y, z}; + double point[3] {0.0, 0.0, 0.0}; + + // initial check with the two disks + if ( (initial_point[dir] - lo) < (hi - initial_point[dir]) ) { + dist_disk = (initial_point[dir] - lo) * (initial_point[dir] - lo); + point[dir] = initial_point[dir] - lo; + } else { + dist_disk = (hi - initial_point[dir]) * (hi - initial_point[dir]); + point[dir] = initial_point[dir] - hi; + } + + // check with the points in the conical surface + double del[3] {x - center[0], y - center[1], z - center[2]}; + del[dir] = 0.0; + r = sqrt(del[0] * del[0] + del[1] * del[1] + del[2] * del[2]); + + surflo[0] = center[0] + del[0] * rlo / r; + surflo[1] = center[1] + del[1] * rlo / r; + surflo[2] = center[2] + del[2] * rlo / r; + surflo[dir] = lo; + + surfhi[0] = center[0] + del[0] * rhi / r; + surfhi[1] = center[1] + del[1] * rhi / r; + surfhi[2] = center[2] + del[2] * rhi / r; + surfhi[dir] = hi; + + point_on_line_segment(surflo, surfhi, initial_point, xs); + + double dx[3] {initial_point[0] - xs[0], initial_point[1] - xs[1], initial_point[2] - xs[2]}; + dist_surf = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + if (dist_surf < dist_disk) { + x = dx[0]; + y = dx[1]; + z = dx[2]; + } else { + x = point[0]; + y = point[1]; + z = point[2]; + } + + return; +} + +/* ---------------------------------------------------------------------- + helper function extracted from region.cpp +------------------------------------------------------------------------- */ + +void point_on_line_segment(double *a, double *b, double *c, double *d) +{ + double ba[3], ca[3]; + + MathExtra::sub3(b, a, ba); + MathExtra::sub3(c, a, ca); + double t = MathExtra::dot3(ca, ba) / MathExtra::dot3(ba, ba); + if (t <= 0.0) { + d[0] = a[0]; + d[1] = a[1]; + d[2] = a[2]; + } else if (t >= 1.0) { + d[0] = b[0]; + d[1] = b[1]; + d[2] = b[2]; + } else { + d[0] = a[0] + t * ba[0]; + d[1] = a[1] + t * ba[1]; + d[2] = a[2] + t * ba[2]; + } +} + +/* ---------------------------------------------------------------------- + helper function extracted from region_cone.cpp +------------------------------------------------------------------------- */ + +double closest(double *x, double *near, double *nearest, double dsq) +{ + double dx = x[0] - near[0]; + double dy = x[1] - near[1]; + double dz = x[2] - near[2]; + double rsq = dx * dx + dy * dy + dz * dz; + if (rsq >= dsq) return dsq; + + nearest[0] = near[0]; + nearest[1] = near[1]; + nearest[2] = near[2]; + return rsq; +} From e1c028c785fface11d8eba7d6ab57e42d278e654 Mon Sep 17 00:00:00 2001 From: Evangelos Voyiatzis Date: Thu, 23 Nov 2023 16:02:22 +0200 Subject: [PATCH 026/448] Update documentation Describe the arguments for cone indenter style --- doc/src/fix_indent.rst | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/doc/src/fix_indent.rst b/doc/src/fix_indent.rst index 15790e15d0..d478d7dc50 100644 --- a/doc/src/fix_indent.rst +++ b/doc/src/fix_indent.rst @@ -14,17 +14,23 @@ Syntax * indent = style name of this fix command * K = force constant for indenter surface (force/distance\^2 units) * one or more keyword/value pairs may be appended -* keyword = *sphere* or *cylinder* or *plane* or *side* or *units* +* keyword = *sphere* or *cone* or *cylinder* or *plane* or *side* or *units* .. parsed-literal:: *sphere* args = x y z R - x,y,z = position of center of indenter (distance units) + x, y, z = position of center of indenter (distance units) R = sphere radius of indenter (distance units) - any of x,y,z,R can be a variable (see below) + any of x, y, z, R can be a variable (see below) + *cone* args = dim c1 c2 radlo radhi lo hi + dim = *x* or *y* or *z* = axis of cone + c1, c2 = coords of cone axis in other 2 dimensions (distance units) + radlo,radhi = cone radii at lo and hi end (distance units) + lo,hi = bounds of cone in dim (distance units) + any of c1, c2, radlo, radhi, lo, hi can be a variable (see below) *cylinder* args = dim c1 c2 R dim = *x* or *y* or *z* = axis of cylinder - c1,c2 = coords of cylinder axis in other 2 dimensions (distance units) + c1, c2 = coords of cylinder axis in other 2 dimensions (distance units) R = cylinder radius of indenter (distance units) any of c1,c2,R can be a variable (see below) *plane* args = dim pos side @@ -57,7 +63,7 @@ material or as an obstacle in a flow. Or it can be used as a constraining wall around a simulation; see the discussion of the *side* keyword below. -The indenter can either be spherical or cylindrical or planar. You +The indenter can either be spherical or conical or cylindrical or planar. You must set one of those 3 keywords. A spherical indenter exerts a force of magnitude @@ -75,9 +81,9 @@ A cylindrical indenter exerts the same force, except that *r* is the distance from the atom to the center axis of the cylinder. The cylinder extends infinitely along its axis. -Spherical and cylindrical indenters account for periodic boundaries in +Spherical, conical and cylindrical indenters account for periodic boundaries in two ways. First, the center point of a spherical indenter (x,y,z) or -axis of a cylindrical indenter (c1,c2) is remapped back into the +axis of a conical/cylindrical indenter (c1,c2) is remapped back into the simulation box, if the box is periodic in a particular dimension. This occurs every timestep if the indenter geometry is specified with a variable (see below), e.g. it is moving over time. Second, the From 82868cd583c1dab43c541340f43161e4e9a4f1aa Mon Sep 17 00:00:00 2001 From: Ludwig Ahrens Date: Mon, 4 Dec 2023 13:59:07 +0100 Subject: [PATCH 027/448] Simplify Madelung examples with mixed eta --- examples/PACKAGES/electrode/madelung/data.eta | 34 ------------------- .../PACKAGES/electrode/madelung/data.eta_mix | 34 ------------------- examples/PACKAGES/electrode/madelung/in.eta | 13 +++---- .../PACKAGES/electrode/madelung/in.eta_cg | 15 ++++---- .../PACKAGES/electrode/madelung/in.eta_mix | 13 +++---- .../electrode/madelung/settings_eta.mod | 19 ----------- 6 files changed, 16 insertions(+), 112 deletions(-) delete mode 100644 examples/PACKAGES/electrode/madelung/data.eta delete mode 100644 examples/PACKAGES/electrode/madelung/data.eta_mix delete mode 100644 examples/PACKAGES/electrode/madelung/settings_eta.mod diff --git a/examples/PACKAGES/electrode/madelung/data.eta b/examples/PACKAGES/electrode/madelung/data.eta deleted file mode 100644 index b05700a4ab..0000000000 --- a/examples/PACKAGES/electrode/madelung/data.eta +++ /dev/null @@ -1,34 +0,0 @@ -LAMMPS data file via write_data, version 24 Dec 2020, timestep = 0 - -4 atoms -3 atom types - -0 1 xlo xhi -0 1 ylo yhi --10 10 zlo zhi - -Masses - -1 196.966553 -2 196.966553 -3 1.0 - -Pair Coeffs # lj/cut/coul/long - -1 0 0 -2 0 0 -3 0 0 - -Atoms # full - -1 1 1 0.00 0.00 0.00 -2.00 # bottom electrode -2 2 2 0.00 0.00 0.00 2.00 # top electrode -3 3 3 0.50 0.00 0.00 -1.00 # bottom electrolyte -4 3 3 -0.50 0.00 0.00 1.00 # top electrolyte - -ETA - -1 2.0 -2 2.0 -3 0 -4 0 diff --git a/examples/PACKAGES/electrode/madelung/data.eta_mix b/examples/PACKAGES/electrode/madelung/data.eta_mix deleted file mode 100644 index 9322ebd662..0000000000 --- a/examples/PACKAGES/electrode/madelung/data.eta_mix +++ /dev/null @@ -1,34 +0,0 @@ -LAMMPS data file via write_data, version 24 Dec 2020, timestep = 0 - -4 atoms -3 atom types - -0 1 xlo xhi -0 1 ylo yhi --10 10 zlo zhi - -Masses - -1 196.966553 -2 196.966553 -3 1.0 - -Pair Coeffs # lj/cut/coul/long - -1 0 0 -2 0 0 -3 0 0 - -Atoms # full - -1 1 1 0.00 0.00 0.00 -2.00 # bottom electrode -2 2 2 0.00 0.00 0.00 2.00 # top electrode -3 3 3 0.50 0.00 0.00 -1.00 # bottom electrolyte -4 3 3 -0.50 0.00 0.00 1.00 # top electrolyte - -ETA - -1 0.5 -2 3.0 -3 0 -4 0 diff --git a/examples/PACKAGES/electrode/madelung/in.eta b/examples/PACKAGES/electrode/madelung/in.eta index d928f8fed0..3a45bb1bf5 100644 --- a/examples/PACKAGES/electrode/madelung/in.eta +++ b/examples/PACKAGES/electrode/madelung/in.eta @@ -1,16 +1,13 @@ -atom_style full -units real boundary p p f - kspace_style ewald/electrode 1.0e-8 kspace_modify slab 8.0 # ew3dc -pair_style lj/cut/coul/long 12 -fix feta all property/atom d_eta ghost yes -read_data data.eta fix feta NULL ETA - -include "settings_eta.mod" +include "settings.mod" # styles, computes, groups and fixes +thermo_style custom step pe c_qbot c_qtop +fix feta all property/atom d_eta ghost on +set group bot d_eta 2.0 +set group top d_eta 2.0 fix conp bot electrode/conp 0 2 couple top 1 symm on eta d_eta write_inv inv.csv write_vec vec.csv run 0 diff --git a/examples/PACKAGES/electrode/madelung/in.eta_cg b/examples/PACKAGES/electrode/madelung/in.eta_cg index e04db318d5..5ac8cddf17 100644 --- a/examples/PACKAGES/electrode/madelung/in.eta_cg +++ b/examples/PACKAGES/electrode/madelung/in.eta_cg @@ -1,17 +1,14 @@ -atom_style full -units real boundary p p f - kspace_style ewald/electrode 1.0e-8 kspace_modify slab 8.0 # ew3dc -pair_style lj/cut/coul/long 12 -fix feta all property/atom d_eta ghost yes -read_data data.eta_mix fix feta NULL ETA +include "settings.mod" # styles, computes, groups and fixes -include "settings_eta.mod" - -fix conp bot electrode/conp 0 2 couple top 1 symm on algo cg 1e-6 eta d_eta +thermo_style custom step pe c_qbot c_qtop +fix feta all property/atom d_eta ghost on +set group bot d_eta 0.5 +set group top d_eta 3.0 +fix conp bot electrode/conp 0 2 couple top 1 symm on eta d_eta algo cg 1e-6 run 0 diff --git a/examples/PACKAGES/electrode/madelung/in.eta_mix b/examples/PACKAGES/electrode/madelung/in.eta_mix index d4bcf71225..d00e008fa4 100644 --- a/examples/PACKAGES/electrode/madelung/in.eta_mix +++ b/examples/PACKAGES/electrode/madelung/in.eta_mix @@ -1,16 +1,13 @@ -atom_style full -units real boundary p p f - kspace_style ewald/electrode 1.0e-8 kspace_modify slab 8.0 # ew3dc -pair_style lj/cut/coul/long 12 +include "settings.mod" # styles, computes, groups and fixes + +thermo_style custom step pe c_qbot c_qtop fix feta all property/atom d_eta ghost on -read_data data.eta_mix fix feta NULL ETA - -include "settings_eta.mod" - +set group bot d_eta 0.5 +set group top d_eta 3.0 fix conp bot electrode/conp 0 2 couple top 1 symm on eta d_eta write_inv inv.csv write_vec vec.csv run 0 diff --git a/examples/PACKAGES/electrode/madelung/settings_eta.mod b/examples/PACKAGES/electrode/madelung/settings_eta.mod deleted file mode 100644 index aee63bf2e9..0000000000 --- a/examples/PACKAGES/electrode/madelung/settings_eta.mod +++ /dev/null @@ -1,19 +0,0 @@ - -# distribute electrode atoms among all processors: -if "$(extract_setting(world_size) % 2) == 0" then "processors * * 2" - -group bot type 1 -group top type 2 - -# get electrode charges -variable q atom q -compute qbot bot reduce sum v_q -compute qtop top reduce sum v_q - -compute compute_pe all pe -variable vpe equal c_compute_pe -variable charge equal c_qtop -fix fxprint all print 1 "${vpe}, ${charge}" file "out.csv" - -thermo_style custom step pe c_qbot c_qtop - From fc897512a029cee3a57b299c9dd4b93ab6bbff02 Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Wed, 6 Dec 2023 11:13:34 -0500 Subject: [PATCH 028/448] Initial conversion of all FFT_ settings in the KOKKOS subdirectory to FFT_KOKKOS_ --- src/KOKKOS/fft3d_kokkos.cpp | 84 ++++++++++++++++++------------------ src/KOKKOS/fft3d_kokkos.h | 10 ++--- src/KOKKOS/fftdata_kokkos.h | 86 ++++++++++++++++++------------------- src/KOKKOS/pppm_kokkos.cpp | 4 +- src/KOKKOS/pppm_kokkos.h | 32 +++++++------- 5 files changed, 108 insertions(+), 108 deletions(-) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 82e4140f77..ca3d18e11a 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -44,20 +44,20 @@ FFT3dKokkos::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int int ngpus = lmp->kokkos->ngpus; ExecutionSpace execution_space = ExecutionSpaceFromDevice::space; -#if defined(FFT_MKL) +#if defined(FFT_KOKKOS_MKL) if (ngpus > 0 && execution_space == Device) lmp->error->all(FLERR,"Cannot use the MKL library with Kokkos on GPUs"); -#elif defined(FFT_FFTW3) +#elif defined(FFT_KOKKOS_FFTW3) if (ngpus > 0 && execution_space == Device) lmp->error->all(FLERR,"Cannot use the FFTW library with Kokkos on GPUs"); -#elif defined(FFT_CUFFT) +#elif defined(FFT_KOKKOS_CUFFT) if (ngpus > 0 && execution_space == Host) lmp->error->all(FLERR,"Cannot use the cuFFT library with Kokkos on the host CPUs"); -#elif defined(FFT_HIPFFT) +#elif defined(FFT_KOKKOS_HIPFFT) if (ngpus > 0 && execution_space == Host) lmp->error->all(FLERR,"Cannot use the hipFFT library with Kokkos on the host CPUs"); -#elif defined(FFT_KISSFFT) +#elif defined(FFT_KOKKOS_KISSFFT) // The compiler can't statically determine the stack size needed for // recursive function calls in KISS FFT and the default per-thread // stack size on GPUs needs to be increased to prevent stack overflows @@ -149,20 +149,20 @@ public: KOKKOS_INLINE_FUNCTION void operator() (const int &i) const { -#if defined(FFT_FFTW3) || defined(FFT_CUFFT) || defined(FFT_HIPFFT) +#if defined(FFT_KOKKOS_FFTW3) || defined(FFT_KOKKOS_CUFFT) || defined(FFT_KOKKOS_HIPFFT) FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i); *(out_ptr++) *= norm; *(out_ptr++) *= norm; -#elif defined(FFT_MKL) +#elif defined(FFT_KOKKOS_MKL) d_out(i) *= norm; -#else // FFT_KISS +#else // FFT_KOKKOS_KISS d_out(i).re *= norm; d_out(i).im *= norm; #endif } }; -#ifdef FFT_KISSFFT +#ifdef FFT_KOKKOS_KISSFFT template struct kiss_fft_functor { public: @@ -219,19 +219,19 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, total = plan->total1; length = plan->length1; - #if defined(FFT_MKL) + #if defined(FFT_KOKKOS_MKL) if (flag == 1) DftiComputeForward(plan->handle_fast,d_data.data()); else DftiComputeBackward(plan->handle_fast,d_data.data()); - #elif defined(FFT_FFTW3) + #elif defined(FFT_KOKKOS_FFTW3) if (flag == 1) FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); else FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - #elif defined(FFT_CUFFT) + #elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); - #elif defined(FFT_HIPFFT) + #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); #else typename FFT_AT::t_FFT_DATA_1d d_tmp = @@ -265,19 +265,19 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, total = plan->total2; length = plan->length2; - #if defined(FFT_MKL) + #if defined(FFT_KOKKOS_MKL) if (flag == 1) DftiComputeForward(plan->handle_mid,d_data.data()); else DftiComputeBackward(plan->handle_mid,d_data.data()); - #elif defined(FFT_FFTW3) + #elif defined(FFT_KOKKOS_FFTW3) if (flag == 1) FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); else FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - #elif defined(FFT_CUFFT) + #elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); - #elif defined(FFT_HIPFFT) + #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); #else d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); @@ -309,19 +309,19 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, total = plan->total3; length = plan->length3; - #if defined(FFT_MKL) + #if defined(FFT_KOKKOS_MKL) if (flag == 1) DftiComputeForward(plan->handle_slow,d_data.data()); else DftiComputeBackward(plan->handle_slow,d_data.data()); - #elif defined(FFT_FFTW3) + #elif defined(FFT_KOKKOS_FFTW3) if (flag == 1) FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); else FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - #elif defined(FFT_CUFFT) + #elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); - #elif defined(FFT_HIPFFT) + #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); @@ -609,46 +609,46 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl // system specific pre-computation of 1d FFT coeffs // and scaling normalization -#if defined(FFT_MKL) - DftiCreateDescriptor( &(plan->handle_fast), FFT_MKL_PREC, DFTI_COMPLEX, 1, +#if defined(FFT_KOKKOS_MKL) + DftiCreateDescriptor( &(plan->handle_fast), FFT_KOKKOS_MKL_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nfast); DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)plan->total1/nfast); DftiSetValue(plan->handle_fast, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_fast, DFTI_INPUT_DISTANCE, (MKL_LONG)nfast); DftiSetValue(plan->handle_fast, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nfast); -#if defined(FFT_MKL_THREADS) +#if defined(FFT_KOKKOS_MKL_THREADS) DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_USER_THREADS, nthreads); #endif DftiCommitDescriptor(plan->handle_fast); - DftiCreateDescriptor( &(plan->handle_mid), FFT_MKL_PREC, DFTI_COMPLEX, 1, + DftiCreateDescriptor( &(plan->handle_mid), FFT_KOKKOS_MKL_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nmid); DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)plan->total2/nmid); DftiSetValue(plan->handle_mid, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_mid, DFTI_INPUT_DISTANCE, (MKL_LONG)nmid); DftiSetValue(plan->handle_mid, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nmid); -#if defined(FFT_MKL_THREADS) +#if defined(FFT_KOKKOS_MKL_THREADS) DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_USER_THREADS, nthreads); #endif DftiCommitDescriptor(plan->handle_mid); - DftiCreateDescriptor( &(plan->handle_slow), FFT_MKL_PREC, DFTI_COMPLEX, 1, + DftiCreateDescriptor( &(plan->handle_slow), FFT_KOKKOS_MKL_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nslow); DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)plan->total3/nslow); DftiSetValue(plan->handle_slow, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_slow, DFTI_INPUT_DISTANCE, (MKL_LONG)nslow); DftiSetValue(plan->handle_slow, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nslow); -#if defined(FFT_MKL_THREADS) +#if defined(FFT_KOKKOS_MKL_THREADS) DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_USER_THREADS, nthreads); #endif DftiCommitDescriptor(plan->handle_slow); -#elif defined(FFT_FFTW3) +#elif defined(FFT_KOKKOS_FFTW3) -#if defined (FFT_FFTW_THREADS) +#if defined (FFT_KOKKOS_FFTW_THREADS) if (nthreads > 1) { FFTW_API(init_threads)(); FFTW_API(plan_with_nthreads)(nthreads); @@ -692,7 +692,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl nullptr,&nslow,1,plan->length3, FFTW_BACKWARD,FFTW_ESTIMATE); -#elif defined(FFT_CUFFT) +#elif defined(FFT_KOKKOS_CUFFT) cufftPlanMany(&(plan->plan_fast), 1, &nfast, &nfast,1,plan->length1, @@ -709,7 +709,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl &nslow,1,plan->length3, CUFFT_TYPE,plan->total3/plan->length3); -#elif defined(FFT_HIPFFT) +#elif defined(FFT_KOKKOS_HIPFFT) hipfftPlanMany(&(plan->plan_fast), 1, &nfast, &nfast,1,plan->length1, @@ -726,7 +726,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl &nslow,1,plan->length3, HIPFFT_TYPE,plan->total3/plan->length3); -#else /* FFT_KISS */ +#else /* FFT_KOKKOS_KISS */ kissfftKK = new KissFFTKokkos(); @@ -781,11 +781,11 @@ void FFT3dKokkos::fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokk if (plan->mid2_plan) remapKK->remap_3d_destroy_plan_kokkos(plan->mid2_plan); if (plan->post_plan) remapKK->remap_3d_destroy_plan_kokkos(plan->post_plan); -#if defined(FFT_MKL) +#if defined(FFT_KOKKOS_MKL) DftiFreeDescriptor(&(plan->handle_fast)); DftiFreeDescriptor(&(plan->handle_mid)); DftiFreeDescriptor(&(plan->handle_slow)); -#elif defined(FFT_FFTW3) +#elif defined(FFT_KOKKOS_FFTW3) FFTW_API(destroy_plan)(plan->plan_slow_forward); FFTW_API(destroy_plan)(plan->plan_slow_backward); FFTW_API(destroy_plan)(plan->plan_mid_forward); @@ -793,11 +793,11 @@ void FFT3dKokkos::fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokk FFTW_API(destroy_plan)(plan->plan_fast_forward); FFTW_API(destroy_plan)(plan->plan_fast_backward); -#if defined (FFT_FFTW_THREADS) +#if defined (FFT_KOKKOS_FFTW_THREADS) FFTW_API(cleanup_threads)(); #endif -#elif defined (FFT_KISSFFT) +#elif defined (FFT_KOKKOS_KISSFFT) delete kissfftKK; #endif @@ -855,7 +855,7 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ // fftw3 and Dfti in MKL encode the number of transforms // into the plan, so we cannot operate on a smaller data set -#if defined(FFT_MKL) || defined(FFT_FFTW3) +#if defined(FFT_KOKKOS_MKL) || defined(FFT_KOKKOS_FFTW3) if ((total1 > nsize) || (total2 > nsize) || (total3 > nsize)) return; #endif @@ -866,7 +866,7 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ // perform 1d FFTs in each of 3 dimensions // data is just an array of 0.0 -#if defined(FFT_MKL) +#if defined(FFT_KOKKOS_MKL) if (flag == -1) { DftiComputeForward(plan->handle_fast,d_data.data()); DftiComputeForward(plan->handle_mid,d_data.data()); @@ -876,7 +876,7 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ DftiComputeBackward(plan->handle_mid,d_data.data()); DftiComputeBackward(plan->handle_slow,d_data.data()); } -#elif defined(FFT_FFTW3) +#elif defined(FFT_KOKKOS_FFTW3) if (flag == -1) { FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); @@ -886,11 +886,11 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); } -#elif defined(FFT_CUFFT) +#elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); -#elif defined(FFT_HIPFFT) +#elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index a0489f69bb..ed49c4b1ee 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -45,22 +45,22 @@ struct fft_plan_3d_kokkos { double norm; // normalization factor for rescaling // system specific 1d FFT info -#if defined(FFT_MKL) +#if defined(FFT_KOKKOS_MKL) DFTI_DESCRIPTOR *handle_fast; DFTI_DESCRIPTOR *handle_mid; DFTI_DESCRIPTOR *handle_slow; -#elif defined(FFT_FFTW3) +#elif defined(FFT_KOKKOS_FFTW3) FFTW_API(plan) plan_fast_forward; FFTW_API(plan) plan_fast_backward; FFTW_API(plan) plan_mid_forward; FFTW_API(plan) plan_mid_backward; FFTW_API(plan) plan_slow_forward; FFTW_API(plan) plan_slow_backward; -#elif defined(FFT_CUFFT) +#elif defined(FFT_KOKKOS_CUFFT) cufftHandle plan_fast; cufftHandle plan_mid; cufftHandle plan_slow; -#elif defined(FFT_HIPFFT) +#elif defined(FFT_KOKKOS_HIPFFT) hipfftHandle plan_fast; hipfftHandle plan_mid; hipfftHandle plan_slow; @@ -92,7 +92,7 @@ class FFT3dKokkos : protected Pointers { struct fft_plan_3d_kokkos *plan; RemapKokkos *remapKK; -#ifdef FFT_KISSFFT +#ifdef FFT_KOKKOS_KISSFFT KissFFTKokkos *kissfftKK; #endif diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index a3812a1cf0..bed2c7faf0 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -29,10 +29,10 @@ // Data types for single-precision complex -#if FFT_PRECISION == 1 -#elif FFT_PRECISION == 2 +#if FFT_KOKKOS_PRECISION == 1 +#elif FFT_KOKKOS_PRECISION == 2 #else -#error "FFT_PRECISION needs to be either 1 (=single) or 2 (=double)" +#error "FFT_KOKKOS_PRECISION needs to be either 1 (=single) or 2 (=double)" #endif @@ -41,70 +41,70 @@ // FFTs here, since they may be valid in fft3d.cpp #ifdef KOKKOS_ENABLE_CUDA -# if defined(FFT_FFTW) -# undef FFT_FFTW +# if defined(FFT_KOKKOS_FFTW) +# undef FFT_KOKKOS_FFTW # endif -# if defined(FFT_FFTW3) -# undef FFT_FFTW3 +# if defined(FFT_KOKKOS_FFTW3) +# undef FFT_KOKKOS_FFTW3 # endif -# if defined(FFT_MKL) -# undef FFT_MKL +# if defined(FFT_KOKKOS_MKL) +# undef FFT_KOKKOS_MKL # endif -# if !defined(FFT_CUFFT) && !defined(FFT_KISSFFT) -# define FFT_KISSFFT +# if !defined(FFT_KOKKOS_CUFFT) && !defined(FFT_KOKKOS_KISSFFT) +# define FFT_KOKKOS_KISSFFT # endif #elif defined(KOKKOS_ENABLE_HIP) -# if defined(FFT_FFTW) -# undef FFT_FFTW +# if defined(FFT_KOKKOS_FFTW) +# undef FFT_KOKKOS_FFTW # endif -# if defined(FFT_FFTW3) -# undef FFT_FFTW3 +# if defined(FFT_KOKKOS_FFTW3) +# undef FFT_KOKKOS_FFTW3 # endif -# if defined(FFT_MKL) -# undef FFT_MKL +# if defined(FFT_KOKKOS_MKL) +# undef FFT_KOKKOS_MKL # endif -# if !defined(FFT_HIPFFT) && !defined(FFT_KISSFFT) -# define FFT_KISSFFT +# if !defined(FFT_KOKKOS_HIPFFT) && !defined(FFT_KOKKOS_KISSFFT) +# define FFT_KOKKOS_KISSFFT # endif #else -# if defined(FFT_CUFFT) -# error "Must enable CUDA with KOKKOS to use -DFFT_CUFFT" +# if defined(FFT_KOKKOS_CUFFT) +# error "Must enable CUDA with KOKKOS to use -DFFT_KOKKOS_CUFFT" # endif -# if defined(FFT_HIPFFT) -# error "Must enable HIP with KOKKOS to use -DFFT_HIPFFT" +# if defined(FFT_KOKKOS_HIPFFT) +# error "Must enable HIP with KOKKOS to use -DFFT_KOKKOS_HIPFFT" # endif // if user set FFTW, it means FFTW3 -# ifdef FFT_FFTW -# define FFT_FFTW3 +# ifdef FFT_KOKKOS_FFTW +# define FFT_KOKKOS_FFTW3 # endif -# ifdef FFT_FFTW_THREADS -# if !defined(FFT_FFTW3) -# error "Must use -DFFT_FFTW3 with -DFFT_FFTW_THREADS" +# ifdef FFT_KOKKOS_FFTW_THREADS +# if !defined(FFT_KOKKOS_FFTW3) +# error "Must use -DFFT_KOKKOS_FFTW3 with -DFFT_KOKKOS_FFTW_THREADS" # endif # endif #endif -#if defined(FFT_MKL) +#if defined(FFT_KOKKOS_MKL) #include "mkl_dfti.h" - #if defined(FFT_SINGLE) + #if defined(FFT_KOKKOS_SINGLE) typedef float _Complex FFT_DATA; - #define FFT_MKL_PREC DFTI_SINGLE + #define FFT_KOKKOS_MKL_PREC DFTI_SINGLE #else typedef double _Complex FFT_DATA; - #define FFT_MKL_PREC DFTI_DOUBLE + #define FFT_KOKKOS_MKL_PREC DFTI_DOUBLE #endif -#elif defined(FFT_FFTW3) +#elif defined(FFT_KOKKOS_FFTW3) #include "fftw3.h" - #if defined(FFT_SINGLE) + #if defined(FFT_KOKKOS_SINGLE) typedef fftwf_complex FFT_DATA; #define FFTW_API(function) fftwf_ ## function #else typedef fftw_complex FFT_DATA; #define FFTW_API(function) fftw_ ## function #endif -#elif defined(FFT_CUFFT) +#elif defined(FFT_KOKKOS_CUFFT) #include "cufft.h" - #if defined(FFT_SINGLE) + #if defined(FFT_KOKKOS_SINGLE) #define cufftExec cufftExecC2C #define CUFFT_TYPE CUFFT_C2C typedef cufftComplex FFT_DATA; @@ -113,9 +113,9 @@ #define CUFFT_TYPE CUFFT_Z2Z typedef cufftDoubleComplex FFT_DATA; #endif -#elif defined(FFT_HIPFFT) +#elif defined(FFT_KOKKOS_HIPFFT) #include - #if defined(FFT_SINGLE) + #if defined(FFT_KOKKOS_SINGLE) #define hipfftExec hipfftExecC2C #define HIPFFT_TYPE HIPFFT_C2C typedef hipfftComplex FFT_DATA; @@ -125,7 +125,7 @@ typedef hipfftDoubleComplex FFT_DATA; #endif #else - #if defined(FFT_SINGLE) + #if defined(FFT_KOKKOS_SINGLE) #define kiss_fft_scalar float #else #define kiss_fft_scalar double @@ -134,13 +134,13 @@ kiss_fft_scalar re; kiss_fft_scalar im; } FFT_DATA; - #ifndef FFT_KISSFFT - #define FFT_KISSFFT + #ifndef FFT_KOKKOS_KISSFFT + #define FFT_KOKKOS_KISSFFT #endif #endif // (double[2]*) is not a 1D pointer -#if defined(FFT_FFTW3) +#if defined(FFT_KOKKOS_FFTW3) typedef FFT_SCALAR* FFT_DATA_POINTER; #else typedef FFT_DATA* FFT_DATA_POINTER; @@ -216,7 +216,7 @@ typedef struct FFTArrayTypes FFT_DAT; typedef struct FFTArrayTypes FFT_HAT; -#if defined(FFT_KISSFFT) +#if defined(FFT_KOKKOS_KISSFFT) #include "kissfft_kokkos.h" // uses t_FFT_DATA_1d, needs to come last #endif diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 912ae36f6f..17a9c82bdb 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -48,7 +48,7 @@ using namespace MathSpecialKokkos; enum{REVERSE_RHO}; enum{FORWARD_IK,FORWARD_IK_PERATOM}; -#ifdef FFT_SINGLE +#ifdef FFT_KOKKOS_SINGLE #define ZEROF 0.0f #define ONEF 1.0f #else @@ -2390,7 +2390,7 @@ void PPPMKokkos::compute_rho_coeff() s = 0.0; for (l = 0; l < j; l++) { a[l+1][k+order] = (a[l][k+1+order]-a[l][k-1+order]) / (l+1); -#ifdef FFT_SINGLE +#ifdef FFT_KOKKOS_SINGLE s += powf(0.5,(float) l+1) * (a[l][k-1+order] + powf(-1.0,(float) l) * a[l][k+1+order]) / (l+1); #else diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index d621313873..14d4670dbd 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -36,30 +36,30 @@ KSpaceStyle(pppm/kk/host,PPPMKokkos); // fix up FFT defines for KOKKOS with CUDA and HIP #ifdef KOKKOS_ENABLE_CUDA -# if defined(FFT_FFTW) -# undef FFT_FFTW +# if defined(FFT_KOKKOS_FFTW) +# undef FFT_KOKKOS_FFTW # endif -# if defined(FFT_FFTW3) -# undef FFT_FFTW3 +# if defined(FFT_KOKKOS_FFTW3) +# undef FFT_KOKKOS_FFTW3 # endif -# if defined(FFT_MKL) -# undef FFT_MKL +# if defined(FFT_KOKKOS_MKL) +# undef FFT_KOKKOS_MKL # endif -# if !defined(FFT_CUFFT) && !defined(FFT_KISSFFT) -# define FFT_KISSFFT +# if !defined(FFT_KOKKOS_CUFFT) && !defined(FFT_KOKKOS_KISSFFT) +# define FFT_KOKKOS_KISSFFT # endif #elif defined(KOKKOS_ENABLE_HIP) -# if defined(FFT_FFTW) -# undef FFT_FFTW +# if defined(FFT_KOKKOS_FFTW) +# undef FFT_KOKKOS_FFTW # endif -# if defined(FFT_FFTW3) -# undef FFT_FFTW3 +# if defined(FFT_KOKKOS_FFTW3) +# undef FFT_KOKKOS_FFTW3 # endif -# if defined(FFT_MKL) -# undef FFT_MKL +# if defined(FFT_KOKKOS_MKL) +# undef FFT_KOKKOS_MKL # endif -# if !defined(FFT_HIPFFT) && !defined(FFT_KISSFFT) -# define FFT_KISSFFT +# if !defined(FFT_KOKKOS_HIPFFT) && !defined(FFT_KOKKOS_KISSFFT) +# define FFT_KOKKOS_KISSFFT # endif #endif From 0e98e706c69881f5562ce12285f80687ac8baf6c Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Wed, 6 Dec 2023 11:49:54 -0500 Subject: [PATCH 029/448] Added LMPFFT settings for Kokkos-specific FFTs --- src/KOKKOS/lmpfftsettings_kokkos.h | 54 ++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 src/KOKKOS/lmpfftsettings_kokkos.h diff --git a/src/KOKKOS/lmpfftsettings_kokkos.h b/src/KOKKOS/lmpfftsettings_kokkos.h new file mode 100644 index 0000000000..148e001de3 --- /dev/null +++ b/src/KOKKOS/lmpfftsettings_kokkos.h @@ -0,0 +1,54 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +// common FFT library related defines and compilation settings + +#ifndef LMP_FFT_KOKKOS_SETTINGS_H +#define LMP_FFT_KOKKOS_SETTINGS_H + +// if user set FFTW, it means FFTW3 + +#ifdef FFT_KOKKOS_FFTW +#ifndef FFT_KOKKOS_FFTW3 +#define FFT_KOKKOS_FFTW3 +#endif +#endif + +// set strings for library info output + +#if defined(FFT_KOKKOS_FFTW3) +#define LMP_FFT_KOKKOS_LIB "FFTW3" +#elif defined(FFT_KOKKOS_MKL) +#define LMP_FFT_KOKKOS_LIB "MKL FFT" +#elif defined(FFT_KOKKOS_CUFFT) +#define LMP_FFT_KOKKOS_LIB "cuFFT" +#elif defined(FFT_KOKKOS_HIPFFT) +#define LMP_FFT_KOKKOS_LIB "hipFFT" +#else +#define LMP_FFT_KOKKOS_LIB "KISS FFT" +#endif + +#ifdef FFT_KOKKOS_SINGLE +typedef float FFT_KOKKOS_SCALAR; +#define FFT_KOKKOS_PRECISION 1 +#define LMP_FFT_KOKKOS_PREC "single" +#define MPI_FFT_KOKKOS_SCALAR MPI_FLOAT +#else + +typedef double FFT_KOKKOS_SCALAR; +#define FFT_KOKKOS_PRECISION 2 +#define LMP_FFT_KOKKOS_PREC "double" +#define MPI_FFT_KOKKOS_SCALAR MPI_DOUBLE +#endif + +#endif From 48ef968dd21c4e474fdbf927a89b0585f5235568 Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Wed, 6 Dec 2023 12:01:06 -0500 Subject: [PATCH 030/448] Decoupled all variable typenames from the non-kokkos FFT --- src/KOKKOS/fft3d_kokkos.cpp | 120 +++++++++++++++++------------------ src/KOKKOS/fft3d_kokkos.h | 16 ++--- src/KOKKOS/fftdata_kokkos.h | 90 +++++++++++++------------- src/KOKKOS/grid3d_kokkos.cpp | 28 ++++---- src/KOKKOS/grid3d_kokkos.h | 14 ++-- src/KOKKOS/kissfft_kokkos.h | 76 +++++++++++----------- src/KOKKOS/kokkos_base_fft.h | 12 ++-- src/KOKKOS/pack_kokkos.h | 52 +++++++-------- src/KOKKOS/pppm_kokkos.cpp | 114 ++++++++++++++++----------------- src/KOKKOS/pppm_kokkos.h | 50 +++++++-------- src/KOKKOS/remap_kokkos.cpp | 20 +++--- src/KOKKOS/remap_kokkos.h | 20 +++--- 12 files changed, 306 insertions(+), 306 deletions(-) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index ca3d18e11a..7093136fe6 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -88,10 +88,10 @@ FFT3dKokkos::~FFT3dKokkos() /* ---------------------------------------------------------------------- */ template -void FFT3dKokkos::compute(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, int flag) +void FFT3dKokkos::compute(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_out, int flag) { - typename FFT_AT::t_FFT_DATA_1d d_in_data((FFT_DATA_POINTER)d_in.data(),d_in.size()/2); - typename FFT_AT::t_FFT_DATA_1d d_out_data((FFT_DATA_POINTER)d_out.data(),d_out.size()/2); + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_out_data((FFT_KOKKOS_DATA_POINTER)d_out.data(),d_out.size()/2); fft_3d_kokkos(d_in_data,d_out_data,flag,plan); } @@ -99,9 +99,9 @@ void FFT3dKokkos::compute(typename FFT_AT::t_FFT_SCALAR_1d d_in, typ /* ---------------------------------------------------------------------- */ template -void FFT3dKokkos::timing1d(typename FFT_AT::t_FFT_SCALAR_1d d_in, int nsize, int flag) +void FFT3dKokkos::timing1d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, int nsize, int flag) { - typename FFT_AT::t_FFT_DATA_1d d_in_data((FFT_DATA_POINTER)d_in.data(),d_in.size()/2); + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); fft_3d_1d_only_kokkos(d_in_data,nsize,flag,plan); } @@ -140,17 +140,17 @@ template struct norm_functor { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_DATA_1d_um d_out; + typedef FFTArrayTypes FFT_KOKKOS_AT; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_out; int norm; - norm_functor(typename FFT_AT::t_FFT_DATA_1d &d_out_, int norm_): + norm_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d &d_out_, int norm_): d_out(d_out_),norm(norm_) {} KOKKOS_INLINE_FUNCTION void operator() (const int &i) const { #if defined(FFT_KOKKOS_FFTW3) || defined(FFT_KOKKOS_CUFFT) || defined(FFT_KOKKOS_HIPFFT) - FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i); + FFT_KOKKOS_SCALAR* out_ptr = (FFT_KOKKOS_SCALAR *)(d_out.data()+i); *(out_ptr++) *= norm; *(out_ptr++) *= norm; #elif defined(FFT_KOKKOS_MKL) @@ -167,14 +167,14 @@ template struct kiss_fft_functor { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_DATA_1d_um d_data,d_tmp; + typedef FFTArrayTypes FFT_KOKKOS_AT; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_data,d_tmp; kiss_fft_state_kokkos st; int length; kiss_fft_functor() = default; - kiss_fft_functor(typename FFT_AT::t_FFT_DATA_1d &d_data_,typename FFT_AT::t_FFT_DATA_1d &d_tmp_, kiss_fft_state_kokkos &st_, int length_): + kiss_fft_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d &d_data_,typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d &d_tmp_, kiss_fft_state_kokkos &st_, int length_): d_data(d_data_), d_tmp(d_tmp_), st(st_) @@ -191,11 +191,11 @@ public: #endif template -void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, typename FFT_AT::t_FFT_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos *plan) +void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos *plan) { int total,length; - typename FFT_AT::t_FFT_DATA_1d d_data,d_copy; - typename FFT_AT::t_FFT_SCALAR_1d d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_data,d_copy; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar; // pre-remap to prepare for 1st FFTs if needed // copy = loc for remap result @@ -204,9 +204,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, if (plan->pre_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_in_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_in.data(),d_in.size()*2); - d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_in_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_in.data(),d_in.size()*2); + d_copy_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_in_scalar, d_copy_scalar, d_scratch_scalar, plan->pre_plan); @@ -226,16 +226,16 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, DftiComputeBackward(plan->handle_fast,d_data.data()); #elif defined(FFT_KOKKOS_FFTW3) if (flag == 1) - FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); else - FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); #elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); #else - typename FFT_AT::t_FFT_DATA_1d d_tmp = - typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_tmp = + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); kiss_fft_functor f; if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length); @@ -251,9 +251,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, if (plan->mid1_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); - d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid1_plan); @@ -272,15 +272,15 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, DftiComputeBackward(plan->handle_mid,d_data.data()); #elif defined(FFT_KOKKOS_FFTW3) if (flag == 1) - FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); else - FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); #elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); #else - d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + d_tmp = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_mid_forward,length); else @@ -295,9 +295,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, if (plan->mid2_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); - d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid2_plan); @@ -316,15 +316,15 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, DftiComputeBackward(plan->handle_slow,d_data.data()); #elif defined(FFT_KOKKOS_FFTW3) if (flag == 1) - FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); else - FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); #elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else - d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + d_tmp = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_slow_forward,length); else @@ -337,9 +337,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, // destination is always out if (plan->post_plan) { - d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); - d_out_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_out.data(),d_out.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); + d_out_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_out.data(),d_out.size()*2); + d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_out_scalar, d_scratch_scalar, plan->post_plan); @@ -348,7 +348,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, // scaling if required if (flag == -1 && plan->scaled) { - FFT_SCALAR norm = plan->norm; + FFT_KOKKOS_SCALAR norm = plan->norm; int num = plan->normnum; norm_functor f(d_out,norm); @@ -443,7 +443,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl plan->pre_plan = remapKK->remap_3d_create_plan_kokkos(comm,in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi, first_ilo,first_ihi,first_jlo,first_jhi, - first_klo,first_khi,2,0,0,FFT_PRECISION, + first_klo,first_khi,2,0,0,FFT_KOKKOS_PRECISION, usecollective,usecuda_aware); if (plan->pre_plan == nullptr) return nullptr; } @@ -468,7 +468,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl first_ilo,first_ihi,first_jlo,first_jhi, first_klo,first_khi, second_ilo,second_ihi,second_jlo,second_jhi, - second_klo,second_khi,2,1,0,FFT_PRECISION, + second_klo,second_khi,2,1,0,FFT_KOKKOS_PRECISION, usecollective,usecuda_aware); if (plan->mid1_plan == nullptr) return nullptr; @@ -509,7 +509,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl second_jlo,second_jhi,second_klo,second_khi, second_ilo,second_ihi, third_jlo,third_jhi,third_klo,third_khi, - third_ilo,third_ihi,2,1,0,FFT_PRECISION, + third_ilo,third_ihi,2,1,0,FFT_KOKKOS_PRECISION, usecollective,usecuda_aware); if (plan->mid2_plan == nullptr) return nullptr; @@ -537,7 +537,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl third_klo,third_khi,third_ilo,third_ihi, third_jlo,third_jhi, out_klo,out_khi,out_ilo,out_ihi, - out_jlo,out_jhi,2,(permute+1)%3,0,FFT_PRECISION, + out_jlo,out_jhi,2,(permute+1)%3,0,FFT_KOKKOS_PRECISION, usecollective,usecuda_aware); if (plan->post_plan == nullptr) return nullptr; } @@ -599,11 +599,11 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl *nbuf = copy_size + scratch_size; if (copy_size) { - plan->d_copy = typename FFT_AT::t_FFT_DATA_1d("fft3d:copy",copy_size); + plan->d_copy = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d("fft3d:copy",copy_size); } if (scratch_size) { - plan->d_scratch = typename FFT_AT::t_FFT_DATA_1d("fft3d:scratch",scratch_size); + plan->d_scratch = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d("fft3d:scratch",scratch_size); } // system specific pre-computation of 1d FFT coeffs @@ -697,34 +697,34 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl cufftPlanMany(&(plan->plan_fast), 1, &nfast, &nfast,1,plan->length1, &nfast,1,plan->length1, - CUFFT_TYPE,plan->total1/plan->length1); + CUFFT_KOKKOS_TYPE,plan->total1/plan->length1); cufftPlanMany(&(plan->plan_mid), 1, &nmid, &nmid,1,plan->length2, &nmid,1,plan->length2, - CUFFT_TYPE,plan->total2/plan->length2); + CUFFT_KOKKOS_TYPE,plan->total2/plan->length2); cufftPlanMany(&(plan->plan_slow), 1, &nslow, &nslow,1,plan->length3, &nslow,1,plan->length3, - CUFFT_TYPE,plan->total3/plan->length3); + CUFFT_KOKKOS_TYPE,plan->total3/plan->length3); #elif defined(FFT_KOKKOS_HIPFFT) hipfftPlanMany(&(plan->plan_fast), 1, &nfast, &nfast,1,plan->length1, &nfast,1,plan->length1, - HIPFFT_TYPE,plan->total1/plan->length1); + HIPFFT_KOKKOS_TYPE,plan->total1/plan->length1); hipfftPlanMany(&(plan->plan_mid), 1, &nmid, &nmid,1,plan->length2, &nmid,1,plan->length2, - HIPFFT_TYPE,plan->total2/plan->length2); + HIPFFT_KOKKOS_TYPE,plan->total2/plan->length2); hipfftPlanMany(&(plan->plan_slow), 1, &nslow, &nslow,1,plan->length3, &nslow,1,plan->length3, - HIPFFT_TYPE,plan->total3/plan->length3); + HIPFFT_KOKKOS_TYPE,plan->total3/plan->length3); #else /* FFT_KOKKOS_KISS */ @@ -838,7 +838,7 @@ void FFT3dKokkos::bifactor(int n, int *factor1, int *factor2) ------------------------------------------------------------------------- */ template -void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_1d d_data, int nsize, int flag, +void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_data, int nsize, int flag, struct fft_plan_3d_kokkos *plan) { // total = size of data needed in each dim @@ -878,13 +878,13 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ } #elif defined(FFT_KOKKOS_FFTW3) if (flag == -1) { - FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); } else { - FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); } #elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); @@ -896,8 +896,8 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else kiss_fft_functor f; - typename FFT_AT::t_FFT_DATA_1d d_tmp = - typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_tmp = + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == -1) { f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length1); Kokkos::parallel_for(total1/length1,f); @@ -923,7 +923,7 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ // limit num to size of data if (flag == 1 && plan->scaled) { - FFT_SCALAR norm = plan->norm; + FFT_KOKKOS_SCALAR norm = plan->norm; int num = MIN(plan->normnum,nsize); norm_functor f(d_data,norm); diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index ed49c4b1ee..bb552ec4ef 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -28,14 +28,14 @@ namespace LAMMPS_NS { template struct fft_plan_3d_kokkos { typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; struct remap_plan_3d_kokkos *pre_plan; // remap from input -> 1st FFTs struct remap_plan_3d_kokkos *mid1_plan; // remap from 1st -> 2nd FFTs struct remap_plan_3d_kokkos *mid2_plan; // remap from 2nd -> 3rd FFTs struct remap_plan_3d_kokkos *post_plan; // remap from 3rd FFTs -> output - typename FFT_AT::t_FFT_DATA_1d d_copy; // memory for remap results (if needed) - typename FFT_AT::t_FFT_DATA_1d d_scratch; // scratch space for remaps + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_copy; // memory for remap results (if needed) + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_scratch; // scratch space for remaps int total1,total2,total3; // # of 1st,2nd,3rd FFTs (times length) int length1,length2,length3; // length of 1st,2nd,3rd FFTs int pre_target; // where to put remap results @@ -79,14 +79,14 @@ class FFT3dKokkos : protected Pointers { public: enum{FORWARD=1,BACKWARD=-1}; typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; FFT3dKokkos(class LAMMPS *, MPI_Comm, int,int,int,int,int,int,int,int,int,int,int,int,int,int,int, int,int,int *,int,int); ~FFT3dKokkos() override; - void compute(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, int); - void timing1d(typename FFT_AT::t_FFT_SCALAR_1d, int, int); + void compute(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, int); + void timing1d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, int, int); private: struct fft_plan_3d_kokkos *plan; @@ -96,7 +96,7 @@ class FFT3dKokkos : protected Pointers { KissFFTKokkos *kissfftKK; #endif - void fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d, typename FFT_AT::t_FFT_DATA_1d, int, struct fft_plan_3d_kokkos *); + void fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d, int, struct fft_plan_3d_kokkos *); struct fft_plan_3d_kokkos *fft_3d_create_plan_kokkos(MPI_Comm, int, int, int, int, int, int, int, int, @@ -105,7 +105,7 @@ class FFT3dKokkos : protected Pointers { void fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokkos *); - void fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_1d, int, int, struct fft_plan_3d_kokkos *); + void fft_3d_1d_only_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d, int, int, struct fft_plan_3d_kokkos *); void bifactor(int, int *, int *); }; diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index bed2c7faf0..a9ea2de896 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -20,10 +20,10 @@ // data types for 2d/3d FFTs -#ifndef LMP_FFT_DATA_KOKKOS_H -#define LMP_FFT_DATA_KOKKOS_H +#ifndef LMP_FFT_KOKKOS_DATA_H +#define LMP_FFT_KOKKOS_DATA_H -#include "lmpfftsettings.h" +#include "lmpfftsettings_kokkos.h" // ------------------------------------------------------------------------- @@ -87,42 +87,42 @@ #if defined(FFT_KOKKOS_MKL) #include "mkl_dfti.h" #if defined(FFT_KOKKOS_SINGLE) - typedef float _Complex FFT_DATA; + typedef float _Complex FFT_KOKKOS_DATA; #define FFT_KOKKOS_MKL_PREC DFTI_SINGLE #else - typedef double _Complex FFT_DATA; + typedef double _Complex FFT_KOKKOS_DATA; #define FFT_KOKKOS_MKL_PREC DFTI_DOUBLE #endif #elif defined(FFT_KOKKOS_FFTW3) #include "fftw3.h" #if defined(FFT_KOKKOS_SINGLE) - typedef fftwf_complex FFT_DATA; + typedef fftwf_complex FFT_KOKKOS_DATA; #define FFTW_API(function) fftwf_ ## function #else - typedef fftw_complex FFT_DATA; + typedef fftw_complex FFT_KOKKOS_DATA; #define FFTW_API(function) fftw_ ## function #endif #elif defined(FFT_KOKKOS_CUFFT) #include "cufft.h" #if defined(FFT_KOKKOS_SINGLE) #define cufftExec cufftExecC2C - #define CUFFT_TYPE CUFFT_C2C - typedef cufftComplex FFT_DATA; + #define CUFFT_KOKKOS_TYPE CUFFT_KOKKOS_C2C + typedef cufftComplex FFT_KOKKOS_DATA; #else #define cufftExec cufftExecZ2Z - #define CUFFT_TYPE CUFFT_Z2Z - typedef cufftDoubleComplex FFT_DATA; + #define CUFFT_KOKKOS_TYPE CUFFT_KOKKOS_Z2Z + typedef cufftDoubleComplex FFT_KOKKOS_DATA; #endif #elif defined(FFT_KOKKOS_HIPFFT) #include #if defined(FFT_KOKKOS_SINGLE) #define hipfftExec hipfftExecC2C - #define HIPFFT_TYPE HIPFFT_C2C - typedef hipfftComplex FFT_DATA; + #define HIPFFT_KOKKOS_TYPE HIPFFT_KOKKOS_C2C + typedef hipfftComplex FFT_KOKKOS_DATA; #else #define hipfftExec hipfftExecZ2Z - #define HIPFFT_TYPE HIPFFT_Z2Z - typedef hipfftDoubleComplex FFT_DATA; + #define HIPFFT_KOKKOS_TYPE HIPFFT_KOKKOS_Z2Z + typedef hipfftDoubleComplex FFT_KOKKOS_DATA; #endif #else #if defined(FFT_KOKKOS_SINGLE) @@ -133,7 +133,7 @@ typedef struct { kiss_fft_scalar re; kiss_fft_scalar im; - } FFT_DATA; + } FFT_KOKKOS_DATA; #ifndef FFT_KOKKOS_KISSFFT #define FFT_KOKKOS_KISSFFT #endif @@ -141,9 +141,9 @@ // (double[2]*) is not a 1D pointer #if defined(FFT_KOKKOS_FFTW3) - typedef FFT_SCALAR* FFT_DATA_POINTER; + typedef FFT_KOKKOS_SCALAR* FFT_KOKKOS_DATA_POINTER; #else - typedef FFT_DATA* FFT_DATA_POINTER; + typedef FFT_KOKKOS_DATA* FFT_KOKKOS_DATA_POINTER; #endif @@ -154,23 +154,23 @@ template <> struct FFTArrayTypes { typedef Kokkos:: - DualView tdual_FFT_SCALAR_1d; -typedef tdual_FFT_SCALAR_1d::t_dev t_FFT_SCALAR_1d; -typedef tdual_FFT_SCALAR_1d::t_dev_um t_FFT_SCALAR_1d_um; + DualView tdual_FFT_KOKKOS_SCALAR_1d; +typedef tdual_FFT_KOKKOS_SCALAR_1d::t_dev t_FFT_KOKKOS_SCALAR_1d; +typedef tdual_FFT_KOKKOS_SCALAR_1d::t_dev_um t_FFT_KOKKOS_SCALAR_1d_um; -typedef Kokkos::DualView tdual_FFT_SCALAR_2d; -typedef tdual_FFT_SCALAR_2d::t_dev t_FFT_SCALAR_2d; +typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d; +typedef tdual_FFT_KOKKOS_SCALAR_2d::t_dev t_FFT_KOKKOS_SCALAR_2d; -typedef Kokkos::DualView tdual_FFT_SCALAR_2d_3; -typedef tdual_FFT_SCALAR_2d_3::t_dev t_FFT_SCALAR_2d_3; +typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d_3; +typedef tdual_FFT_KOKKOS_SCALAR_2d_3::t_dev t_FFT_KOKKOS_SCALAR_2d_3; -typedef Kokkos::DualView tdual_FFT_SCALAR_3d; -typedef tdual_FFT_SCALAR_3d::t_dev t_FFT_SCALAR_3d; +typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_3d; +typedef tdual_FFT_KOKKOS_SCALAR_3d::t_dev t_FFT_KOKKOS_SCALAR_3d; typedef Kokkos:: - DualView tdual_FFT_DATA_1d; -typedef tdual_FFT_DATA_1d::t_dev t_FFT_DATA_1d; -typedef tdual_FFT_DATA_1d::t_dev_um t_FFT_DATA_1d_um; + DualView tdual_FFT_KOKKOS_DATA_1d; +typedef tdual_FFT_KOKKOS_DATA_1d::t_dev t_FFT_KOKKOS_DATA_1d; +typedef tdual_FFT_KOKKOS_DATA_1d::t_dev_um t_FFT_KOKKOS_DATA_1d_um; typedef Kokkos:: DualView tdual_int_64; @@ -186,23 +186,23 @@ struct FFTArrayTypes { //Kspace typedef Kokkos:: - DualView tdual_FFT_SCALAR_1d; -typedef tdual_FFT_SCALAR_1d::t_host t_FFT_SCALAR_1d; -typedef tdual_FFT_SCALAR_1d::t_host_um t_FFT_SCALAR_1d_um; + DualView tdual_FFT_KOKKOS_SCALAR_1d; +typedef tdual_FFT_KOKKOS_SCALAR_1d::t_host t_FFT_KOKKOS_SCALAR_1d; +typedef tdual_FFT_KOKKOS_SCALAR_1d::t_host_um t_FFT_KOKKOS_SCALAR_1d_um; -typedef Kokkos::DualView tdual_FFT_SCALAR_2d; -typedef tdual_FFT_SCALAR_2d::t_host t_FFT_SCALAR_2d; +typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d; +typedef tdual_FFT_KOKKOS_SCALAR_2d::t_host t_FFT_KOKKOS_SCALAR_2d; -typedef Kokkos::DualView tdual_FFT_SCALAR_2d_3; -typedef tdual_FFT_SCALAR_2d_3::t_host t_FFT_SCALAR_2d_3; +typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d_3; +typedef tdual_FFT_KOKKOS_SCALAR_2d_3::t_host t_FFT_KOKKOS_SCALAR_2d_3; -typedef Kokkos::DualView tdual_FFT_SCALAR_3d; -typedef tdual_FFT_SCALAR_3d::t_host t_FFT_SCALAR_3d; +typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_3d; +typedef tdual_FFT_KOKKOS_SCALAR_3d::t_host t_FFT_KOKKOS_SCALAR_3d; typedef Kokkos:: - DualView tdual_FFT_DATA_1d; -typedef tdual_FFT_DATA_1d::t_host t_FFT_DATA_1d; -typedef tdual_FFT_DATA_1d::t_host_um t_FFT_DATA_1d_um; + DualView tdual_FFT_KOKKOS_DATA_1d; +typedef tdual_FFT_KOKKOS_DATA_1d::t_host t_FFT_KOKKOS_DATA_1d; +typedef tdual_FFT_KOKKOS_DATA_1d::t_host_um t_FFT_KOKKOS_DATA_1d_um; typedef Kokkos:: DualView tdual_int_64; @@ -212,12 +212,12 @@ typedef tdual_int_64::t_host_um t_int_64_um; }; #endif -typedef struct FFTArrayTypes FFT_DAT; -typedef struct FFTArrayTypes FFT_HAT; +typedef struct FFTArrayTypes FFT_KOKKOS_DAT; +typedef struct FFTArrayTypes FFT_KOKKOS_HAT; #if defined(FFT_KOKKOS_KISSFFT) -#include "kissfft_kokkos.h" // uses t_FFT_DATA_1d, needs to come last +#include "kissfft_kokkos.h" // uses t_FFT_KOKKOS_DATA_1d, needs to come last #endif diff --git a/src/KOKKOS/grid3d_kokkos.cpp b/src/KOKKOS/grid3d_kokkos.cpp index 9a82e0157d..0f8e0bdc4e 100644 --- a/src/KOKKOS/grid3d_kokkos.cpp +++ b/src/KOKKOS/grid3d_kokkos.cpp @@ -636,7 +636,7 @@ void Grid3dKokkos::setup_comm_tiled(int &nbuf1, int &nbuf2) template void Grid3dKokkos::forward_comm(int caller, void *ptr, int which, int nper, int nbyte, - FFT_DAT::tdual_FFT_SCALAR_1d& k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d& k_buf2, + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf2, MPI_Datatype datatype) { if (caller == KSPACE) { @@ -655,14 +655,14 @@ void Grid3dKokkos::forward_comm(int caller, void *ptr, int which, in template void Grid3dKokkos:: forward_comm_kspace_brick(KSpace *kspace, int which, int nper, - FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int m; MPI_Request request; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_SCALAR* buf1; - FFT_SCALAR* buf2; + FFT_KOKKOS_SCALAR* buf1; + FFT_KOKKOS_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); @@ -709,13 +709,13 @@ forward_comm_kspace_brick(KSpace *kspace, int which, int nper, template void Grid3dKokkos:: forward_comm_kspace_tiled(KSpace *kspace, int which, int nper, - FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int i,m,offset; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_SCALAR* buf1; - FFT_SCALAR* buf2; + FFT_KOKKOS_SCALAR* buf1; + FFT_KOKKOS_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); @@ -776,7 +776,7 @@ forward_comm_kspace_tiled(KSpace *kspace, int which, int nper, template void Grid3dKokkos::reverse_comm(int caller, void *ptr, int which, int nper, int nbyte, - FFT_DAT::tdual_FFT_SCALAR_1d& k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d& k_buf2, + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf2, MPI_Datatype datatype) { if (caller == KSPACE) { @@ -795,14 +795,14 @@ void Grid3dKokkos::reverse_comm(int caller, void *ptr, int which, in template void Grid3dKokkos:: reverse_comm_kspace_brick(KSpace *kspace, int which, int nper, - FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int m; MPI_Request request; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_SCALAR* buf1; - FFT_SCALAR* buf2; + FFT_KOKKOS_SCALAR* buf1; + FFT_KOKKOS_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); @@ -850,14 +850,14 @@ reverse_comm_kspace_brick(KSpace *kspace, int which, int nper, template void Grid3dKokkos:: reverse_comm_kspace_tiled(KSpace *kspace, int which, int nper, - FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int i,m,offset; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_SCALAR* buf1; - FFT_SCALAR* buf2; + FFT_KOKKOS_SCALAR* buf1; + FFT_KOKKOS_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); diff --git a/src/KOKKOS/grid3d_kokkos.h b/src/KOKKOS/grid3d_kokkos.h index 19751d83c9..8e9f6cd051 100644 --- a/src/KOKKOS/grid3d_kokkos.h +++ b/src/KOKKOS/grid3d_kokkos.h @@ -27,16 +27,16 @@ class Grid3dKokkos : public Grid3d { enum { KSPACE = 0, PAIR = 1, FIX = 2 }; // calling classes typedef DeviceType device_type; typedef ArrayTypes AT; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; Grid3dKokkos(class LAMMPS *, MPI_Comm, int, int, int); Grid3dKokkos(class LAMMPS *, MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int); ~Grid3dKokkos() override; void forward_comm(int, void *, int, int, int, - FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); void reverse_comm(int, void *, int, int, int, - FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); private: DAT::tdual_int_2d k_swap_packlist; @@ -57,13 +57,13 @@ class Grid3dKokkos : public Grid3d { void setup_comm_tiled(int &, int &) override; void forward_comm_kspace_brick(class KSpace *, int, int, - FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); void forward_comm_kspace_tiled(class KSpace *, int, int, - FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); void reverse_comm_kspace_brick(class KSpace *, int, int, - FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); void reverse_comm_kspace_tiled(class KSpace *, int, int, - FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); void grow_swap() override; diff --git a/src/KOKKOS/kissfft_kokkos.h b/src/KOKKOS/kissfft_kokkos.h index 265677a21c..fc23bf7891 100644 --- a/src/KOKKOS/kissfft_kokkos.h +++ b/src/KOKKOS/kissfft_kokkos.h @@ -119,14 +119,14 @@ }while(0) */ -#define KISS_FFT_COS(phase) (FFT_SCALAR) cos(phase) -#define KISS_FFT_SIN(phase) (FFT_SCALAR) sin(phase) +#define KISS_FFT_KOKKOS_COS(phase) (FFT_KOKKOS_SCALAR) cos(phase) +#define KISS_FFT_KOKKOS_SIN(phase) (FFT_KOKKOS_SCALAR) sin(phase) #define HALF_OF(x) ((x)*.5) #define kf_cexp(x,x_index,phase) \ do{ \ - (x)(x_index).re = KISS_FFT_COS(phase);\ - (x)(x_index).im = KISS_FFT_SIN(phase);\ + (x)(x_index).re = KISS_FFT_KOKKOS_COS(phase);\ + (x)(x_index).im = KISS_FFT_KOKKOS_SIN(phase);\ }while(0) @@ -138,26 +138,26 @@ namespace LAMMPS_NS { template struct kiss_fft_state_kokkos { typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; int nfft; int inverse; - typename FFT_AT::t_int_64 d_factors; - typename FFT_AT::t_FFT_DATA_1d d_twiddles; - typename FFT_AT::t_FFT_DATA_1d d_scratch; + typename FFT_KOKKOS_AT::t_int_64 d_factors; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_twiddles; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_scratch; }; template class KissFFTKokkos { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; KOKKOS_INLINE_FUNCTION - static void kf_bfly2(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly2(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int Fout_count) { - typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_SCALAR t[2]; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_KOKKOS_SCALAR t[2]; int Fout2_count; int tw1_count = 0; @@ -179,11 +179,11 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly4(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly4(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, const size_t m, int Fout_count) { - typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_SCALAR scratch[6][2]; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_KOKKOS_SCALAR scratch[6][2]; size_t k=m; const size_t m2=2*m; const size_t m3=3*m; @@ -237,14 +237,14 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly3(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly3(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, size_t m, int Fout_count) { size_t k=m; const size_t m2 = 2*m; - typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_SCALAR scratch[5][2]; - FFT_SCALAR epi3[2]; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_KOKKOS_SCALAR scratch[5][2]; + FFT_KOKKOS_SCALAR epi3[2]; //C_EQ(epi3,d_twiddles[fstride*m]); epi3[0] = d_twiddles(fstride*m).re; epi3[1] = d_twiddles(fstride*m).im; @@ -289,13 +289,13 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly5(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly5(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int Fout_count) { int u; - FFT_SCALAR scratch[13][2]; - typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_SCALAR ya[2],yb[2]; + FFT_KOKKOS_SCALAR scratch[13][2]; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_KOKKOS_SCALAR ya[2],yb[2]; //C_EQ(ya,d_twiddles[fstride*m]); ya[1] = d_twiddles(fstride*m).im; ya[0] = d_twiddles(fstride*m).re; @@ -369,15 +369,15 @@ class KissFFTKokkos { /* perform the butterfly for one stage of a mixed radix FFT */ KOKKOS_INLINE_FUNCTION - static void kf_bfly_generic(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly_generic(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int p, int Fout_count) { int u,k,q1,q; - typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_SCALAR t[2]; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_KOKKOS_SCALAR t[2]; int Norig = st.nfft; - typename FFT_AT::t_FFT_DATA_1d_um d_scratch = st.d_scratch; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_scratch = st.d_scratch; for ( u=0; u &st, int Fout_count, int f_count, int factors_count) + const typename FFT_KOKKOS_AT::t_int_64_um &d_factors, const kiss_fft_state_kokkos &st, int Fout_count, int f_count, int factors_count) { const int beg = Fout_count; const int p = d_factors[factors_count++]; /* the radix */ @@ -452,7 +452,7 @@ class KissFFTKokkos { p[i] * m[i] = m[i-1] m0 = n */ - static int kf_factor(int n, FFT_HAT::t_int_64 h_facbuf) + static int kf_factor(int n, FFT_KOKKOS_HAT::t_int_64 h_facbuf) { int p=4, nf=0; double floor_sqrt; @@ -496,12 +496,12 @@ class KissFFTKokkos { st.nfft = nfft; st.inverse = inverse_fft; - typename FFT_AT::tdual_int_64 k_factors = typename FFT_AT::tdual_int_64(); - typename FFT_AT::tdual_FFT_DATA_1d k_twiddles = typename FFT_AT::tdual_FFT_DATA_1d(); + typename FFT_KOKKOS_AT::tdual_int_64 k_factors = typename FFT_KOKKOS_AT::tdual_int_64(); + typename FFT_KOKKOS_AT::tdual_FFT_KOKKOS_DATA_1d k_twiddles = typename FFT_KOKKOS_AT::tdual_FFT_KOKKOS_DATA_1d(); if (nfft > 0) { - k_factors = typename FFT_AT::tdual_int_64("kissfft:factors",MAXFACTORS*2); - k_twiddles = typename FFT_AT::tdual_FFT_DATA_1d("kissfft:twiddles",nfft); + k_factors = typename FFT_KOKKOS_AT::tdual_int_64("kissfft:factors",MAXFACTORS*2); + k_twiddles = typename FFT_KOKKOS_AT::tdual_FFT_KOKKOS_DATA_1d("kissfft:twiddles",nfft); for (i=0;i(); @@ -524,12 +524,12 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kiss_fft_stride(const kiss_fft_state_kokkos &st, const typename FFT_AT::t_FFT_DATA_1d_um &d_fin, typename FFT_AT::t_FFT_DATA_1d_um &d_fout, int in_stride, int offset) + static void kiss_fft_stride(const kiss_fft_state_kokkos &st, const typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_fin, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_fout, int in_stride, int offset) { //if (d_fin.data() == d_fout.data()) { // // NOTE: this is not really an in-place FFT algorithm. // // It just performs an out-of-place FFT into a temp buffer - // typename FFT_AT::t_FFT_DATA_1d_um d_tmpbuf = typename FFT_AT::t_FFT_DATA_1d("kissfft:tmpbuf",d_fin.extent(1)); + // typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_tmpbuf = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d("kissfft:tmpbuf",d_fin.extent(1)); // kf_work(d_tmpbuf,d_fin,1,in_stride,st.d_factors,st,offset,offset).re; // Kokkos::deep_copy(d_fout,d_tmpbuf); //} else { @@ -538,7 +538,7 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kiss_fft_kokkos(const kiss_fft_state_kokkos &cfg, const typename FFT_AT::t_FFT_DATA_1d_um d_fin, typename FFT_AT::t_FFT_DATA_1d_um d_fout, int offset) + static void kiss_fft_kokkos(const kiss_fft_state_kokkos &cfg, const typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_fin, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_fout, int offset) { kiss_fft_stride(cfg,d_fin,d_fout,1,offset); } diff --git a/src/KOKKOS/kokkos_base_fft.h b/src/KOKKOS/kokkos_base_fft.h index 08369b3c78..567dc02ff3 100644 --- a/src/KOKKOS/kokkos_base_fft.h +++ b/src/KOKKOS/kokkos_base_fft.h @@ -12,8 +12,8 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ -#ifndef LMP_KOKKOS_BASE_FFT_H -#define LMP_KOKKOS_BASE_FFT_H +#ifndef LMP_KOKKOS_BASE_FFT_KOKKOS_H +#define LMP_KOKKOS_BASE_FFT_KOKKOS_H #include "fftdata_kokkos.h" @@ -24,10 +24,10 @@ class KokkosBaseFFT { KokkosBaseFFT() {} // Kspace - virtual void pack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual void unpack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; - virtual void pack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual void unpack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; + virtual void pack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void unpack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; + virtual void pack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void unpack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; }; } diff --git a/src/KOKKOS/pack_kokkos.h b/src/KOKKOS/pack_kokkos.h index fe90d294a6..97d35afe26 100644 --- a/src/KOKKOS/pack_kokkos.h +++ b/src/KOKKOS/pack_kokkos.h @@ -38,13 +38,13 @@ template class PackKokkos { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; struct pack_3d_functor { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typedef FFTArrayTypes FFT_KOKKOS_AT; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -52,7 +52,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - pack_3d_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + pack_3d_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -80,7 +80,7 @@ public: } }; -static void pack_3d(typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan) +static void pack_3d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -98,7 +98,7 @@ struct unpack_3d_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -106,7 +106,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -134,7 +134,7 @@ public: } }; -static void unpack_3d(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -153,7 +153,7 @@ struct unpack_3d_permute1_1_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -161,7 +161,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute1_1_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_1_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -189,7 +189,7 @@ public: } }; -static void unpack_3d_permute1_1(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_1(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -206,7 +206,7 @@ struct unpack_3d_permute1_2_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -214,7 +214,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute1_2_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_2_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -243,7 +243,7 @@ public: } }; -static void unpack_3d_permute1_2(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_2(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -261,7 +261,7 @@ struct unpack_3d_permute1_n_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -270,7 +270,7 @@ public: int nstride_plane; // stride between successive slow indices int nqty; // # of values/element - unpack_3d_permute1_n_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_n_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -299,7 +299,7 @@ public: } }; -static void unpack_3d_permute1_n(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_n(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -317,7 +317,7 @@ struct unpack_3d_permute2_1_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -325,7 +325,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute2_1_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_1_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -352,7 +352,7 @@ public: } }; -static void unpack_3d_permute2_1(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_1(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -370,7 +370,7 @@ struct unpack_3d_permute2_2_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -378,7 +378,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute2_2_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_2_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -406,7 +406,7 @@ public: } }; -static void unpack_3d_permute2_2(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_2(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -423,7 +423,7 @@ struct unpack_3d_permute2_n_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -432,7 +432,7 @@ public: int nstride_plane; // stride between successive slow indices int nqty; // # of values/element - unpack_3d_permute2_n_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_n_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -460,7 +460,7 @@ public: } }; -static void unpack_3d_permute2_n(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_n(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 17a9c82bdb..3fc90c088d 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -285,7 +285,7 @@ void PPPMKokkos::init() estimated_accuracy); mesg += fmt::format(" estimated relative force accuracy = {:.8g}\n", estimated_accuracy/two_charge_force); - mesg += " using " LMP_FFT_PREC " precision " LMP_FFT_LIB "\n"; + mesg += " using " LMP_FFT_KOKKOS_PREC " precision " LMP_FFT_KOKKOS_LIB "\n"; mesg += fmt::format(" 3d grid and FFT values/proc = {} {}\n", ngrid_max,nfft_both_max); utils::logmesg(lmp,mesg); @@ -582,7 +582,7 @@ void PPPMKokkos::compute(int eflag, int vflag) if (atom->nmax > nmax) { nmax = atomKK->nmax; d_part2grid = typename AT::t_int_1d_3("pppm:part2grid",nmax); - d_rho1d = typename FFT_AT::t_FFT_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); + d_rho1d = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); } // find grid points for all my particles @@ -595,8 +595,8 @@ void PPPMKokkos::compute(int eflag, int vflag) // to fully sum contribution in their 3d bricks // remap from 3d decomposition to FFT decomposition - gc->reverse_comm(Grid3d::KSPACE,this,REVERSE_RHO,1,sizeof(FFT_SCALAR), - k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR); + gc->reverse_comm(Grid3d::KSPACE,this,REVERSE_RHO,1,sizeof(FFT_KOKKOS_SCALAR), + k_gc_buf1,k_gc_buf2,MPI_FFT_KOKKOS_SCALAR); brick2fft(); // compute potential gradient on my FFT grid and @@ -609,14 +609,14 @@ void PPPMKokkos::compute(int eflag, int vflag) // all procs communicate E-field values // to fill ghost cells surrounding their 3d bricks - gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK,3,sizeof(FFT_SCALAR), - k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR); + gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK,3,sizeof(FFT_KOKKOS_SCALAR), + k_gc_buf1,k_gc_buf2,MPI_FFT_KOKKOS_SCALAR); // extra per-atom energy/virial communication if (evflag_atom) - gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK_PERATOM,7,sizeof(FFT_SCALAR), - k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR); + gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK_PERATOM,7,sizeof(FFT_KOKKOS_SCALAR), + k_gc_buf1,k_gc_buf2,MPI_FFT_KOKKOS_SCALAR); // calculate the force on my particles @@ -730,8 +730,8 @@ void PPPMKokkos::allocate() npergrid = 3; - k_gc_buf1 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); - k_gc_buf2 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); + k_gc_buf1 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); + k_gc_buf2 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); // tally local grid sizes // ngrid = count of owned+ghost grid cells on this proc @@ -753,7 +753,7 @@ void PPPMKokkos::allocate() // allocate distributed grid data - d_density_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_density_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); memoryKK->create_kokkos(k_density_fft,density_fft,nfft_both,"pppm:d_density_fft"); d_density_fft = k_density_fft.view(); @@ -775,17 +775,17 @@ void PPPMKokkos::allocate() d_fkz = typename AT::t_float_1d("pppm:d_fkz",nfft_both); } - d_vdx_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_vdy_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_vdz_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdx_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdy_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdz_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); // summation coeffs order_allocated = order; k_gf_b = typename DAT::tdual_float_1d("pppm:gf_b",order); d_gf_b = k_gf_b.view(); - d_rho1d = typename FFT_AT::t_FFT_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); - k_rho_coeff = FFT_DAT::tdual_FFT_SCALAR_2d("pppm:rho_coeff",order,order/2-(1-order)/2+1); + d_rho1d = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); + k_rho_coeff = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_2d("pppm:rho_coeff",order,order/2-(1-order)/2+1); d_rho_coeff = k_rho_coeff.view(); h_rho_coeff = k_rho_coeff.h_view; @@ -810,7 +810,7 @@ void PPPMKokkos::allocate() remap = new RemapKokkos(lmp,world, nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,FFT_PRECISION,collective_flag,gpu_aware_flag); + 1,0,0,FFT_KOKKOS_PRECISION,collective_flag,gpu_aware_flag); } /* ---------------------------------------------------------------------- @@ -847,14 +847,14 @@ void PPPMKokkos::allocate_peratom() { peratom_allocate_flag = 1; - d_u_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_u_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v0_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v1_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v2_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v3_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v4_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v5_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v0_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v1_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v2_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v3_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v4_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v5_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); // use same GC ghost grid object for peratom grid communication @@ -862,8 +862,8 @@ void PPPMKokkos::allocate_peratom() npergrid = 7; - k_gc_buf1 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); - k_gc_buf2 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); + k_gc_buf1 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); + k_gc_buf2 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); } /* ---------------------------------------------------------------------- @@ -1234,14 +1234,14 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_make_rho_atomic, const int &i) const { // The density_brick array is atomic for Half/Thread neighbor style - Kokkos::View::value,Kokkos::MemoryTraits > a_density_brick = d_density_brick; + Kokkos::View::value,Kokkos::MemoryTraits > a_density_brick = d_density_brick; int nx = d_part2grid(i,0); int ny = d_part2grid(i,1); int nz = d_part2grid(i,2); - const FFT_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; - const FFT_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; - const FFT_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; + const FFT_KOKKOS_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; + const FFT_KOKKOS_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; + const FFT_KOKKOS_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; nz -= nzlo_out; @@ -1250,13 +1250,13 @@ void PPPMKokkos::operator()(TagPPPM_make_rho_atomic, const int &i) c compute_rho1d(i,dx,dy,dz); - const FFT_SCALAR z0 = delvolinv * q[i]; + const FFT_KOKKOS_SCALAR z0 = delvolinv * q[i]; for (int n = nlower; n <= nupper; n++) { const int mz = n+nz; - const FFT_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); + const FFT_KOKKOS_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); for (int m = nlower; m <= nupper; m++) { const int my = m+ny; - const FFT_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); + const FFT_KOKKOS_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); for (int l = nlower; l <= nupper; l++) { const int mx = l+nx; a_density_brick(mz,my,mx) += x0*d_rho1d(i,l+order/2,0); @@ -1294,9 +1294,9 @@ void PPPMKokkos::operator() (TagPPPM_make_rho, typename Kokkos::Team if ( ((nz+nlower-nzlo_out)*ix*iy >= ito) || ((nz+nupper-nzlo_out+1)*ix*iy < ifrom) ) continue; - const FFT_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; - const FFT_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; - const FFT_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; + const FFT_KOKKOS_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; + const FFT_KOKKOS_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; + const FFT_KOKKOS_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; nz -= nzlo_out; ny -= nylo_out; @@ -1304,15 +1304,15 @@ void PPPMKokkos::operator() (TagPPPM_make_rho, typename Kokkos::Team compute_rho1d(i,dx,dy,dz); - const FFT_SCALAR z0 = delvolinv * q[i]; + const FFT_KOKKOS_SCALAR z0 = delvolinv * q[i]; for (int n = nlower; n <= nupper; n++) { const int mz = n+nz; const int in = mz*ix*iy; - const FFT_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); + const FFT_KOKKOS_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); for (int m = nlower; m <= nupper; m++) { const int my = m+ny; const int im = in+my*ix; - const FFT_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); + const FFT_KOKKOS_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); for (int l = nlower; l <= nupper; l++) { const int mx = l+nx; const int il = im+mx; @@ -2040,8 +2040,8 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_fieldforce_ik, const int &i) const { int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR x0,y0,z0; - FFT_SCALAR ekx,eky,ekz; + FFT_KOKKOS_SCALAR x0,y0,z0; + FFT_KOKKOS_SCALAR ekx,eky,ekz; nx = d_part2grid(i,0); ny = d_part2grid(i,1); @@ -2100,8 +2100,8 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_fieldforce_peratom, const int &i) const { int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR u,v0,v1,v2,v3,v4,v5; + FFT_KOKKOS_SCALAR dx,dy,dz,x0,y0,z0; + FFT_KOKKOS_SCALAR u,v0,v1,v2,v3,v4,v5; nx = d_part2grid(i,0); ny = d_part2grid(i,1); @@ -2155,7 +2155,7 @@ void PPPMKokkos::operator()(TagPPPM_fieldforce_peratom, const int &i ------------------------------------------------------------------------- */ template -void PPPMKokkos::pack_forward_grid_kokkos(int flag, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::pack_forward_grid_kokkos(int flag, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2211,7 +2211,7 @@ void PPPMKokkos::operator()(TagPPPM_pack_forward2, const int &i) con ------------------------------------------------------------------------- */ template -void PPPMKokkos::unpack_forward_grid_kokkos(int flag, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::unpack_forward_grid_kokkos(int flag, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2269,7 +2269,7 @@ void PPPMKokkos::operator()(TagPPPM_unpack_forward2, const int &i) c ------------------------------------------------------------------------- */ template -void PPPMKokkos::pack_reverse_grid_kokkos(int /*flag*/, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::pack_reverse_grid_kokkos(int /*flag*/, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2299,7 +2299,7 @@ void PPPMKokkos::operator()(TagPPPM_pack_reverse, const int &i) cons ------------------------------------------------------------------------- */ template -void PPPMKokkos::unpack_reverse_grid_kokkos(int /*flag*/, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::unpack_reverse_grid_kokkos(int /*flag*/, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2332,11 +2332,11 @@ void PPPMKokkos::operator()(TagPPPM_unpack_reverse, const int &i) co template KOKKOS_INLINE_FUNCTION -void PPPMKokkos::compute_rho1d(const int i, const FFT_SCALAR &dx, const FFT_SCALAR &dy, - const FFT_SCALAR &dz) const +void PPPMKokkos::compute_rho1d(const int i, const FFT_KOKKOS_SCALAR &dx, const FFT_KOKKOS_SCALAR &dy, + const FFT_KOKKOS_SCALAR &dz) const { int k,l; - FFT_SCALAR r1,r2,r3; + FFT_KOKKOS_SCALAR r1,r2,r3; for (k = (1-order)/2; k <= order/2; k++) { r1 = r2 = r3 = ZEROF; @@ -2375,10 +2375,10 @@ template void PPPMKokkos::compute_rho_coeff() { int j,k,l,m; - FFT_SCALAR s; - FFT_SCALAR **a = new FFT_SCALAR *[order]; + FFT_KOKKOS_SCALAR s; + FFT_KOKKOS_SCALAR **a = new FFT_KOKKOS_SCALAR *[order]; for (int i = 0; i < order; ++i) - a[i] = new FFT_SCALAR[2*order+1]; + a[i] = new FFT_KOKKOS_SCALAR[2*order+1]; for (k = 0; k <= 2*order; k++) for (l = 0; l < order; l++) @@ -2586,18 +2586,18 @@ double PPPMKokkos::memory_usage() double bytes = (double)nmax*3 * sizeof(double); int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * (nzhi_out-nzlo_out+1); - bytes += (double)4 * nbrick * sizeof(FFT_SCALAR); + bytes += (double)4 * nbrick * sizeof(FFT_KOKKOS_SCALAR); if (triclinic) bytes += (double)3 * nfft_both * sizeof(double); bytes += (double)6 * nfft_both * sizeof(double); bytes += (double)nfft_both * sizeof(double); - bytes += (double)nfft_both*5 * sizeof(FFT_SCALAR); + bytes += (double)nfft_both*5 * sizeof(FFT_KOKKOS_SCALAR); if (peratom_allocate_flag) - bytes += (double)6 * nbrick * sizeof(FFT_SCALAR); + bytes += (double)6 * nbrick * sizeof(FFT_KOKKOS_SCALAR); // two Grid3d bufs - bytes += (double)(ngc_buf1 + ngc_buf2) * npergrid * sizeof(FFT_SCALAR); + bytes += (double)(ngc_buf1 + ngc_buf2) * npergrid * sizeof(FFT_KOKKOS_SCALAR); return bytes; } diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index 14d4670dbd..dc0fbd88e5 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -131,7 +131,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; PPPMKokkos(class LAMMPS *); ~PPPMKokkos() override; @@ -350,7 +350,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { int nx,ny,nz; typename AT::t_int_1d_um d_list_index; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf; int unpack_offset; DAT::tdual_int_scalar k_flag; @@ -364,31 +364,31 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { typename ArrayTypes::t_efloat_1d d_eatom; typename ArrayTypes::t_virial_array d_vatom; - typename FFT_AT::t_FFT_SCALAR_3d d_density_brick; - typename FFT_AT::t_FFT_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick; - typename FFT_AT::t_FFT_SCALAR_3d d_u_brick; - typename FFT_AT::t_FFT_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick; - typename FFT_AT::t_FFT_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_density_brick; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_u_brick; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick; typename AT::t_float_1d d_greensfn; typename AT::t_virial_array d_vg; typename AT::t_float_1d d_fkx; typename AT::t_float_1d d_fky; typename AT::t_float_1d d_fkz; - FFT_DAT::tdual_FFT_SCALAR_1d k_density_fft; - FFT_DAT::tdual_FFT_SCALAR_1d k_work1; - FFT_DAT::tdual_FFT_SCALAR_1d k_work2; - typename FFT_AT::t_FFT_SCALAR_1d d_density_fft; - typename FFT_AT::t_FFT_SCALAR_1d d_work1; - typename FFT_AT::t_FFT_SCALAR_1d d_work2; + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_density_fft; + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_work1; + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_work2; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_density_fft; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_work1; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_work2; DAT::tdual_float_1d k_gf_b; typename AT::t_float_1d d_gf_b; - //FFT_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff; - typename FFT_AT::t_FFT_SCALAR_2d_3 d_rho1d; - FFT_DAT::tdual_FFT_SCALAR_2d k_rho_coeff; - typename FFT_AT::t_FFT_SCALAR_2d d_rho_coeff; - FFT_HAT::t_FFT_SCALAR_2d h_rho_coeff; + //FFT_KOKKOS_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d_3 d_rho1d; + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_2d k_rho_coeff; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d d_rho_coeff; + FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_2d h_rho_coeff; //double **acons; typename Kokkos::DualView::t_host acons; @@ -398,7 +398,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { RemapKokkos *remap; Grid3dKokkos *gc; - FFT_DAT::tdual_FFT_SCALAR_1d k_gc_buf1,k_gc_buf2; + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_gc_buf1,k_gc_buf2; int ngc_buf1,ngc_buf2,npergrid; //int **part2grid; // storage for particle -> grid mapping @@ -429,17 +429,17 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { void fieldforce_peratom() override; KOKKOS_INLINE_FUNCTION - void compute_rho1d(const int i, const FFT_SCALAR &, const FFT_SCALAR &, - const FFT_SCALAR &) const; + void compute_rho1d(const int i, const FFT_KOKKOS_SCALAR &, const FFT_KOKKOS_SCALAR &, + const FFT_KOKKOS_SCALAR &) const; void compute_rho_coeff(); void slabcorr() override; // grid communication - void pack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; - void unpack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; - void pack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; - void unpack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; + void pack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; + void unpack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; + void pack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; + void unpack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; // triclinic diff --git a/src/KOKKOS/remap_kokkos.cpp b/src/KOKKOS/remap_kokkos.cpp index efc6742a25..d6b8a5691c 100644 --- a/src/KOKKOS/remap_kokkos.cpp +++ b/src/KOKKOS/remap_kokkos.cpp @@ -59,7 +59,7 @@ RemapKokkos::~RemapKokkos() /* ---------------------------------------------------------------------- */ template -void RemapKokkos::perform(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, typename FFT_AT::t_FFT_SCALAR_1d d_buf) +void RemapKokkos::perform(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_buf) { remap_3d_kokkos(d_in,d_out,d_buf,plan); } @@ -103,7 +103,7 @@ void RemapKokkos::perform(typename FFT_AT::t_FFT_SCALAR_1d d_in, typ ------------------------------------------------------------------------- */ template -void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, typename FFT_AT::t_FFT_SCALAR_1d d_buf, +void RemapKokkos::remap_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_buf, struct remap_plan_3d_kokkos *plan) { // collective flag not yet supported @@ -111,7 +111,7 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d // use point-to-point communication int i,isend,irecv; - typename FFT_AT::t_FFT_SCALAR_1d d_scratch; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; if (plan->memory == 0) d_scratch = d_buf; @@ -120,20 +120,20 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d // post all recvs into scratch space - FFT_SCALAR* v_scratch = d_scratch.data(); + FFT_KOKKOS_SCALAR* v_scratch = d_scratch.data(); if (!plan->usecuda_aware) { plan->h_scratch = Kokkos::create_mirror_view(d_scratch); v_scratch = plan->h_scratch.data(); } for (irecv = 0; irecv < plan->nrecv; irecv++) { - FFT_SCALAR* scratch = v_scratch + plan->recv_bufloc[irecv]; + FFT_KOKKOS_SCALAR* scratch = v_scratch + plan->recv_bufloc[irecv]; MPI_Irecv(scratch,plan->recv_size[irecv], - MPI_FFT_SCALAR,plan->recv_proc[irecv],0, + MPI_FFT_KOKKOS_SCALAR,plan->recv_proc[irecv],0, plan->comm,&plan->request[irecv]); } - FFT_SCALAR* v_sendbuf = plan->d_sendbuf.data(); + FFT_KOKKOS_SCALAR* v_sendbuf = plan->d_sendbuf.data(); if (!plan->usecuda_aware) { plan->h_sendbuf = Kokkos::create_mirror_view(plan->d_sendbuf); v_sendbuf = plan->h_sendbuf.data(); @@ -149,7 +149,7 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d if (!plan->usecuda_aware) Kokkos::deep_copy(plan->h_sendbuf,plan->d_sendbuf); - MPI_Send(v_sendbuf,plan->send_size[isend],MPI_FFT_SCALAR, + MPI_Send(v_sendbuf,plan->send_size[isend],MPI_FFT_KOKKOS_SCALAR, plan->send_proc[isend],0,plan->comm); } @@ -465,7 +465,7 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat size = MAX(size,plan->send_size[nsend]); if (size) { - plan->d_sendbuf = typename FFT_AT::t_FFT_SCALAR_1d("remap3d:sendbuf",size); + plan->d_sendbuf = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:sendbuf",size); if (!plan->d_sendbuf.data()) return nullptr; } @@ -475,7 +475,7 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat if (memory == 1) { if (nrecv > 0) { plan->d_scratch = - typename FFT_AT::t_FFT_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); if (!plan->d_scratch.data()) return nullptr; } } diff --git a/src/KOKKOS/remap_kokkos.h b/src/KOKKOS/remap_kokkos.h index a62c14f00b..035b58260e 100644 --- a/src/KOKKOS/remap_kokkos.h +++ b/src/KOKKOS/remap_kokkos.h @@ -27,14 +27,14 @@ namespace LAMMPS_NS { template struct remap_plan_3d_kokkos { typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_SCALAR_1d d_sendbuf; // buffer for MPI sends - FFT_HAT::t_FFT_SCALAR_1d h_sendbuf; // host buffer for MPI sends - typename FFT_AT::t_FFT_SCALAR_1d d_scratch; // scratch buffer for MPI recvs - FFT_HAT::t_FFT_SCALAR_1d h_scratch; // host scratch buffer for MPI recvs - void (*pack)(typename FFT_AT::t_FFT_SCALAR_1d_um, int, typename FFT_AT::t_FFT_SCALAR_1d_um, int, struct pack_plan_3d *); + typedef FFTArrayTypes FFT_KOKKOS_AT; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_sendbuf; // buffer for MPI sends + FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_1d h_sendbuf; // host buffer for MPI sends + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; // scratch buffer for MPI recvs + FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_1d h_scratch; // host scratch buffer for MPI recvs + void (*pack)(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); // which pack function to use - void (*unpack)(typename FFT_AT::t_FFT_SCALAR_1d_um, int, typename FFT_AT::t_FFT_SCALAR_1d_um, int, struct pack_plan_3d *); + void (*unpack)(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); // which unpack function to use int *send_offset; // extraction loc for each send int *send_size; // size of each send message @@ -61,16 +61,16 @@ template class RemapKokkos : protected Pointers { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; RemapKokkos(class LAMMPS *); RemapKokkos(class LAMMPS *, MPI_Comm,int,int,int,int,int,int, int,int,int,int,int,int,int,int,int,int,int,int); ~RemapKokkos() override; - void perform(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d); + void perform(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d); struct remap_plan_3d_kokkos *plan; - void remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, struct remap_plan_3d_kokkos *); + void remap_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, struct remap_plan_3d_kokkos *); struct remap_plan_3d_kokkos *remap_3d_create_plan_kokkos(MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, From 51eebf311fde2f3848feae35ce3692a35d90fe9f Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Wed, 6 Dec 2023 12:40:11 -0500 Subject: [PATCH 031/448] Added fft settings to Install.sh, fixed typo in HIPFFT_C2C and HIPFFT_Z2Z --- src/KOKKOS/Install.sh | 1 + src/KOKKOS/fftdata_kokkos.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 489efc55a0..ba6c4ed427 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -200,6 +200,7 @@ action kokkos_few.h action kokkos_type.h action kokkos.cpp action kokkos.h +action lmpfftsettings_kokkos.h action math_special_kokkos.cpp action math_special_kokkos.h action meam_dens_final_kokkos.h meam_dens_final.cpp diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index a9ea2de896..d52bc0b968 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -117,11 +117,11 @@ #include #if defined(FFT_KOKKOS_SINGLE) #define hipfftExec hipfftExecC2C - #define HIPFFT_KOKKOS_TYPE HIPFFT_KOKKOS_C2C + #define HIPFFT_KOKKOS_TYPE HIPFFT_C2C typedef hipfftComplex FFT_KOKKOS_DATA; #else #define hipfftExec hipfftExecZ2Z - #define HIPFFT_KOKKOS_TYPE HIPFFT_KOKKOS_Z2Z + #define HIPFFT_KOKKOS_TYPE HIPFFT_Z2Z typedef hipfftDoubleComplex FFT_KOKKOS_DATA; #endif #else From e80c3d3215a9de1f8e1c9041fc96c9713cb156ca Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Tue, 12 Dec 2023 12:20:24 -0700 Subject: [PATCH 032/448] Revert FFT_AT name change --- src/KOKKOS/fft3d_kokkos.cpp | 70 ++++++++++++++++++------------------- src/KOKKOS/fft3d_kokkos.h | 16 ++++----- src/KOKKOS/grid3d_kokkos.h | 2 +- src/KOKKOS/kissfft_kokkos.h | 52 +++++++++++++-------------- src/KOKKOS/pack_kokkos.h | 52 +++++++++++++-------------- src/KOKKOS/pppm_kokkos.cpp | 26 +++++++------- src/KOKKOS/pppm_kokkos.h | 24 ++++++------- src/KOKKOS/remap_kokkos.cpp | 10 +++--- src/KOKKOS/remap_kokkos.h | 16 ++++----- 9 files changed, 134 insertions(+), 134 deletions(-) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 7093136fe6..9e7b87b8d8 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -88,10 +88,10 @@ FFT3dKokkos::~FFT3dKokkos() /* ---------------------------------------------------------------------- */ template -void FFT3dKokkos::compute(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_out, int flag) +void FFT3dKokkos::compute(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_out, int flag) { - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_out_data((FFT_KOKKOS_DATA_POINTER)d_out.data(),d_out.size()/2); + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_out_data((FFT_KOKKOS_DATA_POINTER)d_out.data(),d_out.size()/2); fft_3d_kokkos(d_in_data,d_out_data,flag,plan); } @@ -99,9 +99,9 @@ void FFT3dKokkos::compute(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALA /* ---------------------------------------------------------------------- */ template -void FFT3dKokkos::timing1d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, int nsize, int flag) +void FFT3dKokkos::timing1d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, int nsize, int flag) { - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); fft_3d_1d_only_kokkos(d_in_data,nsize,flag,plan); } @@ -140,11 +140,11 @@ template struct norm_functor { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_out; + typedef FFTArrayTypes FFT_AT; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_out; int norm; - norm_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d &d_out_, int norm_): + norm_functor(typename FFT_AT::t_FFT_KOKKOS_DATA_1d &d_out_, int norm_): d_out(d_out_),norm(norm_) {} KOKKOS_INLINE_FUNCTION @@ -167,14 +167,14 @@ template struct kiss_fft_functor { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_data,d_tmp; + typedef FFTArrayTypes FFT_AT; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_data,d_tmp; kiss_fft_state_kokkos st; int length; kiss_fft_functor() = default; - kiss_fft_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d &d_data_,typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d &d_tmp_, kiss_fft_state_kokkos &st_, int length_): + kiss_fft_functor(typename FFT_AT::t_FFT_KOKKOS_DATA_1d &d_data_,typename FFT_AT::t_FFT_KOKKOS_DATA_1d &d_tmp_, kiss_fft_state_kokkos &st_, int length_): d_data(d_data_), d_tmp(d_tmp_), st(st_) @@ -191,11 +191,11 @@ public: #endif template -void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos *plan) +void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_in, typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos *plan) { int total,length; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_data,d_copy; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_data,d_copy; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar; // pre-remap to prepare for 1st FFTs if needed // copy = loc for remap result @@ -204,9 +204,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS if (plan->pre_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_in_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_in.data(),d_in.size()*2); - d_copy_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_in_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_in.data(),d_in.size()*2); + d_copy_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_in_scalar, d_copy_scalar, d_scratch_scalar, plan->pre_plan); @@ -234,8 +234,8 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); #else - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_tmp = - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_tmp = + typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); kiss_fft_functor f; if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length); @@ -251,9 +251,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS if (plan->mid1_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); - d_copy_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid1_plan); @@ -280,7 +280,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); #else - d_tmp = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + d_tmp = typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_mid_forward,length); else @@ -295,9 +295,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS if (plan->mid2_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); - d_copy_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid2_plan); @@ -324,7 +324,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else - d_tmp = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + d_tmp = typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_slow_forward,length); else @@ -337,9 +337,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS // destination is always out if (plan->post_plan) { - d_data_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); - d_out_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_out.data(),d_out.size()*2); - d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); + d_out_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_out.data(),d_out.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_out_scalar, d_scratch_scalar, plan->post_plan); @@ -599,11 +599,11 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl *nbuf = copy_size + scratch_size; if (copy_size) { - plan->d_copy = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d("fft3d:copy",copy_size); + plan->d_copy = typename FFT_AT::t_FFT_KOKKOS_DATA_1d("fft3d:copy",copy_size); } if (scratch_size) { - plan->d_scratch = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d("fft3d:scratch",scratch_size); + plan->d_scratch = typename FFT_AT::t_FFT_KOKKOS_DATA_1d("fft3d:scratch",scratch_size); } // system specific pre-computation of 1d FFT coeffs @@ -838,7 +838,7 @@ void FFT3dKokkos::bifactor(int n, int *factor1, int *factor2) ------------------------------------------------------------------------- */ template -void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_data, int nsize, int flag, +void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_data, int nsize, int flag, struct fft_plan_3d_kokkos *plan) { // total = size of data needed in each dim @@ -896,8 +896,8 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_KOKKOS_AT::t_FF hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else kiss_fft_functor f; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_tmp = - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_tmp = + typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == -1) { f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length1); Kokkos::parallel_for(total1/length1,f); diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index bb552ec4ef..9729bc6a63 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -28,14 +28,14 @@ namespace LAMMPS_NS { template struct fft_plan_3d_kokkos { typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; struct remap_plan_3d_kokkos *pre_plan; // remap from input -> 1st FFTs struct remap_plan_3d_kokkos *mid1_plan; // remap from 1st -> 2nd FFTs struct remap_plan_3d_kokkos *mid2_plan; // remap from 2nd -> 3rd FFTs struct remap_plan_3d_kokkos *post_plan; // remap from 3rd FFTs -> output - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_copy; // memory for remap results (if needed) - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_scratch; // scratch space for remaps + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_copy; // memory for remap results (if needed) + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_scratch; // scratch space for remaps int total1,total2,total3; // # of 1st,2nd,3rd FFTs (times length) int length1,length2,length3; // length of 1st,2nd,3rd FFTs int pre_target; // where to put remap results @@ -79,14 +79,14 @@ class FFT3dKokkos : protected Pointers { public: enum{FORWARD=1,BACKWARD=-1}; typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; FFT3dKokkos(class LAMMPS *, MPI_Comm, int,int,int,int,int,int,int,int,int,int,int,int,int,int,int, int,int,int *,int,int); ~FFT3dKokkos() override; - void compute(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, int); - void timing1d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, int, int); + void compute(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, int); + void timing1d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, int, int); private: struct fft_plan_3d_kokkos *plan; @@ -96,7 +96,7 @@ class FFT3dKokkos : protected Pointers { KissFFTKokkos *kissfftKK; #endif - void fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d, int, struct fft_plan_3d_kokkos *); + void fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d, typename FFT_AT::t_FFT_KOKKOS_DATA_1d, int, struct fft_plan_3d_kokkos *); struct fft_plan_3d_kokkos *fft_3d_create_plan_kokkos(MPI_Comm, int, int, int, int, int, int, int, int, @@ -105,7 +105,7 @@ class FFT3dKokkos : protected Pointers { void fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokkos *); - void fft_3d_1d_only_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d, int, int, struct fft_plan_3d_kokkos *); + void fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d, int, int, struct fft_plan_3d_kokkos *); void bifactor(int, int *, int *); }; diff --git a/src/KOKKOS/grid3d_kokkos.h b/src/KOKKOS/grid3d_kokkos.h index 8e9f6cd051..864ac19c06 100644 --- a/src/KOKKOS/grid3d_kokkos.h +++ b/src/KOKKOS/grid3d_kokkos.h @@ -27,7 +27,7 @@ class Grid3dKokkos : public Grid3d { enum { KSPACE = 0, PAIR = 1, FIX = 2 }; // calling classes typedef DeviceType device_type; typedef ArrayTypes AT; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; Grid3dKokkos(class LAMMPS *, MPI_Comm, int, int, int); Grid3dKokkos(class LAMMPS *, MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int); diff --git a/src/KOKKOS/kissfft_kokkos.h b/src/KOKKOS/kissfft_kokkos.h index fc23bf7891..66f32d29fb 100644 --- a/src/KOKKOS/kissfft_kokkos.h +++ b/src/KOKKOS/kissfft_kokkos.h @@ -138,25 +138,25 @@ namespace LAMMPS_NS { template struct kiss_fft_state_kokkos { typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; int nfft; int inverse; - typename FFT_KOKKOS_AT::t_int_64 d_factors; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_twiddles; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_scratch; + typename FFT_AT::t_int_64 d_factors; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_twiddles; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_scratch; }; template class KissFFTKokkos { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; KOKKOS_INLINE_FUNCTION - static void kf_bfly2(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly2(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int Fout_count) { - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; FFT_KOKKOS_SCALAR t[2]; int Fout2_count; int tw1_count = 0; @@ -179,10 +179,10 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly4(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly4(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, const size_t m, int Fout_count) { - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; FFT_KOKKOS_SCALAR scratch[6][2]; size_t k=m; const size_t m2=2*m; @@ -237,12 +237,12 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly3(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly3(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, size_t m, int Fout_count) { size_t k=m; const size_t m2 = 2*m; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; FFT_KOKKOS_SCALAR scratch[5][2]; FFT_KOKKOS_SCALAR epi3[2]; //C_EQ(epi3,d_twiddles[fstride*m]); @@ -289,12 +289,12 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly5(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly5(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int Fout_count) { int u; FFT_KOKKOS_SCALAR scratch[13][2]; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; FFT_KOKKOS_SCALAR ya[2],yb[2]; //C_EQ(ya,d_twiddles[fstride*m]); ya[1] = d_twiddles(fstride*m).im; @@ -369,15 +369,15 @@ class KissFFTKokkos { /* perform the butterfly for one stage of a mixed radix FFT */ KOKKOS_INLINE_FUNCTION - static void kf_bfly_generic(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly_generic(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int p, int Fout_count) { int u,k,q1,q; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; FFT_KOKKOS_SCALAR t[2]; int Norig = st.nfft; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_scratch = st.d_scratch; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_scratch = st.d_scratch; for ( u=0; u &st, int Fout_count, int f_count, int factors_count) + const typename FFT_AT::t_int_64_um &d_factors, const kiss_fft_state_kokkos &st, int Fout_count, int f_count, int factors_count) { const int beg = Fout_count; const int p = d_factors[factors_count++]; /* the radix */ @@ -496,12 +496,12 @@ class KissFFTKokkos { st.nfft = nfft; st.inverse = inverse_fft; - typename FFT_KOKKOS_AT::tdual_int_64 k_factors = typename FFT_KOKKOS_AT::tdual_int_64(); - typename FFT_KOKKOS_AT::tdual_FFT_KOKKOS_DATA_1d k_twiddles = typename FFT_KOKKOS_AT::tdual_FFT_KOKKOS_DATA_1d(); + typename FFT_AT::tdual_int_64 k_factors = typename FFT_AT::tdual_int_64(); + typename FFT_AT::tdual_FFT_KOKKOS_DATA_1d k_twiddles = typename FFT_AT::tdual_FFT_KOKKOS_DATA_1d(); if (nfft > 0) { - k_factors = typename FFT_KOKKOS_AT::tdual_int_64("kissfft:factors",MAXFACTORS*2); - k_twiddles = typename FFT_KOKKOS_AT::tdual_FFT_KOKKOS_DATA_1d("kissfft:twiddles",nfft); + k_factors = typename FFT_AT::tdual_int_64("kissfft:factors",MAXFACTORS*2); + k_twiddles = typename FFT_AT::tdual_FFT_KOKKOS_DATA_1d("kissfft:twiddles",nfft); for (i=0;i(); @@ -524,12 +524,12 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kiss_fft_stride(const kiss_fft_state_kokkos &st, const typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_fin, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_fout, int in_stride, int offset) + static void kiss_fft_stride(const kiss_fft_state_kokkos &st, const typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_fin, typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_fout, int in_stride, int offset) { //if (d_fin.data() == d_fout.data()) { // // NOTE: this is not really an in-place FFT algorithm. // // It just performs an out-of-place FFT into a temp buffer - // typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_tmpbuf = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d("kissfft:tmpbuf",d_fin.extent(1)); + // typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_tmpbuf = typename FFT_AT::t_FFT_KOKKOS_DATA_1d("kissfft:tmpbuf",d_fin.extent(1)); // kf_work(d_tmpbuf,d_fin,1,in_stride,st.d_factors,st,offset,offset).re; // Kokkos::deep_copy(d_fout,d_tmpbuf); //} else { @@ -538,7 +538,7 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kiss_fft_kokkos(const kiss_fft_state_kokkos &cfg, const typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_fin, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_fout, int offset) + static void kiss_fft_kokkos(const kiss_fft_state_kokkos &cfg, const typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_fin, typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_fout, int offset) { kiss_fft_stride(cfg,d_fin,d_fout,1,offset); } diff --git a/src/KOKKOS/pack_kokkos.h b/src/KOKKOS/pack_kokkos.h index 97d35afe26..5e014db020 100644 --- a/src/KOKKOS/pack_kokkos.h +++ b/src/KOKKOS/pack_kokkos.h @@ -38,13 +38,13 @@ template class PackKokkos { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; struct pack_3d_functor { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typedef FFTArrayTypes FFT_AT; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -52,7 +52,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - pack_3d_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + pack_3d_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -80,7 +80,7 @@ public: } }; -static void pack_3d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan) +static void pack_3d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -98,7 +98,7 @@ struct unpack_3d_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -106,7 +106,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -134,7 +134,7 @@ public: } }; -static void unpack_3d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -153,7 +153,7 @@ struct unpack_3d_permute1_1_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -161,7 +161,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute1_1_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_1_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -189,7 +189,7 @@ public: } }; -static void unpack_3d_permute1_1(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_1(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -206,7 +206,7 @@ struct unpack_3d_permute1_2_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -214,7 +214,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute1_2_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_2_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -243,7 +243,7 @@ public: } }; -static void unpack_3d_permute1_2(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_2(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -261,7 +261,7 @@ struct unpack_3d_permute1_n_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -270,7 +270,7 @@ public: int nstride_plane; // stride between successive slow indices int nqty; // # of values/element - unpack_3d_permute1_n_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_n_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -299,7 +299,7 @@ public: } }; -static void unpack_3d_permute1_n(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_n(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -317,7 +317,7 @@ struct unpack_3d_permute2_1_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -325,7 +325,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute2_1_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_1_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -352,7 +352,7 @@ public: } }; -static void unpack_3d_permute2_1(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_1(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -370,7 +370,7 @@ struct unpack_3d_permute2_2_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -378,7 +378,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute2_2_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_2_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -406,7 +406,7 @@ public: } }; -static void unpack_3d_permute2_2(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_2(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -423,7 +423,7 @@ struct unpack_3d_permute2_n_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -432,7 +432,7 @@ public: int nstride_plane; // stride between successive slow indices int nqty; // # of values/element - unpack_3d_permute2_n_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_n_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -460,7 +460,7 @@ public: } }; -static void unpack_3d_permute2_n(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_n(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 3fc90c088d..ed7ace08c1 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -582,7 +582,7 @@ void PPPMKokkos::compute(int eflag, int vflag) if (atom->nmax > nmax) { nmax = atomKK->nmax; d_part2grid = typename AT::t_int_1d_3("pppm:part2grid",nmax); - d_rho1d = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); + d_rho1d = typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); } // find grid points for all my particles @@ -753,7 +753,7 @@ void PPPMKokkos::allocate() // allocate distributed grid data - d_density_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_density_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); memoryKK->create_kokkos(k_density_fft,density_fft,nfft_both,"pppm:d_density_fft"); d_density_fft = k_density_fft.view(); @@ -775,16 +775,16 @@ void PPPMKokkos::allocate() d_fkz = typename AT::t_float_1d("pppm:d_fkz",nfft_both); } - d_vdx_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_vdy_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_vdz_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdx_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdy_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdz_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); // summation coeffs order_allocated = order; k_gf_b = typename DAT::tdual_float_1d("pppm:gf_b",order); d_gf_b = k_gf_b.view(); - d_rho1d = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); + d_rho1d = typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); k_rho_coeff = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_2d("pppm:rho_coeff",order,order/2-(1-order)/2+1); d_rho_coeff = k_rho_coeff.view(); h_rho_coeff = k_rho_coeff.h_view; @@ -847,14 +847,14 @@ void PPPMKokkos::allocate_peratom() { peratom_allocate_flag = 1; - d_u_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_u_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v0_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v1_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v2_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v3_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v4_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v5_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v0_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v1_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v2_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v3_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v4_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v5_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); // use same GC ghost grid object for peratom grid communication diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index dc0fbd88e5..09513c9a2f 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -131,7 +131,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; PPPMKokkos(class LAMMPS *); ~PPPMKokkos() override; @@ -350,7 +350,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { int nx,ny,nz; typename AT::t_int_1d_um d_list_index; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf; int unpack_offset; DAT::tdual_int_scalar k_flag; @@ -364,11 +364,11 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { typename ArrayTypes::t_efloat_1d d_eatom; typename ArrayTypes::t_virial_array d_vatom; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_density_brick; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_u_brick; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_density_brick; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_u_brick; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick; typename AT::t_float_1d d_greensfn; typename AT::t_virial_array d_vg; typename AT::t_float_1d d_fkx; @@ -377,17 +377,17 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_density_fft; FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_work1; FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_work2; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_density_fft; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_work1; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_work2; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_density_fft; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_work1; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_work2; DAT::tdual_float_1d k_gf_b; typename AT::t_float_1d d_gf_b; //FFT_KOKKOS_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d_3 d_rho1d; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d_3 d_rho1d; FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_2d k_rho_coeff; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d d_rho_coeff; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d d_rho_coeff; FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_2d h_rho_coeff; //double **acons; typename Kokkos::DualView::t_host acons; diff --git a/src/KOKKOS/remap_kokkos.cpp b/src/KOKKOS/remap_kokkos.cpp index d6b8a5691c..18ba626460 100644 --- a/src/KOKKOS/remap_kokkos.cpp +++ b/src/KOKKOS/remap_kokkos.cpp @@ -59,7 +59,7 @@ RemapKokkos::~RemapKokkos() /* ---------------------------------------------------------------------- */ template -void RemapKokkos::perform(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_buf) +void RemapKokkos::perform(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_buf) { remap_3d_kokkos(d_in,d_out,d_buf,plan); } @@ -103,7 +103,7 @@ void RemapKokkos::perform(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALA ------------------------------------------------------------------------- */ template -void RemapKokkos::remap_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_buf, +void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_buf, struct remap_plan_3d_kokkos *plan) { // collective flag not yet supported @@ -111,7 +111,7 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKK // use point-to-point communication int i,isend,irecv; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; if (plan->memory == 0) d_scratch = d_buf; @@ -465,7 +465,7 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat size = MAX(size,plan->send_size[nsend]); if (size) { - plan->d_sendbuf = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:sendbuf",size); + plan->d_sendbuf = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:sendbuf",size); if (!plan->d_sendbuf.data()) return nullptr; } @@ -475,7 +475,7 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat if (memory == 1) { if (nrecv > 0) { plan->d_scratch = - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); if (!plan->d_scratch.data()) return nullptr; } } diff --git a/src/KOKKOS/remap_kokkos.h b/src/KOKKOS/remap_kokkos.h index 035b58260e..ad5fa9833d 100644 --- a/src/KOKKOS/remap_kokkos.h +++ b/src/KOKKOS/remap_kokkos.h @@ -27,14 +27,14 @@ namespace LAMMPS_NS { template struct remap_plan_3d_kokkos { typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_sendbuf; // buffer for MPI sends + typedef FFTArrayTypes FFT_AT; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_sendbuf; // buffer for MPI sends FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_1d h_sendbuf; // host buffer for MPI sends - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; // scratch buffer for MPI recvs + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; // scratch buffer for MPI recvs FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_1d h_scratch; // host scratch buffer for MPI recvs - void (*pack)(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); + void (*pack)(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); // which pack function to use - void (*unpack)(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); + void (*unpack)(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); // which unpack function to use int *send_offset; // extraction loc for each send int *send_size; // size of each send message @@ -61,16 +61,16 @@ template class RemapKokkos : protected Pointers { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; RemapKokkos(class LAMMPS *); RemapKokkos(class LAMMPS *, MPI_Comm,int,int,int,int,int,int, int,int,int,int,int,int,int,int,int,int,int,int); ~RemapKokkos() override; - void perform(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d); + void perform(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d); struct remap_plan_3d_kokkos *plan; - void remap_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, struct remap_plan_3d_kokkos *); + void remap_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, struct remap_plan_3d_kokkos *); struct remap_plan_3d_kokkos *remap_3d_create_plan_kokkos(MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, From 68c53886b8f52ec1f4af801783c87d368cef10e2 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Wed, 13 Dec 2023 15:09:02 -0700 Subject: [PATCH 033/448] Revert some name changes --- src/KOKKOS/Install.sh | 2 +- src/KOKKOS/fft3d_kokkos.cpp | 88 +++++++++++----------- src/KOKKOS/fft3d_kokkos.h | 12 +-- src/KOKKOS/fftdata_kokkos.h | 80 ++++++++++---------- src/KOKKOS/grid3d_kokkos.cpp | 28 +++---- src/KOKKOS/grid3d_kokkos.h | 12 +-- src/KOKKOS/kissfft_kokkos.h | 64 ++++++++-------- src/KOKKOS/kokkos_base_fft.h | 12 +-- src/KOKKOS/lmpfftsettings_kokkos.h | 15 +--- src/KOKKOS/pack_kokkos.h | 48 ++++++------ src/KOKKOS/pppm_kokkos.cpp | 116 ++++++++++++++--------------- src/KOKKOS/pppm_kokkos.h | 48 ++++++------ src/KOKKOS/remap_kokkos.cpp | 20 ++--- src/KOKKOS/remap_kokkos.h | 16 ++-- 14 files changed, 275 insertions(+), 286 deletions(-) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index ba6c4ed427..2dcf49ce06 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -200,7 +200,7 @@ action kokkos_few.h action kokkos_type.h action kokkos.cpp action kokkos.h -action lmpfftsettings_kokkos.h +action lmpfftsettings_kokkos.h lmpfftsettings.h action math_special_kokkos.cpp action math_special_kokkos.h action meam_dens_final_kokkos.h meam_dens_final.cpp diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 9e7b87b8d8..d78239606e 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -88,10 +88,10 @@ FFT3dKokkos::~FFT3dKokkos() /* ---------------------------------------------------------------------- */ template -void FFT3dKokkos::compute(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_out, int flag) +void FFT3dKokkos::compute(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, int flag) { - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_out_data((FFT_KOKKOS_DATA_POINTER)d_out.data(),d_out.size()/2); + typename FFT_AT::t_FFT_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); + typename FFT_AT::t_FFT_DATA_1d d_out_data((FFT_KOKKOS_DATA_POINTER)d_out.data(),d_out.size()/2); fft_3d_kokkos(d_in_data,d_out_data,flag,plan); } @@ -99,9 +99,9 @@ void FFT3dKokkos::compute(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_ /* ---------------------------------------------------------------------- */ template -void FFT3dKokkos::timing1d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, int nsize, int flag) +void FFT3dKokkos::timing1d(typename FFT_AT::t_FFT_SCALAR_1d d_in, int nsize, int flag) { - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); + typename FFT_AT::t_FFT_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); fft_3d_1d_only_kokkos(d_in_data,nsize,flag,plan); } @@ -141,21 +141,21 @@ struct norm_functor { public: typedef DeviceType device_type; typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_out; + typename FFT_AT::t_FFT_DATA_1d_um d_out; int norm; - norm_functor(typename FFT_AT::t_FFT_KOKKOS_DATA_1d &d_out_, int norm_): + norm_functor(typename FFT_AT::t_FFT_DATA_1d &d_out_, int norm_): d_out(d_out_),norm(norm_) {} KOKKOS_INLINE_FUNCTION void operator() (const int &i) const { #if defined(FFT_KOKKOS_FFTW3) || defined(FFT_KOKKOS_CUFFT) || defined(FFT_KOKKOS_HIPFFT) - FFT_KOKKOS_SCALAR* out_ptr = (FFT_KOKKOS_SCALAR *)(d_out.data()+i); + FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i); *(out_ptr++) *= norm; *(out_ptr++) *= norm; #elif defined(FFT_KOKKOS_MKL) d_out(i) *= norm; -#else // FFT_KOKKOS_KISS +#else // FFT_KISS d_out(i).re *= norm; d_out(i).im *= norm; #endif @@ -168,13 +168,13 @@ struct kiss_fft_functor { public: typedef DeviceType device_type; typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_data,d_tmp; + typename FFT_AT::t_FFT_DATA_1d_um d_data,d_tmp; kiss_fft_state_kokkos st; int length; kiss_fft_functor() = default; - kiss_fft_functor(typename FFT_AT::t_FFT_KOKKOS_DATA_1d &d_data_,typename FFT_AT::t_FFT_KOKKOS_DATA_1d &d_tmp_, kiss_fft_state_kokkos &st_, int length_): + kiss_fft_functor(typename FFT_AT::t_FFT_DATA_1d &d_data_,typename FFT_AT::t_FFT_DATA_1d &d_tmp_, kiss_fft_state_kokkos &st_, int length_): d_data(d_data_), d_tmp(d_tmp_), st(st_) @@ -191,11 +191,11 @@ public: #endif template -void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_in, typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos *plan) +void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, typename FFT_AT::t_FFT_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos *plan) { int total,length; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_data,d_copy; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar; + typename FFT_AT::t_FFT_DATA_1d d_data,d_copy; + typename FFT_AT::t_FFT_SCALAR_1d d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar; // pre-remap to prepare for 1st FFTs if needed // copy = loc for remap result @@ -204,9 +204,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 if (plan->pre_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_in_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_in.data(),d_in.size()*2); - d_copy_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_in_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_in.data(),d_in.size()*2); + d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_in_scalar, d_copy_scalar, d_scratch_scalar, plan->pre_plan); @@ -234,8 +234,8 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); #else - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_tmp = - typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + typename FFT_AT::t_FFT_DATA_1d d_tmp = + typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); kiss_fft_functor f; if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length); @@ -251,9 +251,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 if (plan->mid1_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); - d_copy_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid1_plan); @@ -280,7 +280,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); #else - d_tmp = typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_mid_forward,length); else @@ -295,9 +295,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 if (plan->mid2_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); - d_copy_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid2_plan); @@ -324,7 +324,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else - d_tmp = typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_slow_forward,length); else @@ -337,9 +337,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 // destination is always out if (plan->post_plan) { - d_data_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); - d_out_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_out.data(),d_out.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); + d_out_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_out.data(),d_out.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_out_scalar, d_scratch_scalar, plan->post_plan); @@ -348,7 +348,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 // scaling if required if (flag == -1 && plan->scaled) { - FFT_KOKKOS_SCALAR norm = plan->norm; + FFT_SCALAR norm = plan->norm; int num = plan->normnum; norm_functor f(d_out,norm); @@ -443,7 +443,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl plan->pre_plan = remapKK->remap_3d_create_plan_kokkos(comm,in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi, first_ilo,first_ihi,first_jlo,first_jhi, - first_klo,first_khi,2,0,0,FFT_KOKKOS_PRECISION, + first_klo,first_khi,2,0,0,FFT_PRECISION, usecollective,usecuda_aware); if (plan->pre_plan == nullptr) return nullptr; } @@ -468,7 +468,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl first_ilo,first_ihi,first_jlo,first_jhi, first_klo,first_khi, second_ilo,second_ihi,second_jlo,second_jhi, - second_klo,second_khi,2,1,0,FFT_KOKKOS_PRECISION, + second_klo,second_khi,2,1,0,FFT_PRECISION, usecollective,usecuda_aware); if (plan->mid1_plan == nullptr) return nullptr; @@ -509,7 +509,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl second_jlo,second_jhi,second_klo,second_khi, second_ilo,second_ihi, third_jlo,third_jhi,third_klo,third_khi, - third_ilo,third_ihi,2,1,0,FFT_KOKKOS_PRECISION, + third_ilo,third_ihi,2,1,0,FFT_PRECISION, usecollective,usecuda_aware); if (plan->mid2_plan == nullptr) return nullptr; @@ -537,7 +537,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl third_klo,third_khi,third_ilo,third_ihi, third_jlo,third_jhi, out_klo,out_khi,out_ilo,out_ihi, - out_jlo,out_jhi,2,(permute+1)%3,0,FFT_KOKKOS_PRECISION, + out_jlo,out_jhi,2,(permute+1)%3,0,FFT_PRECISION, usecollective,usecuda_aware); if (plan->post_plan == nullptr) return nullptr; } @@ -599,11 +599,11 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl *nbuf = copy_size + scratch_size; if (copy_size) { - plan->d_copy = typename FFT_AT::t_FFT_KOKKOS_DATA_1d("fft3d:copy",copy_size); + plan->d_copy = typename FFT_AT::t_FFT_DATA_1d("fft3d:copy",copy_size); } if (scratch_size) { - plan->d_scratch = typename FFT_AT::t_FFT_KOKKOS_DATA_1d("fft3d:scratch",scratch_size); + plan->d_scratch = typename FFT_AT::t_FFT_DATA_1d("fft3d:scratch",scratch_size); } // system specific pre-computation of 1d FFT coeffs @@ -697,17 +697,17 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl cufftPlanMany(&(plan->plan_fast), 1, &nfast, &nfast,1,plan->length1, &nfast,1,plan->length1, - CUFFT_KOKKOS_TYPE,plan->total1/plan->length1); + CUFFT_TYPE,plan->total1/plan->length1); cufftPlanMany(&(plan->plan_mid), 1, &nmid, &nmid,1,plan->length2, &nmid,1,plan->length2, - CUFFT_KOKKOS_TYPE,plan->total2/plan->length2); + CUFFT_TYPE,plan->total2/plan->length2); cufftPlanMany(&(plan->plan_slow), 1, &nslow, &nslow,1,plan->length3, &nslow,1,plan->length3, - CUFFT_KOKKOS_TYPE,plan->total3/plan->length3); + CUFFT_TYPE,plan->total3/plan->length3); #elif defined(FFT_KOKKOS_HIPFFT) @@ -838,7 +838,7 @@ void FFT3dKokkos::bifactor(int n, int *factor1, int *factor2) ------------------------------------------------------------------------- */ template -void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_data, int nsize, int flag, +void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_1d d_data, int nsize, int flag, struct fft_plan_3d_kokkos *plan) { // total = size of data needed in each dim @@ -896,8 +896,8 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_KOKKO hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else kiss_fft_functor f; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_tmp = - typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + typename FFT_AT::t_FFT_DATA_1d d_tmp = + typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == -1) { f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length1); Kokkos::parallel_for(total1/length1,f); @@ -923,7 +923,7 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_KOKKO // limit num to size of data if (flag == 1 && plan->scaled) { - FFT_KOKKOS_SCALAR norm = plan->norm; + FFT_SCALAR norm = plan->norm; int num = MIN(plan->normnum,nsize); norm_functor f(d_data,norm); diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index 9729bc6a63..ed49c4b1ee 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -34,8 +34,8 @@ struct fft_plan_3d_kokkos { struct remap_plan_3d_kokkos *mid1_plan; // remap from 1st -> 2nd FFTs struct remap_plan_3d_kokkos *mid2_plan; // remap from 2nd -> 3rd FFTs struct remap_plan_3d_kokkos *post_plan; // remap from 3rd FFTs -> output - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_copy; // memory for remap results (if needed) - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_scratch; // scratch space for remaps + typename FFT_AT::t_FFT_DATA_1d d_copy; // memory for remap results (if needed) + typename FFT_AT::t_FFT_DATA_1d d_scratch; // scratch space for remaps int total1,total2,total3; // # of 1st,2nd,3rd FFTs (times length) int length1,length2,length3; // length of 1st,2nd,3rd FFTs int pre_target; // where to put remap results @@ -85,8 +85,8 @@ class FFT3dKokkos : protected Pointers { int,int,int,int,int,int,int,int,int,int,int,int,int,int,int, int,int,int *,int,int); ~FFT3dKokkos() override; - void compute(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, int); - void timing1d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, int, int); + void compute(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, int); + void timing1d(typename FFT_AT::t_FFT_SCALAR_1d, int, int); private: struct fft_plan_3d_kokkos *plan; @@ -96,7 +96,7 @@ class FFT3dKokkos : protected Pointers { KissFFTKokkos *kissfftKK; #endif - void fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d, typename FFT_AT::t_FFT_KOKKOS_DATA_1d, int, struct fft_plan_3d_kokkos *); + void fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d, typename FFT_AT::t_FFT_DATA_1d, int, struct fft_plan_3d_kokkos *); struct fft_plan_3d_kokkos *fft_3d_create_plan_kokkos(MPI_Comm, int, int, int, int, int, int, int, int, @@ -105,7 +105,7 @@ class FFT3dKokkos : protected Pointers { void fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokkos *); - void fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d, int, int, struct fft_plan_3d_kokkos *); + void fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_1d, int, int, struct fft_plan_3d_kokkos *); void bifactor(int, int *, int *); }; diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index d52bc0b968..15dca33bcc 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -20,8 +20,8 @@ // data types for 2d/3d FFTs -#ifndef LMP_FFT_KOKKOS_DATA_H -#define LMP_FFT_KOKKOS_DATA_H +#ifndef LMP_FFT_DATA_KOKKOS_H +#define LMP_FFT_DATA_KOKKOS_H #include "lmpfftsettings_kokkos.h" @@ -29,10 +29,10 @@ // Data types for single-precision complex -#if FFT_KOKKOS_PRECISION == 1 -#elif FFT_KOKKOS_PRECISION == 2 +#if FFT_PRECISION == 1 +#elif FFT_PRECISION == 2 #else -#error "FFT_KOKKOS_PRECISION needs to be either 1 (=single) or 2 (=double)" +#error "FFT_PRECISION needs to be either 1 (=single) or 2 (=double)" #endif @@ -86,7 +86,7 @@ #if defined(FFT_KOKKOS_MKL) #include "mkl_dfti.h" - #if defined(FFT_KOKKOS_SINGLE) + #if defined(FFT_SINGLE) typedef float _Complex FFT_KOKKOS_DATA; #define FFT_KOKKOS_MKL_PREC DFTI_SINGLE #else @@ -95,7 +95,7 @@ #endif #elif defined(FFT_KOKKOS_FFTW3) #include "fftw3.h" - #if defined(FFT_KOKKOS_SINGLE) + #if defined(FFT_SINGLE) typedef fftwf_complex FFT_KOKKOS_DATA; #define FFTW_API(function) fftwf_ ## function #else @@ -104,18 +104,18 @@ #endif #elif defined(FFT_KOKKOS_CUFFT) #include "cufft.h" - #if defined(FFT_KOKKOS_SINGLE) + #if defined(FFT_SINGLE) #define cufftExec cufftExecC2C - #define CUFFT_KOKKOS_TYPE CUFFT_KOKKOS_C2C + #define CUFFT_TYPE CUFFT_C2C typedef cufftComplex FFT_KOKKOS_DATA; #else #define cufftExec cufftExecZ2Z - #define CUFFT_KOKKOS_TYPE CUFFT_KOKKOS_Z2Z + #define CUFFT_TYPE CUFFT_Z2Z typedef cufftDoubleComplex FFT_KOKKOS_DATA; #endif #elif defined(FFT_KOKKOS_HIPFFT) #include - #if defined(FFT_KOKKOS_SINGLE) + #if defined(FFT_SINGLE) #define hipfftExec hipfftExecC2C #define HIPFFT_KOKKOS_TYPE HIPFFT_C2C typedef hipfftComplex FFT_KOKKOS_DATA; @@ -125,7 +125,7 @@ typedef hipfftDoubleComplex FFT_KOKKOS_DATA; #endif #else - #if defined(FFT_KOKKOS_SINGLE) + #if defined(FFT_SINGLE) #define kiss_fft_scalar float #else #define kiss_fft_scalar double @@ -141,7 +141,7 @@ // (double[2]*) is not a 1D pointer #if defined(FFT_KOKKOS_FFTW3) - typedef FFT_KOKKOS_SCALAR* FFT_KOKKOS_DATA_POINTER; + typedef FFT_SCALAR* FFT_KOKKOS_DATA_POINTER; #else typedef FFT_KOKKOS_DATA* FFT_KOKKOS_DATA_POINTER; #endif @@ -154,23 +154,23 @@ template <> struct FFTArrayTypes { typedef Kokkos:: - DualView tdual_FFT_KOKKOS_SCALAR_1d; -typedef tdual_FFT_KOKKOS_SCALAR_1d::t_dev t_FFT_KOKKOS_SCALAR_1d; -typedef tdual_FFT_KOKKOS_SCALAR_1d::t_dev_um t_FFT_KOKKOS_SCALAR_1d_um; + DualView tdual_FFT_SCALAR_1d; +typedef tdual_FFT_SCALAR_1d::t_dev t_FFT_SCALAR_1d; +typedef tdual_FFT_SCALAR_1d::t_dev_um t_FFT_SCALAR_1d_um; -typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d; -typedef tdual_FFT_KOKKOS_SCALAR_2d::t_dev t_FFT_KOKKOS_SCALAR_2d; +typedef Kokkos::DualView tdual_FFT_SCALAR_2d; +typedef tdual_FFT_SCALAR_2d::t_dev t_FFT_SCALAR_2d; -typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d_3; -typedef tdual_FFT_KOKKOS_SCALAR_2d_3::t_dev t_FFT_KOKKOS_SCALAR_2d_3; +typedef Kokkos::DualView tdual_FFT_SCALAR_2d_3; +typedef tdual_FFT_SCALAR_2d_3::t_dev t_FFT_SCALAR_2d_3; -typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_3d; -typedef tdual_FFT_KOKKOS_SCALAR_3d::t_dev t_FFT_KOKKOS_SCALAR_3d; +typedef Kokkos::DualView tdual_FFT_SCALAR_3d; +typedef tdual_FFT_SCALAR_3d::t_dev t_FFT_SCALAR_3d; typedef Kokkos:: - DualView tdual_FFT_KOKKOS_DATA_1d; -typedef tdual_FFT_KOKKOS_DATA_1d::t_dev t_FFT_KOKKOS_DATA_1d; -typedef tdual_FFT_KOKKOS_DATA_1d::t_dev_um t_FFT_KOKKOS_DATA_1d_um; + DualView tdual_FFT_DATA_1d; +typedef tdual_FFT_DATA_1d::t_dev t_FFT_DATA_1d; +typedef tdual_FFT_DATA_1d::t_dev_um t_FFT_DATA_1d_um; typedef Kokkos:: DualView tdual_int_64; @@ -186,23 +186,23 @@ struct FFTArrayTypes { //Kspace typedef Kokkos:: - DualView tdual_FFT_KOKKOS_SCALAR_1d; -typedef tdual_FFT_KOKKOS_SCALAR_1d::t_host t_FFT_KOKKOS_SCALAR_1d; -typedef tdual_FFT_KOKKOS_SCALAR_1d::t_host_um t_FFT_KOKKOS_SCALAR_1d_um; + DualView tdual_FFT_SCALAR_1d; +typedef tdual_FFT_SCALAR_1d::t_host t_FFT_SCALAR_1d; +typedef tdual_FFT_SCALAR_1d::t_host_um t_FFT_SCALAR_1d_um; -typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d; -typedef tdual_FFT_KOKKOS_SCALAR_2d::t_host t_FFT_KOKKOS_SCALAR_2d; +typedef Kokkos::DualView tdual_FFT_SCALAR_2d; +typedef tdual_FFT_SCALAR_2d::t_host t_FFT_SCALAR_2d; -typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d_3; -typedef tdual_FFT_KOKKOS_SCALAR_2d_3::t_host t_FFT_KOKKOS_SCALAR_2d_3; +typedef Kokkos::DualView tdual_FFT_SCALAR_2d_3; +typedef tdual_FFT_SCALAR_2d_3::t_host t_FFT_SCALAR_2d_3; -typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_3d; -typedef tdual_FFT_KOKKOS_SCALAR_3d::t_host t_FFT_KOKKOS_SCALAR_3d; +typedef Kokkos::DualView tdual_FFT_SCALAR_3d; +typedef tdual_FFT_SCALAR_3d::t_host t_FFT_SCALAR_3d; typedef Kokkos:: - DualView tdual_FFT_KOKKOS_DATA_1d; -typedef tdual_FFT_KOKKOS_DATA_1d::t_host t_FFT_KOKKOS_DATA_1d; -typedef tdual_FFT_KOKKOS_DATA_1d::t_host_um t_FFT_KOKKOS_DATA_1d_um; + DualView tdual_FFT_DATA_1d; +typedef tdual_FFT_DATA_1d::t_host t_FFT_DATA_1d; +typedef tdual_FFT_DATA_1d::t_host_um t_FFT_DATA_1d_um; typedef Kokkos:: DualView tdual_int_64; @@ -212,12 +212,12 @@ typedef tdual_int_64::t_host_um t_int_64_um; }; #endif -typedef struct FFTArrayTypes FFT_KOKKOS_DAT; -typedef struct FFTArrayTypes FFT_KOKKOS_HAT; +typedef struct FFTArrayTypes FFT_DAT; +typedef struct FFTArrayTypes FFT_HAT; #if defined(FFT_KOKKOS_KISSFFT) -#include "kissfft_kokkos.h" // uses t_FFT_KOKKOS_DATA_1d, needs to come last +#include "kissfft_kokkos.h" // uses t_FFT_DATA_1d, needs to come last #endif diff --git a/src/KOKKOS/grid3d_kokkos.cpp b/src/KOKKOS/grid3d_kokkos.cpp index 0f8e0bdc4e..9a82e0157d 100644 --- a/src/KOKKOS/grid3d_kokkos.cpp +++ b/src/KOKKOS/grid3d_kokkos.cpp @@ -636,7 +636,7 @@ void Grid3dKokkos::setup_comm_tiled(int &nbuf1, int &nbuf2) template void Grid3dKokkos::forward_comm(int caller, void *ptr, int which, int nper, int nbyte, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf2, + FFT_DAT::tdual_FFT_SCALAR_1d& k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d& k_buf2, MPI_Datatype datatype) { if (caller == KSPACE) { @@ -655,14 +655,14 @@ void Grid3dKokkos::forward_comm(int caller, void *ptr, int which, in template void Grid3dKokkos:: forward_comm_kspace_brick(KSpace *kspace, int which, int nper, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int m; MPI_Request request; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_KOKKOS_SCALAR* buf1; - FFT_KOKKOS_SCALAR* buf2; + FFT_SCALAR* buf1; + FFT_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); @@ -709,13 +709,13 @@ forward_comm_kspace_brick(KSpace *kspace, int which, int nper, template void Grid3dKokkos:: forward_comm_kspace_tiled(KSpace *kspace, int which, int nper, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int i,m,offset; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_KOKKOS_SCALAR* buf1; - FFT_KOKKOS_SCALAR* buf2; + FFT_SCALAR* buf1; + FFT_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); @@ -776,7 +776,7 @@ forward_comm_kspace_tiled(KSpace *kspace, int which, int nper, template void Grid3dKokkos::reverse_comm(int caller, void *ptr, int which, int nper, int nbyte, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf2, + FFT_DAT::tdual_FFT_SCALAR_1d& k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d& k_buf2, MPI_Datatype datatype) { if (caller == KSPACE) { @@ -795,14 +795,14 @@ void Grid3dKokkos::reverse_comm(int caller, void *ptr, int which, in template void Grid3dKokkos:: reverse_comm_kspace_brick(KSpace *kspace, int which, int nper, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int m; MPI_Request request; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_KOKKOS_SCALAR* buf1; - FFT_KOKKOS_SCALAR* buf2; + FFT_SCALAR* buf1; + FFT_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); @@ -850,14 +850,14 @@ reverse_comm_kspace_brick(KSpace *kspace, int which, int nper, template void Grid3dKokkos:: reverse_comm_kspace_tiled(KSpace *kspace, int which, int nper, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int i,m,offset; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_KOKKOS_SCALAR* buf1; - FFT_KOKKOS_SCALAR* buf2; + FFT_SCALAR* buf1; + FFT_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); diff --git a/src/KOKKOS/grid3d_kokkos.h b/src/KOKKOS/grid3d_kokkos.h index 864ac19c06..19751d83c9 100644 --- a/src/KOKKOS/grid3d_kokkos.h +++ b/src/KOKKOS/grid3d_kokkos.h @@ -34,9 +34,9 @@ class Grid3dKokkos : public Grid3d { ~Grid3dKokkos() override; void forward_comm(int, void *, int, int, int, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); + FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); void reverse_comm(int, void *, int, int, int, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); + FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); private: DAT::tdual_int_2d k_swap_packlist; @@ -57,13 +57,13 @@ class Grid3dKokkos : public Grid3d { void setup_comm_tiled(int &, int &) override; void forward_comm_kspace_brick(class KSpace *, int, int, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); + FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); void forward_comm_kspace_tiled(class KSpace *, int, int, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); + FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); void reverse_comm_kspace_brick(class KSpace *, int, int, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); + FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); void reverse_comm_kspace_tiled(class KSpace *, int, int, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); + FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); void grow_swap() override; diff --git a/src/KOKKOS/kissfft_kokkos.h b/src/KOKKOS/kissfft_kokkos.h index 66f32d29fb..265677a21c 100644 --- a/src/KOKKOS/kissfft_kokkos.h +++ b/src/KOKKOS/kissfft_kokkos.h @@ -119,14 +119,14 @@ }while(0) */ -#define KISS_FFT_KOKKOS_COS(phase) (FFT_KOKKOS_SCALAR) cos(phase) -#define KISS_FFT_KOKKOS_SIN(phase) (FFT_KOKKOS_SCALAR) sin(phase) +#define KISS_FFT_COS(phase) (FFT_SCALAR) cos(phase) +#define KISS_FFT_SIN(phase) (FFT_SCALAR) sin(phase) #define HALF_OF(x) ((x)*.5) #define kf_cexp(x,x_index,phase) \ do{ \ - (x)(x_index).re = KISS_FFT_KOKKOS_COS(phase);\ - (x)(x_index).im = KISS_FFT_KOKKOS_SIN(phase);\ + (x)(x_index).re = KISS_FFT_COS(phase);\ + (x)(x_index).im = KISS_FFT_SIN(phase);\ }while(0) @@ -142,8 +142,8 @@ struct kiss_fft_state_kokkos { int nfft; int inverse; typename FFT_AT::t_int_64 d_factors; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_twiddles; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_scratch; + typename FFT_AT::t_FFT_DATA_1d d_twiddles; + typename FFT_AT::t_FFT_DATA_1d d_scratch; }; template @@ -153,11 +153,11 @@ class KissFFTKokkos { typedef FFTArrayTypes FFT_AT; KOKKOS_INLINE_FUNCTION - static void kf_bfly2(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly2(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int Fout_count) { - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_KOKKOS_SCALAR t[2]; + typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_SCALAR t[2]; int Fout2_count; int tw1_count = 0; @@ -179,11 +179,11 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly4(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly4(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, const size_t m, int Fout_count) { - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_KOKKOS_SCALAR scratch[6][2]; + typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_SCALAR scratch[6][2]; size_t k=m; const size_t m2=2*m; const size_t m3=3*m; @@ -237,14 +237,14 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly3(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly3(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, size_t m, int Fout_count) { size_t k=m; const size_t m2 = 2*m; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_KOKKOS_SCALAR scratch[5][2]; - FFT_KOKKOS_SCALAR epi3[2]; + typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_SCALAR scratch[5][2]; + FFT_SCALAR epi3[2]; //C_EQ(epi3,d_twiddles[fstride*m]); epi3[0] = d_twiddles(fstride*m).re; epi3[1] = d_twiddles(fstride*m).im; @@ -289,13 +289,13 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly5(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly5(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int Fout_count) { int u; - FFT_KOKKOS_SCALAR scratch[13][2]; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_KOKKOS_SCALAR ya[2],yb[2]; + FFT_SCALAR scratch[13][2]; + typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_SCALAR ya[2],yb[2]; //C_EQ(ya,d_twiddles[fstride*m]); ya[1] = d_twiddles(fstride*m).im; ya[0] = d_twiddles(fstride*m).re; @@ -369,15 +369,15 @@ class KissFFTKokkos { /* perform the butterfly for one stage of a mixed radix FFT */ KOKKOS_INLINE_FUNCTION - static void kf_bfly_generic(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly_generic(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int p, int Fout_count) { int u,k,q1,q; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_KOKKOS_SCALAR t[2]; + typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_SCALAR t[2]; int Norig = st.nfft; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_scratch = st.d_scratch; + typename FFT_AT::t_FFT_DATA_1d_um d_scratch = st.d_scratch; for ( u=0; u &st, int Fout_count, int f_count, int factors_count) { @@ -452,7 +452,7 @@ class KissFFTKokkos { p[i] * m[i] = m[i-1] m0 = n */ - static int kf_factor(int n, FFT_KOKKOS_HAT::t_int_64 h_facbuf) + static int kf_factor(int n, FFT_HAT::t_int_64 h_facbuf) { int p=4, nf=0; double floor_sqrt; @@ -497,11 +497,11 @@ class KissFFTKokkos { st.inverse = inverse_fft; typename FFT_AT::tdual_int_64 k_factors = typename FFT_AT::tdual_int_64(); - typename FFT_AT::tdual_FFT_KOKKOS_DATA_1d k_twiddles = typename FFT_AT::tdual_FFT_KOKKOS_DATA_1d(); + typename FFT_AT::tdual_FFT_DATA_1d k_twiddles = typename FFT_AT::tdual_FFT_DATA_1d(); if (nfft > 0) { k_factors = typename FFT_AT::tdual_int_64("kissfft:factors",MAXFACTORS*2); - k_twiddles = typename FFT_AT::tdual_FFT_KOKKOS_DATA_1d("kissfft:twiddles",nfft); + k_twiddles = typename FFT_AT::tdual_FFT_DATA_1d("kissfft:twiddles",nfft); for (i=0;i(); @@ -524,12 +524,12 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kiss_fft_stride(const kiss_fft_state_kokkos &st, const typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_fin, typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_fout, int in_stride, int offset) + static void kiss_fft_stride(const kiss_fft_state_kokkos &st, const typename FFT_AT::t_FFT_DATA_1d_um &d_fin, typename FFT_AT::t_FFT_DATA_1d_um &d_fout, int in_stride, int offset) { //if (d_fin.data() == d_fout.data()) { // // NOTE: this is not really an in-place FFT algorithm. // // It just performs an out-of-place FFT into a temp buffer - // typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_tmpbuf = typename FFT_AT::t_FFT_KOKKOS_DATA_1d("kissfft:tmpbuf",d_fin.extent(1)); + // typename FFT_AT::t_FFT_DATA_1d_um d_tmpbuf = typename FFT_AT::t_FFT_DATA_1d("kissfft:tmpbuf",d_fin.extent(1)); // kf_work(d_tmpbuf,d_fin,1,in_stride,st.d_factors,st,offset,offset).re; // Kokkos::deep_copy(d_fout,d_tmpbuf); //} else { @@ -538,7 +538,7 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kiss_fft_kokkos(const kiss_fft_state_kokkos &cfg, const typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_fin, typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_fout, int offset) + static void kiss_fft_kokkos(const kiss_fft_state_kokkos &cfg, const typename FFT_AT::t_FFT_DATA_1d_um d_fin, typename FFT_AT::t_FFT_DATA_1d_um d_fout, int offset) { kiss_fft_stride(cfg,d_fin,d_fout,1,offset); } diff --git a/src/KOKKOS/kokkos_base_fft.h b/src/KOKKOS/kokkos_base_fft.h index 567dc02ff3..08369b3c78 100644 --- a/src/KOKKOS/kokkos_base_fft.h +++ b/src/KOKKOS/kokkos_base_fft.h @@ -12,8 +12,8 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ -#ifndef LMP_KOKKOS_BASE_FFT_KOKKOS_H -#define LMP_KOKKOS_BASE_FFT_KOKKOS_H +#ifndef LMP_KOKKOS_BASE_FFT_H +#define LMP_KOKKOS_BASE_FFT_H #include "fftdata_kokkos.h" @@ -24,10 +24,10 @@ class KokkosBaseFFT { KokkosBaseFFT() {} // Kspace - virtual void pack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual void unpack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; - virtual void pack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual void unpack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; + virtual void pack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void unpack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; + virtual void pack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void unpack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; }; } diff --git a/src/KOKKOS/lmpfftsettings_kokkos.h b/src/KOKKOS/lmpfftsettings_kokkos.h index 148e001de3..6cea9bb63a 100644 --- a/src/KOKKOS/lmpfftsettings_kokkos.h +++ b/src/KOKKOS/lmpfftsettings_kokkos.h @@ -16,6 +16,8 @@ #ifndef LMP_FFT_KOKKOS_SETTINGS_H #define LMP_FFT_KOKKOS_SETTINGS_H +#include "lmpfftsettings.h" + // if user set FFTW, it means FFTW3 #ifdef FFT_KOKKOS_FFTW @@ -38,17 +40,4 @@ #define LMP_FFT_KOKKOS_LIB "KISS FFT" #endif -#ifdef FFT_KOKKOS_SINGLE -typedef float FFT_KOKKOS_SCALAR; -#define FFT_KOKKOS_PRECISION 1 -#define LMP_FFT_KOKKOS_PREC "single" -#define MPI_FFT_KOKKOS_SCALAR MPI_FLOAT -#else - -typedef double FFT_KOKKOS_SCALAR; -#define FFT_KOKKOS_PRECISION 2 -#define LMP_FFT_KOKKOS_PREC "double" -#define MPI_FFT_KOKKOS_SCALAR MPI_DOUBLE -#endif - #endif diff --git a/src/KOKKOS/pack_kokkos.h b/src/KOKKOS/pack_kokkos.h index 5e014db020..fe90d294a6 100644 --- a/src/KOKKOS/pack_kokkos.h +++ b/src/KOKKOS/pack_kokkos.h @@ -44,7 +44,7 @@ struct pack_3d_functor { public: typedef DeviceType device_type; typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -52,7 +52,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - pack_3d_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + pack_3d_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -80,7 +80,7 @@ public: } }; -static void pack_3d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan) +static void pack_3d(typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -98,7 +98,7 @@ struct unpack_3d_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -106,7 +106,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -134,7 +134,7 @@ public: } }; -static void unpack_3d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -153,7 +153,7 @@ struct unpack_3d_permute1_1_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -161,7 +161,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute1_1_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_1_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -189,7 +189,7 @@ public: } }; -static void unpack_3d_permute1_1(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_1(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -206,7 +206,7 @@ struct unpack_3d_permute1_2_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -214,7 +214,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute1_2_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_2_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -243,7 +243,7 @@ public: } }; -static void unpack_3d_permute1_2(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_2(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -261,7 +261,7 @@ struct unpack_3d_permute1_n_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -270,7 +270,7 @@ public: int nstride_plane; // stride between successive slow indices int nqty; // # of values/element - unpack_3d_permute1_n_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_n_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -299,7 +299,7 @@ public: } }; -static void unpack_3d_permute1_n(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_n(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -317,7 +317,7 @@ struct unpack_3d_permute2_1_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -325,7 +325,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute2_1_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_1_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -352,7 +352,7 @@ public: } }; -static void unpack_3d_permute2_1(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_1(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -370,7 +370,7 @@ struct unpack_3d_permute2_2_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -378,7 +378,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute2_2_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_2_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -406,7 +406,7 @@ public: } }; -static void unpack_3d_permute2_2(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_2(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -423,7 +423,7 @@ struct unpack_3d_permute2_n_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -432,7 +432,7 @@ public: int nstride_plane; // stride between successive slow indices int nqty; // # of values/element - unpack_3d_permute2_n_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_n_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -460,7 +460,7 @@ public: } }; -static void unpack_3d_permute2_n(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_n(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index ed7ace08c1..6e1b3a83fa 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -48,7 +48,7 @@ using namespace MathSpecialKokkos; enum{REVERSE_RHO}; enum{FORWARD_IK,FORWARD_IK_PERATOM}; -#ifdef FFT_KOKKOS_SINGLE +#ifdef FFT_SINGLE #define ZEROF 0.0f #define ONEF 1.0f #else @@ -582,7 +582,7 @@ void PPPMKokkos::compute(int eflag, int vflag) if (atom->nmax > nmax) { nmax = atomKK->nmax; d_part2grid = typename AT::t_int_1d_3("pppm:part2grid",nmax); - d_rho1d = typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); + d_rho1d = typename FFT_AT::t_FFT_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); } // find grid points for all my particles @@ -595,8 +595,8 @@ void PPPMKokkos::compute(int eflag, int vflag) // to fully sum contribution in their 3d bricks // remap from 3d decomposition to FFT decomposition - gc->reverse_comm(Grid3d::KSPACE,this,REVERSE_RHO,1,sizeof(FFT_KOKKOS_SCALAR), - k_gc_buf1,k_gc_buf2,MPI_FFT_KOKKOS_SCALAR); + gc->reverse_comm(Grid3d::KSPACE,this,REVERSE_RHO,1,sizeof(FFT_SCALAR), + k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR); brick2fft(); // compute potential gradient on my FFT grid and @@ -609,14 +609,14 @@ void PPPMKokkos::compute(int eflag, int vflag) // all procs communicate E-field values // to fill ghost cells surrounding their 3d bricks - gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK,3,sizeof(FFT_KOKKOS_SCALAR), - k_gc_buf1,k_gc_buf2,MPI_FFT_KOKKOS_SCALAR); + gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK,3,sizeof(FFT_SCALAR), + k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR); // extra per-atom energy/virial communication if (evflag_atom) - gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK_PERATOM,7,sizeof(FFT_KOKKOS_SCALAR), - k_gc_buf1,k_gc_buf2,MPI_FFT_KOKKOS_SCALAR); + gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK_PERATOM,7,sizeof(FFT_SCALAR), + k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR); // calculate the force on my particles @@ -730,8 +730,8 @@ void PPPMKokkos::allocate() npergrid = 3; - k_gc_buf1 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); - k_gc_buf2 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); + k_gc_buf1 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); + k_gc_buf2 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); // tally local grid sizes // ngrid = count of owned+ghost grid cells on this proc @@ -753,7 +753,7 @@ void PPPMKokkos::allocate() // allocate distributed grid data - d_density_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_density_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); memoryKK->create_kokkos(k_density_fft,density_fft,nfft_both,"pppm:d_density_fft"); d_density_fft = k_density_fft.view(); @@ -775,17 +775,17 @@ void PPPMKokkos::allocate() d_fkz = typename AT::t_float_1d("pppm:d_fkz",nfft_both); } - d_vdx_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_vdy_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_vdz_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdx_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdy_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdz_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); // summation coeffs order_allocated = order; k_gf_b = typename DAT::tdual_float_1d("pppm:gf_b",order); d_gf_b = k_gf_b.view(); - d_rho1d = typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); - k_rho_coeff = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_2d("pppm:rho_coeff",order,order/2-(1-order)/2+1); + d_rho1d = typename FFT_AT::t_FFT_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); + k_rho_coeff = FFT_DAT::tdual_FFT_SCALAR_2d("pppm:rho_coeff",order,order/2-(1-order)/2+1); d_rho_coeff = k_rho_coeff.view(); h_rho_coeff = k_rho_coeff.h_view; @@ -810,7 +810,7 @@ void PPPMKokkos::allocate() remap = new RemapKokkos(lmp,world, nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,FFT_KOKKOS_PRECISION,collective_flag,gpu_aware_flag); + 1,0,0,FFT_PRECISION,collective_flag,gpu_aware_flag); } /* ---------------------------------------------------------------------- @@ -847,14 +847,14 @@ void PPPMKokkos::allocate_peratom() { peratom_allocate_flag = 1; - d_u_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_u_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v0_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v1_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v2_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v3_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v4_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v5_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v0_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v1_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v2_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v3_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v4_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v5_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); // use same GC ghost grid object for peratom grid communication @@ -862,8 +862,8 @@ void PPPMKokkos::allocate_peratom() npergrid = 7; - k_gc_buf1 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); - k_gc_buf2 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); + k_gc_buf1 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); + k_gc_buf2 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); } /* ---------------------------------------------------------------------- @@ -1234,14 +1234,14 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_make_rho_atomic, const int &i) const { // The density_brick array is atomic for Half/Thread neighbor style - Kokkos::View::value,Kokkos::MemoryTraits > a_density_brick = d_density_brick; + Kokkos::View::value,Kokkos::MemoryTraits > a_density_brick = d_density_brick; int nx = d_part2grid(i,0); int ny = d_part2grid(i,1); int nz = d_part2grid(i,2); - const FFT_KOKKOS_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; - const FFT_KOKKOS_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; - const FFT_KOKKOS_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; + const FFT_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; + const FFT_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; + const FFT_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; nz -= nzlo_out; @@ -1250,13 +1250,13 @@ void PPPMKokkos::operator()(TagPPPM_make_rho_atomic, const int &i) c compute_rho1d(i,dx,dy,dz); - const FFT_KOKKOS_SCALAR z0 = delvolinv * q[i]; + const FFT_SCALAR z0 = delvolinv * q[i]; for (int n = nlower; n <= nupper; n++) { const int mz = n+nz; - const FFT_KOKKOS_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); + const FFT_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); for (int m = nlower; m <= nupper; m++) { const int my = m+ny; - const FFT_KOKKOS_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); + const FFT_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); for (int l = nlower; l <= nupper; l++) { const int mx = l+nx; a_density_brick(mz,my,mx) += x0*d_rho1d(i,l+order/2,0); @@ -1294,9 +1294,9 @@ void PPPMKokkos::operator() (TagPPPM_make_rho, typename Kokkos::Team if ( ((nz+nlower-nzlo_out)*ix*iy >= ito) || ((nz+nupper-nzlo_out+1)*ix*iy < ifrom) ) continue; - const FFT_KOKKOS_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; - const FFT_KOKKOS_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; - const FFT_KOKKOS_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; + const FFT_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; + const FFT_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; + const FFT_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; nz -= nzlo_out; ny -= nylo_out; @@ -1304,15 +1304,15 @@ void PPPMKokkos::operator() (TagPPPM_make_rho, typename Kokkos::Team compute_rho1d(i,dx,dy,dz); - const FFT_KOKKOS_SCALAR z0 = delvolinv * q[i]; + const FFT_SCALAR z0 = delvolinv * q[i]; for (int n = nlower; n <= nupper; n++) { const int mz = n+nz; const int in = mz*ix*iy; - const FFT_KOKKOS_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); + const FFT_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); for (int m = nlower; m <= nupper; m++) { const int my = m+ny; const int im = in+my*ix; - const FFT_KOKKOS_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); + const FFT_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); for (int l = nlower; l <= nupper; l++) { const int mx = l+nx; const int il = im+mx; @@ -2040,8 +2040,8 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_fieldforce_ik, const int &i) const { int l,m,n,nx,ny,nz,mx,my,mz; - FFT_KOKKOS_SCALAR x0,y0,z0; - FFT_KOKKOS_SCALAR ekx,eky,ekz; + FFT_SCALAR x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; nx = d_part2grid(i,0); ny = d_part2grid(i,1); @@ -2100,8 +2100,8 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_fieldforce_peratom, const int &i) const { int l,m,n,nx,ny,nz,mx,my,mz; - FFT_KOKKOS_SCALAR dx,dy,dz,x0,y0,z0; - FFT_KOKKOS_SCALAR u,v0,v1,v2,v3,v4,v5; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u,v0,v1,v2,v3,v4,v5; nx = d_part2grid(i,0); ny = d_part2grid(i,1); @@ -2155,7 +2155,7 @@ void PPPMKokkos::operator()(TagPPPM_fieldforce_peratom, const int &i ------------------------------------------------------------------------- */ template -void PPPMKokkos::pack_forward_grid_kokkos(int flag, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::pack_forward_grid_kokkos(int flag, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2211,7 +2211,7 @@ void PPPMKokkos::operator()(TagPPPM_pack_forward2, const int &i) con ------------------------------------------------------------------------- */ template -void PPPMKokkos::unpack_forward_grid_kokkos(int flag, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::unpack_forward_grid_kokkos(int flag, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2269,7 +2269,7 @@ void PPPMKokkos::operator()(TagPPPM_unpack_forward2, const int &i) c ------------------------------------------------------------------------- */ template -void PPPMKokkos::pack_reverse_grid_kokkos(int /*flag*/, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::pack_reverse_grid_kokkos(int /*flag*/, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2299,7 +2299,7 @@ void PPPMKokkos::operator()(TagPPPM_pack_reverse, const int &i) cons ------------------------------------------------------------------------- */ template -void PPPMKokkos::unpack_reverse_grid_kokkos(int /*flag*/, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::unpack_reverse_grid_kokkos(int /*flag*/, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2332,11 +2332,11 @@ void PPPMKokkos::operator()(TagPPPM_unpack_reverse, const int &i) co template KOKKOS_INLINE_FUNCTION -void PPPMKokkos::compute_rho1d(const int i, const FFT_KOKKOS_SCALAR &dx, const FFT_KOKKOS_SCALAR &dy, - const FFT_KOKKOS_SCALAR &dz) const +void PPPMKokkos::compute_rho1d(const int i, const FFT_SCALAR &dx, const FFT_SCALAR &dy, + const FFT_SCALAR &dz) const { int k,l; - FFT_KOKKOS_SCALAR r1,r2,r3; + FFT_SCALAR r1,r2,r3; for (k = (1-order)/2; k <= order/2; k++) { r1 = r2 = r3 = ZEROF; @@ -2375,10 +2375,10 @@ template void PPPMKokkos::compute_rho_coeff() { int j,k,l,m; - FFT_KOKKOS_SCALAR s; - FFT_KOKKOS_SCALAR **a = new FFT_KOKKOS_SCALAR *[order]; + FFT_SCALAR s; + FFT_SCALAR **a = new FFT_SCALAR *[order]; for (int i = 0; i < order; ++i) - a[i] = new FFT_KOKKOS_SCALAR[2*order+1]; + a[i] = new FFT_SCALAR[2*order+1]; for (k = 0; k <= 2*order; k++) for (l = 0; l < order; l++) @@ -2390,7 +2390,7 @@ void PPPMKokkos::compute_rho_coeff() s = 0.0; for (l = 0; l < j; l++) { a[l+1][k+order] = (a[l][k+1+order]-a[l][k-1+order]) / (l+1); -#ifdef FFT_KOKKOS_SINGLE +#ifdef FFT_SINGLE s += powf(0.5,(float) l+1) * (a[l][k-1+order] + powf(-1.0,(float) l) * a[l][k+1+order]) / (l+1); #else @@ -2586,18 +2586,18 @@ double PPPMKokkos::memory_usage() double bytes = (double)nmax*3 * sizeof(double); int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * (nzhi_out-nzlo_out+1); - bytes += (double)4 * nbrick * sizeof(FFT_KOKKOS_SCALAR); + bytes += (double)4 * nbrick * sizeof(FFT_SCALAR); if (triclinic) bytes += (double)3 * nfft_both * sizeof(double); bytes += (double)6 * nfft_both * sizeof(double); bytes += (double)nfft_both * sizeof(double); - bytes += (double)nfft_both*5 * sizeof(FFT_KOKKOS_SCALAR); + bytes += (double)nfft_both*5 * sizeof(FFT_SCALAR); if (peratom_allocate_flag) - bytes += (double)6 * nbrick * sizeof(FFT_KOKKOS_SCALAR); + bytes += (double)6 * nbrick * sizeof(FFT_SCALAR); // two Grid3d bufs - bytes += (double)(ngc_buf1 + ngc_buf2) * npergrid * sizeof(FFT_KOKKOS_SCALAR); + bytes += (double)(ngc_buf1 + ngc_buf2) * npergrid * sizeof(FFT_SCALAR); return bytes; } diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index 09513c9a2f..14d4670dbd 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -350,7 +350,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { int nx,ny,nz; typename AT::t_int_1d_um d_list_index; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf; int unpack_offset; DAT::tdual_int_scalar k_flag; @@ -364,31 +364,31 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { typename ArrayTypes::t_efloat_1d d_eatom; typename ArrayTypes::t_virial_array d_vatom; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_density_brick; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_u_brick; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick; + typename FFT_AT::t_FFT_SCALAR_3d d_density_brick; + typename FFT_AT::t_FFT_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick; + typename FFT_AT::t_FFT_SCALAR_3d d_u_brick; + typename FFT_AT::t_FFT_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick; + typename FFT_AT::t_FFT_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick; typename AT::t_float_1d d_greensfn; typename AT::t_virial_array d_vg; typename AT::t_float_1d d_fkx; typename AT::t_float_1d d_fky; typename AT::t_float_1d d_fkz; - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_density_fft; - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_work1; - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_work2; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_density_fft; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_work1; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_work2; + FFT_DAT::tdual_FFT_SCALAR_1d k_density_fft; + FFT_DAT::tdual_FFT_SCALAR_1d k_work1; + FFT_DAT::tdual_FFT_SCALAR_1d k_work2; + typename FFT_AT::t_FFT_SCALAR_1d d_density_fft; + typename FFT_AT::t_FFT_SCALAR_1d d_work1; + typename FFT_AT::t_FFT_SCALAR_1d d_work2; DAT::tdual_float_1d k_gf_b; typename AT::t_float_1d d_gf_b; - //FFT_KOKKOS_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d_3 d_rho1d; - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_2d k_rho_coeff; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d d_rho_coeff; - FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_2d h_rho_coeff; + //FFT_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff; + typename FFT_AT::t_FFT_SCALAR_2d_3 d_rho1d; + FFT_DAT::tdual_FFT_SCALAR_2d k_rho_coeff; + typename FFT_AT::t_FFT_SCALAR_2d d_rho_coeff; + FFT_HAT::t_FFT_SCALAR_2d h_rho_coeff; //double **acons; typename Kokkos::DualView::t_host acons; @@ -398,7 +398,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { RemapKokkos *remap; Grid3dKokkos *gc; - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_gc_buf1,k_gc_buf2; + FFT_DAT::tdual_FFT_SCALAR_1d k_gc_buf1,k_gc_buf2; int ngc_buf1,ngc_buf2,npergrid; //int **part2grid; // storage for particle -> grid mapping @@ -429,17 +429,17 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { void fieldforce_peratom() override; KOKKOS_INLINE_FUNCTION - void compute_rho1d(const int i, const FFT_KOKKOS_SCALAR &, const FFT_KOKKOS_SCALAR &, - const FFT_KOKKOS_SCALAR &) const; + void compute_rho1d(const int i, const FFT_SCALAR &, const FFT_SCALAR &, + const FFT_SCALAR &) const; void compute_rho_coeff(); void slabcorr() override; // grid communication - void pack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; - void unpack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; - void pack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; - void unpack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; + void pack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; + void unpack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; + void pack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; + void unpack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; // triclinic diff --git a/src/KOKKOS/remap_kokkos.cpp b/src/KOKKOS/remap_kokkos.cpp index 18ba626460..efc6742a25 100644 --- a/src/KOKKOS/remap_kokkos.cpp +++ b/src/KOKKOS/remap_kokkos.cpp @@ -59,7 +59,7 @@ RemapKokkos::~RemapKokkos() /* ---------------------------------------------------------------------- */ template -void RemapKokkos::perform(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_buf) +void RemapKokkos::perform(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, typename FFT_AT::t_FFT_SCALAR_1d d_buf) { remap_3d_kokkos(d_in,d_out,d_buf,plan); } @@ -103,7 +103,7 @@ void RemapKokkos::perform(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_ ------------------------------------------------------------------------- */ template -void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_buf, +void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, typename FFT_AT::t_FFT_SCALAR_1d d_buf, struct remap_plan_3d_kokkos *plan) { // collective flag not yet supported @@ -111,7 +111,7 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_SCAL // use point-to-point communication int i,isend,irecv; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; + typename FFT_AT::t_FFT_SCALAR_1d d_scratch; if (plan->memory == 0) d_scratch = d_buf; @@ -120,20 +120,20 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_SCAL // post all recvs into scratch space - FFT_KOKKOS_SCALAR* v_scratch = d_scratch.data(); + FFT_SCALAR* v_scratch = d_scratch.data(); if (!plan->usecuda_aware) { plan->h_scratch = Kokkos::create_mirror_view(d_scratch); v_scratch = plan->h_scratch.data(); } for (irecv = 0; irecv < plan->nrecv; irecv++) { - FFT_KOKKOS_SCALAR* scratch = v_scratch + plan->recv_bufloc[irecv]; + FFT_SCALAR* scratch = v_scratch + plan->recv_bufloc[irecv]; MPI_Irecv(scratch,plan->recv_size[irecv], - MPI_FFT_KOKKOS_SCALAR,plan->recv_proc[irecv],0, + MPI_FFT_SCALAR,plan->recv_proc[irecv],0, plan->comm,&plan->request[irecv]); } - FFT_KOKKOS_SCALAR* v_sendbuf = plan->d_sendbuf.data(); + FFT_SCALAR* v_sendbuf = plan->d_sendbuf.data(); if (!plan->usecuda_aware) { plan->h_sendbuf = Kokkos::create_mirror_view(plan->d_sendbuf); v_sendbuf = plan->h_sendbuf.data(); @@ -149,7 +149,7 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_SCAL if (!plan->usecuda_aware) Kokkos::deep_copy(plan->h_sendbuf,plan->d_sendbuf); - MPI_Send(v_sendbuf,plan->send_size[isend],MPI_FFT_KOKKOS_SCALAR, + MPI_Send(v_sendbuf,plan->send_size[isend],MPI_FFT_SCALAR, plan->send_proc[isend],0,plan->comm); } @@ -465,7 +465,7 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat size = MAX(size,plan->send_size[nsend]); if (size) { - plan->d_sendbuf = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:sendbuf",size); + plan->d_sendbuf = typename FFT_AT::t_FFT_SCALAR_1d("remap3d:sendbuf",size); if (!plan->d_sendbuf.data()) return nullptr; } @@ -475,7 +475,7 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat if (memory == 1) { if (nrecv > 0) { plan->d_scratch = - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); + typename FFT_AT::t_FFT_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); if (!plan->d_scratch.data()) return nullptr; } } diff --git a/src/KOKKOS/remap_kokkos.h b/src/KOKKOS/remap_kokkos.h index ad5fa9833d..a62c14f00b 100644 --- a/src/KOKKOS/remap_kokkos.h +++ b/src/KOKKOS/remap_kokkos.h @@ -28,13 +28,13 @@ template struct remap_plan_3d_kokkos { typedef DeviceType device_type; typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_sendbuf; // buffer for MPI sends - FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_1d h_sendbuf; // host buffer for MPI sends - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; // scratch buffer for MPI recvs - FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_1d h_scratch; // host scratch buffer for MPI recvs - void (*pack)(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); + typename FFT_AT::t_FFT_SCALAR_1d d_sendbuf; // buffer for MPI sends + FFT_HAT::t_FFT_SCALAR_1d h_sendbuf; // host buffer for MPI sends + typename FFT_AT::t_FFT_SCALAR_1d d_scratch; // scratch buffer for MPI recvs + FFT_HAT::t_FFT_SCALAR_1d h_scratch; // host scratch buffer for MPI recvs + void (*pack)(typename FFT_AT::t_FFT_SCALAR_1d_um, int, typename FFT_AT::t_FFT_SCALAR_1d_um, int, struct pack_plan_3d *); // which pack function to use - void (*unpack)(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); + void (*unpack)(typename FFT_AT::t_FFT_SCALAR_1d_um, int, typename FFT_AT::t_FFT_SCALAR_1d_um, int, struct pack_plan_3d *); // which unpack function to use int *send_offset; // extraction loc for each send int *send_size; // size of each send message @@ -66,11 +66,11 @@ class RemapKokkos : protected Pointers { RemapKokkos(class LAMMPS *, MPI_Comm,int,int,int,int,int,int, int,int,int,int,int,int,int,int,int,int,int,int); ~RemapKokkos() override; - void perform(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d); + void perform(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d); struct remap_plan_3d_kokkos *plan; - void remap_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, struct remap_plan_3d_kokkos *); + void remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, struct remap_plan_3d_kokkos *); struct remap_plan_3d_kokkos *remap_3d_create_plan_kokkos(MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, From 6d1d515f3a3f7369f9ace5bef4dfcc1b81d6f80e Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Wed, 13 Dec 2023 15:32:32 -0700 Subject: [PATCH 034/448] Fix compile issue --- src/KOKKOS/pppm_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 6e1b3a83fa..2a53682df3 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -285,7 +285,7 @@ void PPPMKokkos::init() estimated_accuracy); mesg += fmt::format(" estimated relative force accuracy = {:.8g}\n", estimated_accuracy/two_charge_force); - mesg += " using " LMP_FFT_KOKKOS_PREC " precision " LMP_FFT_KOKKOS_LIB "\n"; + mesg += " using " LMP_FFT_PREC " precision " LMP_FFT_KOKKOS_LIB "\n"; mesg += fmt::format(" 3d grid and FFT values/proc = {} {}\n", ngrid_max,nfft_both_max); utils::logmesg(lmp,mesg); From b199368c19bdc5f1b5f0fc991473ae7de3f73d31 Mon Sep 17 00:00:00 2001 From: Shern Tee Date: Thu, 14 Dec 2023 10:46:32 +1000 Subject: [PATCH 035/448] add extract function to fix_property_atom --- src/fix_property_atom.cpp | 24 ++++++++++++++++++++++++ src/fix_property_atom.h | 1 + 2 files changed, 25 insertions(+) diff --git a/src/fix_property_atom.cpp b/src/fix_property_atom.cpp index 9613523059..1e004ae4cb 100644 --- a/src/fix_property_atom.cpp +++ b/src/fix_property_atom.cpp @@ -948,3 +948,27 @@ int FixPropertyAtom::size_restart(int /*nlocal*/) { return values_peratom + 1; } + +/* ---------------------------------------------------------------------- + extract fix property/atom properties +------------------------------------------------------------------------- */ + +void *FixPropertyAtom::extract(const char *str, int &dim) +{ + dim=0; + if (strcmp(str, "nvalue") == 0) { + return &nvalue; + } else if (strcmp(str, "border") == 0) { + return &border; + } + dim=1; + if (strcmp(str, "styles") == 0) { + return &styles; + } else if (strcmp(str, "index") == 0) { + return &index; + } else if (strcmp(str, "cols") == 0) { + return &cols; + } + return nullptr; +} + diff --git a/src/fix_property_atom.h b/src/fix_property_atom.h index c50b6049dc..820acf3a20 100644 --- a/src/fix_property_atom.h +++ b/src/fix_property_atom.h @@ -51,6 +51,7 @@ class FixPropertyAtom : public Fix { void unpack_restart(int, int) override; int size_restart(int) override; int maxsize_restart() override; + void *extract(const char *, int &) override; double memory_usage() override; protected: From c45183d45c98c151ae4227dd415859249afae766 Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Thu, 14 Dec 2023 15:38:45 -0500 Subject: [PATCH 036/448] Updated CMake build system to allow for FFT_KOKKOS parameter. Updated CMakeLists.txt to print the correct value when summarizing. --- cmake/CMakeLists.txt | 16 +--------------- cmake/Modules/Packages/KOKKOS.cmake | 23 +++++++++++++++++++---- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 28e02bbee7..aacaca4e6c 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -984,21 +984,7 @@ if(PKG_KSPACE) message(STATUS "Using non-threaded FFTs") endif() if(PKG_KOKKOS) - if(Kokkos_ENABLE_CUDA) - if(FFT STREQUAL "KISS") - message(STATUS "Kokkos FFT: KISS") - else() - message(STATUS "Kokkos FFT: cuFFT") - endif() - elseif(Kokkos_ENABLE_HIP) - if(FFT STREQUAL "KISS") - message(STATUS "Kokkos FFT: KISS") - else() - message(STATUS "Kokkos FFT: hipFFT") - endif() - else() - message(STATUS "Kokkos FFT: ${FFT}") - endif() + message(STATUS "Kokkos FFT: ${FFT_KOKKOS}") endif() endif() if(BUILD_DOC) diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index 0edd9a3baa..eb20f93956 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -131,16 +131,31 @@ if(PKG_KSPACE) list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/fft3d_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/grid3d_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/remap_kokkos.cpp) + set(FFT_KOKKOS_VALUES KISS FFTW3 MKL HIPFFT CUFFT) + set_property(CACHE FFT_KOKKOS PROPERTY STRINGS ${FFT_KOKKOS_VALUES}) + validate_option(FFT_KOKKOS FFT_KOKKOS_VALUES) + string(TOUPPER ${FFT_KOKKOS} FFT_KOKKOS) + if(Kokkos_ENABLE_CUDA) - if(NOT (FFT STREQUAL "KISS")) - target_compile_definitions(lammps PRIVATE -DFFT_CUFFT) + if(NOT ((FFT_KOKKOS STREQUAL "KISS") OR (FFT_KOKKOS STREQUAL "CUFFT"))) + message(FATAL_ERROR "The CUDA backend of Kokkos requires either KISS FFT or CUFFT.") + elseif(FFT_KOKKOS STREQUAL "KISS") + message(WARNING "Using KISS FFT with the CUDA backend of Kokkos may be sub-optimal.") + target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_KISS) + elseif(FFT_KOKKOS STREQUAL "CUFFT") + target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_CUFFT) target_link_libraries(lammps PRIVATE cufft) endif() elseif(Kokkos_ENABLE_HIP) - if(NOT (FFT STREQUAL "KISS")) + if(NOT ((FFT_KOKKOS STREQUAL "KISS") OR (FFT_KOKKOS STREQUAL "HIPFFT"))) + message(FATAL_ERROR "The HIP backend of Kokkos requires either KISS FFT or HIPFFT.") + elseif(FFT_KOKKOS STREQUAL "KISS") + message(WARNING "Using KISS FFT with the HIP backend of Kokkos may be sub-optimal.") + target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_KISS) + elseif(FFT_KOKKOS STREQUAL "HIPFFT") include(DetectHIPInstallation) find_package(hipfft REQUIRED) - target_compile_definitions(lammps PRIVATE -DFFT_HIPFFT) + target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_HIPFFT) target_link_libraries(lammps PRIVATE hip::hipfft) endif() endif() From 54089fb5abbd18d91ad2b0f1665338df403396f1 Mon Sep 17 00:00:00 2001 From: Shern Tee Date: Fri, 15 Dec 2023 10:58:06 +1000 Subject: [PATCH 037/448] Revert "add extract function to fix_property_atom" This reverts commit b199368c19bdc5f1b5f0fc991473ae7de3f73d31. --- src/fix_property_atom.cpp | 24 ------------------------ src/fix_property_atom.h | 1 - 2 files changed, 25 deletions(-) diff --git a/src/fix_property_atom.cpp b/src/fix_property_atom.cpp index 1e004ae4cb..9613523059 100644 --- a/src/fix_property_atom.cpp +++ b/src/fix_property_atom.cpp @@ -948,27 +948,3 @@ int FixPropertyAtom::size_restart(int /*nlocal*/) { return values_peratom + 1; } - -/* ---------------------------------------------------------------------- - extract fix property/atom properties -------------------------------------------------------------------------- */ - -void *FixPropertyAtom::extract(const char *str, int &dim) -{ - dim=0; - if (strcmp(str, "nvalue") == 0) { - return &nvalue; - } else if (strcmp(str, "border") == 0) { - return &border; - } - dim=1; - if (strcmp(str, "styles") == 0) { - return &styles; - } else if (strcmp(str, "index") == 0) { - return &index; - } else if (strcmp(str, "cols") == 0) { - return &cols; - } - return nullptr; -} - diff --git a/src/fix_property_atom.h b/src/fix_property_atom.h index 820acf3a20..c50b6049dc 100644 --- a/src/fix_property_atom.h +++ b/src/fix_property_atom.h @@ -51,7 +51,6 @@ class FixPropertyAtom : public Fix { void unpack_restart(int, int) override; int size_restart(int) override; int maxsize_restart() override; - void *extract(const char *, int &) override; double memory_usage() override; protected: From 44fbcf7bfe2b8fd27c2cacf171fdc8dc6219ec69 Mon Sep 17 00:00:00 2001 From: Shern Tee Date: Fri, 15 Dec 2023 11:01:50 +1000 Subject: [PATCH 038/448] reorder "ghost" processing in fix property/atom --- src/fix_property_atom.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/fix_property_atom.cpp b/src/fix_property_atom.cpp index 9613523059..93e33ca056 100644 --- a/src/fix_property_atom.cpp +++ b/src/fix_property_atom.cpp @@ -51,6 +51,19 @@ FixPropertyAtom::FixPropertyAtom(LAMMPS *lmp, int narg, char **arg) : nvalue = 0; values_peratom = 0; + // get "ghost" first for settings + + border = 0; + while (iarg < narg) { + if (strcmp(arg[iarg], "ghost") == 0) { + if (iarg + 2 > narg) error->all(FLERR, "Illegal fix property/atom command"); + border = utils::logical(FLERR, arg[iarg + 1], false, lmp); + iarg += 2; + } else iarg++; + } + + iarg = 3; + while (iarg < narg) { if (strcmp(arg[iarg], "mol") == 0) { if (atom->molecule_flag) @@ -168,11 +181,8 @@ FixPropertyAtom::FixPropertyAtom(LAMMPS *lmp, int narg, char **arg) : // optional args - border = 0; while (iarg < narg) { if (strcmp(arg[iarg], "ghost") == 0) { - if (iarg + 2 > narg) error->all(FLERR, "Illegal fix property/atom command"); - border = utils::logical(FLERR, arg[iarg + 1], false, lmp); iarg += 2; } else if (strcmp(arg[iarg], "writedata") == 0) { if (iarg + 2 > narg) error->all(FLERR, "Illegal fix property/atom command"); From 61ca9b79db605dae37450c2b20d12d1f88ce817e Mon Sep 17 00:00:00 2001 From: Shern Tee Date: Fri, 15 Dec 2023 11:48:46 +1000 Subject: [PATCH 039/448] add custom_border to Atom and AtomKokkos --- src/KOKKOS/atom_kokkos.cpp | 7 ++++++- src/KOKKOS/atom_kokkos.h | 2 +- src/atom.cpp | 13 ++++++++++++- src/atom.h | 4 +++- 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index bc393b29d8..ecd618e7ac 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -297,7 +297,7 @@ void AtomKokkos::grow(unsigned int mask) return index in ivector or dvector of its location ------------------------------------------------------------------------- */ -int AtomKokkos::add_custom(const char *name, int flag, int cols) +int AtomKokkos::add_custom(const char *name, int flag, int cols, int border) { int index; @@ -342,6 +342,11 @@ int AtomKokkos::add_custom(const char *name, int flag, int cols) dcols[index] = cols; } + if (index < 0) + error->all(FLERR,"Invalid call to AtomKokkos::add_custom()"); + else + custom_border[flag + (cols) ? 2 : 0].push_back(border); + return index; } diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h index 21a9aeebbd..000ad5e112 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -154,7 +154,7 @@ class AtomKokkos : public Atom { void sync_overlapping_device(const ExecutionSpace space, unsigned int mask); void sort() override; virtual void grow(unsigned int mask); - int add_custom(const char *, int, int) override; + int add_custom(const char *, int, int, int border = 0) override; void remove_custom(int, int, int) override; virtual void deallocate_topology(); private: diff --git a/src/atom.cpp b/src/atom.cpp index b604c54e6b..8ac72e8950 100644 --- a/src/atom.cpp +++ b/src/atom.cpp @@ -2605,6 +2605,7 @@ void Atom::update_callback(int ifix) lists of names can have NULL entries if previously removed return flag = 0/1 for int/double return cols = 0/N for vector/array where N = # of columns + return border = 0/1 if fix property/atom has "ghost" no/yes ------------------------------------------------------------------------- */ int Atom::find_custom(const char *name, int &flag, int &cols) @@ -2642,6 +2643,13 @@ int Atom::find_custom(const char *name, int &flag, int &cols) return -1; } +int Atom::find_custom(const char *name, int &flag, int &cols, int &border) +{ + int i = find_custom(name, flag, cols); + if (i != -1) border = custom_border[flag + (cols) ? 2 : 0][i]; + return i; +} + /** \brief Add a custom per-atom property with the given name and type and size \verbatim embed:rst @@ -2654,7 +2662,7 @@ This function is called, e.g. from :doc:`fix property/atom `. * \param cols Number of values: 0 for a single value, 1 or more for a vector of values * \return index of property in the respective list of properties */ -int Atom::add_custom(const char *name, int flag, int cols) +int Atom::add_custom(const char *name, int flag, int cols, int border) { int index = -1; @@ -2697,6 +2705,9 @@ int Atom::add_custom(const char *name, int flag, int cols) if (index < 0) error->all(FLERR,"Invalid call to Atom::add_custom()"); + else + custom_border[flag + (cols) ? 2 : 0].push_back(border); + return index; } diff --git a/src/atom.h b/src/atom.h index 548168ac59..9724e5662f 100644 --- a/src/atom.h +++ b/src/atom.h @@ -242,6 +242,7 @@ class Atom : protected Pointers { int *icols, *dcols; char **ivname, **dvname, **ianame, **daname; int nivector, ndvector, niarray, ndarray; + std::array, 4> custom_border; // molecule templates // each template can be a set of consecutive molecules @@ -363,7 +364,8 @@ class Atom : protected Pointers { void update_callback(int); int find_custom(const char *, int &, int &); - virtual int add_custom(const char *, int, int); + int find_custom(const char *, int &, int &, int &); + virtual int add_custom(const char *, int, int, int border = 0); virtual void remove_custom(int, int, int); void *extract(const char *); From 09c87040b54b7bea54a0b7528cea0801ef94e81b Mon Sep 17 00:00:00 2001 From: Shern Tee Date: Fri, 15 Dec 2023 11:54:15 +1000 Subject: [PATCH 040/448] add border arguments to FixPropertyAtom add_custom functions --- src/fix_property_atom.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/fix_property_atom.cpp b/src/fix_property_atom.cpp index 93e33ca056..c3af7c2f1a 100644 --- a/src/fix_property_atom.cpp +++ b/src/fix_property_atom.cpp @@ -125,7 +125,7 @@ FixPropertyAtom::FixPropertyAtom(LAMMPS *lmp, int narg, char **arg) : if (index[nvalue] >= 0) error->all(FLERR, "Fix property/atom vector name already exists"); if (ReadData::is_data_section(id)) error->all(FLERR, "Fix property/atom fix ID must not be a data file section name"); - index[nvalue] = atom->add_custom(&arg[iarg][2], 0, 0); + index[nvalue] = atom->add_custom(&arg[iarg][2], 0, 0, border); cols[nvalue] = 0; values_peratom++; nvalue++; @@ -138,7 +138,7 @@ FixPropertyAtom::FixPropertyAtom(LAMMPS *lmp, int narg, char **arg) : if (index[nvalue] >= 0) error->all(FLERR, "Fix property/atom vector name already exists"); if (ReadData::is_data_section(id)) error->all(FLERR, "Fix property/atom fix ID must not be a data file section name"); - index[nvalue] = atom->add_custom(&arg[iarg][2], 1, 0); + index[nvalue] = atom->add_custom(&arg[iarg][2], 1, 0, border); cols[nvalue] = 0; values_peratom++; nvalue++; @@ -167,7 +167,7 @@ FixPropertyAtom::FixPropertyAtom(LAMMPS *lmp, int narg, char **arg) : which = 1; styles[nvalue] = DARRAY; } - index[nvalue] = atom->add_custom(&arg[iarg][3], which, ncols); + index[nvalue] = atom->add_custom(&arg[iarg][3], which, ncols, border); cols[nvalue] = ncols; values_peratom += ncols; nvalue++; From a873106790223b3a72ed4d9293f55a2a8ba59bcc Mon Sep 17 00:00:00 2001 From: Shern Tee Date: Fri, 15 Dec 2023 12:26:05 +1000 Subject: [PATCH 041/448] improve AMOEBA fix property/atom checks --- src/AMOEBA/pair_amoeba.cpp | 43 ++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/src/AMOEBA/pair_amoeba.cpp b/src/AMOEBA/pair_amoeba.cpp index cad9e2b628..c94faf91a7 100644 --- a/src/AMOEBA/pair_amoeba.cpp +++ b/src/AMOEBA/pair_amoeba.cpp @@ -827,28 +827,31 @@ void PairAmoeba::init_style() // check if all custom atom arrays were set via fix property/atom - int flag,cols; + char const * names[6] = {"amtype", "amgroup", "redID", + "xyzaxis", "polaxe", "pval"}; + int const flag_check[6] = {0, 0, 1, 1, 0, 1}; // correct type (0 int, 1 dbl) + int const cols_check[6] = {0, 0, 0, 3, 0, 0}; // xyzaxis 3 cols, all others 0 + int const border_check[6] = {1, 0, 0, 0, 0, 0}; // which types need ghost + int flag, cols, border; + int index[6]; - index_amtype = atom->find_custom("amtype",flag,cols); - if (index_amtype < 0 || flag || cols) - error->all(FLERR,"Pair {} amtype is not defined", mystyle); - index_amgroup = atom->find_custom("amgroup",flag,cols); - if (index_amgroup < 0 || flag || cols) - error->all(FLERR,"Pair {} amgroup is not defined", mystyle); + for (int i = 0; i < 6; i++) { + index[i] = atom->find_custom(names[i], flag, cols, border); + std::string err = ""; + if (index[i] < 0) err = "was not defined"; + else if (flag_check[i] != flag) err = "has the wrong type"; + else if (cols_check[i] != cols) err = "has the wrong number of columns"; + else if (border_check[i] && !border) err = "must be set by fix property/atom with ghost yes"; + if (err != "") + error->all(FLERR,"Pair {} per-atom variable {} {}", mystyle, names[i], err); + } - index_redID = atom->find_custom("redID",flag,cols); - if (index_redID < 0 || !flag || cols) - error->all(FLERR,"Pair {} redID is not defined", mystyle); - index_xyzaxis = atom->find_custom("xyzaxis",flag,cols); - if (index_xyzaxis < 0 || !flag || cols == 0) - error->all(FLERR,"Pair {} xyzaxis is not defined", mystyle); - - index_polaxe = atom->find_custom("polaxe",flag,cols); - if (index_polaxe < 0 || flag || cols) - error->all(FLERR,"Pair {} polaxe is not defined", mystyle); - index_pval = atom->find_custom("pval",flag,cols); - if (index_pval < 0 || !flag || cols) - error->all(FLERR,"Pair {} pval is not defined", mystyle); + index_amtype = index[0]; + index_amgroup = index[1]; + index_redID = index[2]; + index_xyzaxis = index[3]; + index_polaxe = index[4]; + index_pval = index[5]; // ------------------------------------------------------------------- // one-time initializations From e36a764db2a62ad45024e5c7fbdb8482ece4ec66 Mon Sep 17 00:00:00 2001 From: Shern Tee Date: Fri, 15 Dec 2023 12:44:55 +1000 Subject: [PATCH 042/448] add array and vector STL headers --- src/atom.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/atom.h b/src/atom.h index 9724e5662f..6f22ebd160 100644 --- a/src/atom.h +++ b/src/atom.h @@ -18,6 +18,8 @@ #include #include +#include +#include namespace LAMMPS_NS { From 4d2aed89374760f611e23c0d121d92a15b01e5b1 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Mon, 18 Dec 2023 11:15:56 -0500 Subject: [PATCH 043/448] bug fix for when reaction site has angles, but post-reaction template has none (same for dihedrals, impropers) --- src/REACTION/fix_bond_react.cpp | 180 +++++++++++++++++--------------- 1 file changed, 93 insertions(+), 87 deletions(-) diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index 1da26e32a1..10a7023e17 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -3348,7 +3348,7 @@ void FixBondReact::update_everything() dynamic_cast(ihistory)->clear_cache(); // Angles! First let's delete all angle info: - if (force->angle && twomol->angleflag) { + if (force->angle) { int *num_angle = atom->num_angle; int **angle_type = atom->angle_type; tagint **angle_atom1 = atom->angle_atom1; @@ -3389,33 +3389,35 @@ void FixBondReact::update_everything() } } // now let's add the new angle info. - for (int j = 0; j < twomol->natoms; j++) { - int jj = equivalences[j][1][rxnID]-1; - if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { - if (landlocked_atoms[j][rxnID] == 1) { - num_angle[atom->map(update_mega_glove[jj+1][i])] = twomol->num_angle[j]; - delta_angle += twomol->num_angle[j]; - for (int p = 0; p < twomol->num_angle[j]; p++) { - angle_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->angle_type[j][p]; - angle_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom1[j][p]-1][1][rxnID]][i]; - angle_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom2[j][p]-1][1][rxnID]][i]; - angle_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom3[j][p]-1][1][rxnID]][i]; + if (twomol->angleflag) { + for (int j = 0; j < twomol->natoms; j++) { + int jj = equivalences[j][1][rxnID]-1; + if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { + if (landlocked_atoms[j][rxnID] == 1) { + num_angle[atom->map(update_mega_glove[jj+1][i])] = twomol->num_angle[j]; + delta_angle += twomol->num_angle[j]; + for (int p = 0; p < twomol->num_angle[j]; p++) { + angle_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->angle_type[j][p]; + angle_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom1[j][p]-1][1][rxnID]][i]; + angle_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom2[j][p]-1][1][rxnID]][i]; + angle_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom3[j][p]-1][1][rxnID]][i]; + } } - } - if (landlocked_atoms[j][rxnID] == 0) { - for (int p = 0; p < twomol->num_angle[j]; p++) { - if (landlocked_atoms[twomol->angle_atom1[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->angle_atom2[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->angle_atom3[j][p]-1][rxnID] == 1) { - insert_num = num_angle[atom->map(update_mega_glove[jj+1][i])]; - angle_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->angle_type[j][p]; - angle_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom1[j][p]-1][1][rxnID]][i]; - angle_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom2[j][p]-1][1][rxnID]][i]; - angle_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom3[j][p]-1][1][rxnID]][i]; - num_angle[atom->map(update_mega_glove[jj+1][i])]++; - if (num_angle[atom->map(update_mega_glove[jj+1][i])] > atom->angle_per_atom) - error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); - delta_angle++; + if (landlocked_atoms[j][rxnID] == 0) { + for (int p = 0; p < twomol->num_angle[j]; p++) { + if (landlocked_atoms[twomol->angle_atom1[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->angle_atom2[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->angle_atom3[j][p]-1][rxnID] == 1) { + insert_num = num_angle[atom->map(update_mega_glove[jj+1][i])]; + angle_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->angle_type[j][p]; + angle_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom1[j][p]-1][1][rxnID]][i]; + angle_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom2[j][p]-1][1][rxnID]][i]; + angle_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom3[j][p]-1][1][rxnID]][i]; + num_angle[atom->map(update_mega_glove[jj+1][i])]++; + if (num_angle[atom->map(update_mega_glove[jj+1][i])] > atom->angle_per_atom) + error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); + delta_angle++; + } } } } @@ -3425,7 +3427,7 @@ void FixBondReact::update_everything() } // Dihedrals! first let's delete all dihedral info for landlocked atoms - if (force->dihedral && twomol->dihedralflag) { + if (force->dihedral) { int *num_dihedral = atom->num_dihedral; int **dihedral_type = atom->dihedral_type; tagint **dihedral_atom1 = atom->dihedral_atom1; @@ -3469,36 +3471,38 @@ void FixBondReact::update_everything() } } // now let's add new dihedral info - for (int j = 0; j < twomol->natoms; j++) { - int jj = equivalences[j][1][rxnID]-1; - if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { - if (landlocked_atoms[j][rxnID] == 1) { - num_dihedral[atom->map(update_mega_glove[jj+1][i])] = twomol->num_dihedral[j]; - delta_dihed += twomol->num_dihedral[j]; - for (int p = 0; p < twomol->num_dihedral[j]; p++) { - dihedral_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->dihedral_type[j][p]; - dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom1[j][p]-1][1][rxnID]][i]; - dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom2[j][p]-1][1][rxnID]][i]; - dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom3[j][p]-1][1][rxnID]][i]; - dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom4[j][p]-1][1][rxnID]][i]; + if (twomol->dihedralflag) { + for (int j = 0; j < twomol->natoms; j++) { + int jj = equivalences[j][1][rxnID]-1; + if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { + if (landlocked_atoms[j][rxnID] == 1) { + num_dihedral[atom->map(update_mega_glove[jj+1][i])] = twomol->num_dihedral[j]; + delta_dihed += twomol->num_dihedral[j]; + for (int p = 0; p < twomol->num_dihedral[j]; p++) { + dihedral_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->dihedral_type[j][p]; + dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom1[j][p]-1][1][rxnID]][i]; + dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom2[j][p]-1][1][rxnID]][i]; + dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom3[j][p]-1][1][rxnID]][i]; + dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom4[j][p]-1][1][rxnID]][i]; + } } - } - if (landlocked_atoms[j][rxnID] == 0) { - for (int p = 0; p < twomol->num_dihedral[j]; p++) { - if (landlocked_atoms[twomol->dihedral_atom1[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->dihedral_atom2[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->dihedral_atom3[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->dihedral_atom4[j][p]-1][rxnID] == 1) { - insert_num = num_dihedral[atom->map(update_mega_glove[jj+1][i])]; - dihedral_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->dihedral_type[j][p]; - dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom1[j][p]-1][1][rxnID]][i]; - dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom2[j][p]-1][1][rxnID]][i]; - dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom3[j][p]-1][1][rxnID]][i]; - dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom4[j][p]-1][1][rxnID]][i]; - num_dihedral[atom->map(update_mega_glove[jj+1][i])]++; - if (num_dihedral[atom->map(update_mega_glove[jj+1][i])] > atom->dihedral_per_atom) - error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); - delta_dihed++; + if (landlocked_atoms[j][rxnID] == 0) { + for (int p = 0; p < twomol->num_dihedral[j]; p++) { + if (landlocked_atoms[twomol->dihedral_atom1[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->dihedral_atom2[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->dihedral_atom3[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->dihedral_atom4[j][p]-1][rxnID] == 1) { + insert_num = num_dihedral[atom->map(update_mega_glove[jj+1][i])]; + dihedral_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->dihedral_type[j][p]; + dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom1[j][p]-1][1][rxnID]][i]; + dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom2[j][p]-1][1][rxnID]][i]; + dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom3[j][p]-1][1][rxnID]][i]; + dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom4[j][p]-1][1][rxnID]][i]; + num_dihedral[atom->map(update_mega_glove[jj+1][i])]++; + if (num_dihedral[atom->map(update_mega_glove[jj+1][i])] > atom->dihedral_per_atom) + error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); + delta_dihed++; + } } } } @@ -3508,7 +3512,7 @@ void FixBondReact::update_everything() } // finally IMPROPERS!!!! first let's delete all improper info for landlocked atoms - if (force->improper && twomol->improperflag) { + if (force->improper) { int *num_improper = atom->num_improper; int **improper_type = atom->improper_type; tagint **improper_atom1 = atom->improper_atom1; @@ -3552,36 +3556,38 @@ void FixBondReact::update_everything() } } // now let's add new improper info - for (int j = 0; j < twomol->natoms; j++) { - int jj = equivalences[j][1][rxnID]-1; - if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { - if (landlocked_atoms[j][rxnID] == 1) { - num_improper[atom->map(update_mega_glove[jj+1][i])] = twomol->num_improper[j]; - delta_imprp += twomol->num_improper[j]; - for (int p = 0; p < twomol->num_improper[j]; p++) { - improper_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->improper_type[j][p]; - improper_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom1[j][p]-1][1][rxnID]][i]; - improper_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom2[j][p]-1][1][rxnID]][i]; - improper_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom3[j][p]-1][1][rxnID]][i]; - improper_atom4[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom4[j][p]-1][1][rxnID]][i]; + if (twomol->improperflag) { + for (int j = 0; j < twomol->natoms; j++) { + int jj = equivalences[j][1][rxnID]-1; + if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { + if (landlocked_atoms[j][rxnID] == 1) { + num_improper[atom->map(update_mega_glove[jj+1][i])] = twomol->num_improper[j]; + delta_imprp += twomol->num_improper[j]; + for (int p = 0; p < twomol->num_improper[j]; p++) { + improper_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->improper_type[j][p]; + improper_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom1[j][p]-1][1][rxnID]][i]; + improper_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom2[j][p]-1][1][rxnID]][i]; + improper_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom3[j][p]-1][1][rxnID]][i]; + improper_atom4[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom4[j][p]-1][1][rxnID]][i]; + } } - } - if (landlocked_atoms[j][rxnID] == 0) { - for (int p = 0; p < twomol->num_improper[j]; p++) { - if (landlocked_atoms[twomol->improper_atom1[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->improper_atom2[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->improper_atom3[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->improper_atom4[j][p]-1][rxnID] == 1) { - insert_num = num_improper[atom->map(update_mega_glove[jj+1][i])]; - improper_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->improper_type[j][p]; - improper_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom1[j][p]-1][1][rxnID]][i]; - improper_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom2[j][p]-1][1][rxnID]][i]; - improper_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom3[j][p]-1][1][rxnID]][i]; - improper_atom4[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom4[j][p]-1][1][rxnID]][i]; - num_improper[atom->map(update_mega_glove[jj+1][i])]++; - if (num_improper[atom->map(update_mega_glove[jj+1][i])] > atom->improper_per_atom) - error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); - delta_imprp++; + if (landlocked_atoms[j][rxnID] == 0) { + for (int p = 0; p < twomol->num_improper[j]; p++) { + if (landlocked_atoms[twomol->improper_atom1[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->improper_atom2[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->improper_atom3[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->improper_atom4[j][p]-1][rxnID] == 1) { + insert_num = num_improper[atom->map(update_mega_glove[jj+1][i])]; + improper_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->improper_type[j][p]; + improper_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom1[j][p]-1][1][rxnID]][i]; + improper_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom2[j][p]-1][1][rxnID]][i]; + improper_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom3[j][p]-1][1][rxnID]][i]; + improper_atom4[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom4[j][p]-1][1][rxnID]][i]; + num_improper[atom->map(update_mega_glove[jj+1][i])]++; + if (num_improper[atom->map(update_mega_glove[jj+1][i])] > atom->improper_per_atom) + error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); + delta_imprp++; + } } } } From a6addbc90761fd9b3e48e5a3064f8f589a58140e Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Mon, 18 Dec 2023 11:30:56 -0500 Subject: [PATCH 044/448] Updated documentation for FFT_KOKKOS_ flags and CMake variable selection --- doc/src/Build_settings.rst | 18 +++++++++++++++--- doc/src/Howto_cmake.rst | 2 ++ doc/src/kspace_style.rst | 5 ++++- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/doc/src/Build_settings.rst b/doc/src/Build_settings.rst index 7576cae3eb..33b0508fe9 100644 --- a/doc/src/Build_settings.rst +++ b/doc/src/Build_settings.rst @@ -51,14 +51,18 @@ LAMMPS can use them if they are available on your system. .. code-block:: bash -D FFT=value # FFTW3 or MKL or KISS, default is FFTW3 if found, else KISS + -D FFT_KOKKOS=value # FFTW3 or MKL or KISS or CUFFT or HIPFFT, default is KISS -D FFT_SINGLE=value # yes or no (default), no = double precision -D FFT_PACK=value # array (default) or pointer or memcpy .. note:: - The values for the FFT variable must be in upper-case. This is - an exception to the rule that all CMake variables can be specified - with lower-case values. + When the Kokkos variant of a package is compiled and selected at run time, + the FFT library selected by the FFT_KOKKOS variable applies. Otherwise, + the FFT library selected by the FFT variable applies. + The same FFT settings apply to both. FFT_KOKKOS must be compatible with the + Kokkos backend - for example, when using the CUDA backend of Kokkos, + you must use either CUFFT or KISS. Usually these settings are all that is needed. If FFTW3 is selected, then CMake will try to detect, if threaded FFTW @@ -87,6 +91,8 @@ LAMMPS can use them if they are available on your system. FFT_INC = -DFFT_FFTW3 # -DFFT_FFTW3, -DFFT_FFTW (same as -DFFT_FFTW3), -DFFT_MKL, or -DFFT_KISS # default is KISS if not specified + FFT_INC = -DFFT_KOKKOS_CUFFT # -DFFT_KOKKOS_{FFTW,FFTW3,MKL,CUFFT,HIPFFT,KISS} + # default is KISS if not specified FFT_INC = -DFFT_SINGLE # do not specify for double precision FFT_INC = -DFFT_FFTW_THREADS # enable using threaded FFTW3 libraries FFT_INC = -DFFT_MKL_THREADS # enable using threaded FFTs with MKL libraries @@ -97,6 +103,8 @@ LAMMPS can use them if they are available on your system. FFT_INC = -I/usr/local/include FFT_PATH = -L/usr/local/lib + FFT_LIB = -lhipfft # hipFFT either precision + FFT_LIB = -lcufft # cuFFT either precision FFT_LIB = -lfftw3 # FFTW3 double precision FFT_LIB = -lfftw3 -lfftw3_omp # FFTW3 double precision with threads (needs -DFFT_FFTW_THREADS) FFT_LIB = -lfftw3 -lfftw3f # FFTW3 single precision @@ -141,6 +149,10 @@ The Intel MKL math library is part of the Intel compiler suite. It can be used with the Intel or GNU compiler (see the ``FFT_LIB`` setting above). +The CUFFT and HIPFFT FFT libraries are packaged with NVIDIA's CUDA and AMD's +HIP installations, respectively. These FFT libraries require the Kokkos acceleration +package to be enabled and the Kokkos backend to be GPU-resident (ie, HIP or CUDA). + Performing 3d FFTs in parallel can be time-consuming due to data access and required communication. This cost can be reduced by performing single-precision FFTs instead of double precision. Single precision diff --git a/doc/src/Howto_cmake.rst b/doc/src/Howto_cmake.rst index 42324cf2f1..8b710d1065 100644 --- a/doc/src/Howto_cmake.rst +++ b/doc/src/Howto_cmake.rst @@ -349,6 +349,8 @@ Some common LAMMPS specific variables - when set to ``name`` the LAMMPS executable and library will be called ``lmp_name`` and ``liblammps_name.a`` * - ``FFT`` - select which FFT library to use: ``FFTW3``, ``MKL``, ``KISS`` (default, unless FFTW3 is found) + * - ``FFT_KOKKOS`` + - select which FFT library to use in Kokkos-enabled styles: ``FFTW3``, ``MKL``, ``HIPFFT``, ``CUFFT``, ``KISS`` (default) * - ``FFT_SINGLE`` - select whether to use single precision FFTs (default: ``off``) * - ``WITH_JPEG`` diff --git a/doc/src/kspace_style.rst b/doc/src/kspace_style.rst index 38a6fce375..78d7380c01 100644 --- a/doc/src/kspace_style.rst +++ b/doc/src/kspace_style.rst @@ -450,7 +450,10 @@ relative RMS error. For the KOKKOS package, the *pppm/kk* style performs charge assignment and force interpolation calculations, along with the FFTs themselves, on the GPU or (optionally) threaded on the CPU when - using OpenMP and FFTW3. + using OpenMP and FFTW3. The specific FFT library is selected using + the FFT_KOKKOS CMake parameter. See the + :doc:`Build settings ` doc page for how to select a + 3rd-party FFT library. ---------- From bc47f4f3a32c8499d5bd5fd6bc4a68424b700da5 Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Mon, 18 Dec 2023 11:56:23 -0500 Subject: [PATCH 045/448] Updated CMake preset files for kokkos-cuda and kokkos-hip --- cmake/presets/kokkos-cuda.cmake | 3 +++ cmake/presets/kokkos-hip.cmake | 3 +++ 2 files changed, 6 insertions(+) diff --git a/cmake/presets/kokkos-cuda.cmake b/cmake/presets/kokkos-cuda.cmake index c3ee081898..3205387044 100644 --- a/cmake/presets/kokkos-cuda.cmake +++ b/cmake/presets/kokkos-cuda.cmake @@ -9,5 +9,8 @@ set(BUILD_OMP ON CACHE BOOL "" FORCE) get_filename_component(NVCC_WRAPPER_CMD ${CMAKE_CURRENT_SOURCE_DIR}/../lib/kokkos/bin/nvcc_wrapper ABSOLUTE) set(CMAKE_CXX_COMPILER ${NVCC_WRAPPER_CMD} CACHE FILEPATH "" FORCE) +# If KSPACE is also enabled, use CUFFT for FFTs +set(FFT_KOKKOS "CUFFT" CACHE STRING FORCE) + # hide deprecation warnings temporarily for stable release set(Kokkos_ENABLE_DEPRECATION_WARNINGS OFF CACHE BOOL "" FORCE) diff --git a/cmake/presets/kokkos-hip.cmake b/cmake/presets/kokkos-hip.cmake index 827a37152b..ffc259a225 100644 --- a/cmake/presets/kokkos-hip.cmake +++ b/cmake/presets/kokkos-hip.cmake @@ -12,6 +12,9 @@ set(BUILD_OMP ON CACHE BOOL "" FORCE) set(CMAKE_CXX_COMPILER hipcc CACHE STRING "" FORCE) set(CMAKE_TUNE_FLAGS "-munsafe-fp-atomics" CACHE STRING "" FORCE) +# If KSPACE is also enabled, use CUFFT for FFTs +set(FFT_KOKKOS "HIPFFT" CACHE STRING FORCE) + # hide deprecation warnings temporarily for stable release set(Kokkos_ENABLE_DEPRECATION_WARNINGS OFF CACHE BOOL "" FORCE) From d02ffb0e709cb57ff0959c74d74a7a0ad9b7670e Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Mon, 18 Dec 2023 12:06:41 -0500 Subject: [PATCH 046/448] Updated Summit & Frontier template Makefiles --- src/MAKE/MACHINES/Makefile.frontier_kokkos | 2 +- src/MAKE/MACHINES/Makefile.summit_kokkos | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/MAKE/MACHINES/Makefile.frontier_kokkos b/src/MAKE/MACHINES/Makefile.frontier_kokkos index 86cddd12b7..b58a3d871c 100644 --- a/src/MAKE/MACHINES/Makefile.frontier_kokkos +++ b/src/MAKE/MACHINES/Makefile.frontier_kokkos @@ -55,7 +55,7 @@ MPI_LIB = -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa MY_HIP_EXE = $(shell which hipcc) MY_HIP_PATH = $(dir ${MY_HIP_EXE}) -FFT_INC = -DFFT_HIPFFT +FFT_INC = -DFFT_KOKKOS_HIPFFT FFT_PATH = FFT_LIB = -L${MY_HIP_PATH}../lib -lhipfft diff --git a/src/MAKE/MACHINES/Makefile.summit_kokkos b/src/MAKE/MACHINES/Makefile.summit_kokkos index 557ebd22b2..d554e09a5a 100644 --- a/src/MAKE/MACHINES/Makefile.summit_kokkos +++ b/src/MAKE/MACHINES/Makefile.summit_kokkos @@ -57,7 +57,7 @@ MPI_LIB = -L${MY_MPI_PATH}../lib -lmpi_ibm # PATH = path for FFT library # LIB = name of FFT library -FFT_INC = -DFFT_CUFFT +FFT_INC = -DFFT_KOKKOS_CUFFT FFT_PATH = FFT_LIB = -lcufft From bc7050ab5001b4480383d9e16995494a25f1bec8 Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Mon, 18 Dec 2023 12:11:31 -0500 Subject: [PATCH 047/448] Added LMP_HEFFTE to CMakeLists.txt to attempt to fix a merge conflict --- cmake/CMakeLists.txt | 46 +++++++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index aacaca4e6c..76248445e9 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -971,20 +971,40 @@ if(PKG_KOKKOS) endif() endif() if(PKG_KSPACE) - message(STATUS "<<< FFT settings >>> + if (LMP_HEFFTE) + message(STATUS "<<< FFT settings >>> +-- Primary FFT lib: heFFTe") + if (HEFFTE_BACKEND) + message(STATUS "heFFTe backend: ${HEFFTE_BACKEND}") + else() + message(STATUS "heFFTe backend: stock (builtin FFT implementation, tested for corrected but not optimized for production)") + endif() + if(FFT_SINGLE) + message(STATUS "Using single precision FFTs") + else() + message(STATUS "Using double precision FFTs") + endif() + else() + message(STATUS "<<< FFT settings >>> -- Primary FFT lib: ${FFT}") - if(FFT_SINGLE) - message(STATUS "Using single precision FFTs") - else() - message(STATUS "Using double precision FFTs") - endif() - if(FFT_FFTW_THREADS OR FFT_MKL_THREADS) - message(STATUS "Using threaded FFTs") - else() - message(STATUS "Using non-threaded FFTs") - endif() - if(PKG_KOKKOS) - message(STATUS "Kokkos FFT: ${FFT_KOKKOS}") + if(FFT_SINGLE) + message(STATUS "Using single precision FFTs") + else() + message(STATUS "Using double precision FFTs") + endif() + if(FFT_FFTW_THREADS OR FFT_MKL_THREADS) + message(STATUS "Using threaded FFTs") + else() + message(STATUS "Using non-threaded FFTs") + endif() + if (FFT_HEFFTE) + message(STATUS "Using distributed algorithms from heFTTe") + else() + message(STATUS "Using builtin distributed algorithms") + endif() + if(PKG_KOKKOS) + message(STATUS "Kokkos FFT: ${FFT_KOKKOS}") + endif() endif() endif() if(BUILD_DOC) From dd1ac640aeec2686b2757d734546d6960804bcc2 Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Mon, 18 Dec 2023 12:56:30 -0500 Subject: [PATCH 048/448] Added declaration for FFT_KOKKOS variable --- cmake/Modules/Packages/KOKKOS.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index eb20f93956..a0b872ba85 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -131,6 +131,7 @@ if(PKG_KSPACE) list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/fft3d_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/grid3d_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/remap_kokkos.cpp) + set(FFT_KOKKOS "KISS" CACHE STRING "FFT library for Kokkos-enabled KSPACE package") set(FFT_KOKKOS_VALUES KISS FFTW3 MKL HIPFFT CUFFT) set_property(CACHE FFT_KOKKOS PROPERTY STRINGS ${FFT_KOKKOS_VALUES}) validate_option(FFT_KOKKOS FFT_KOKKOS_VALUES) From e69a079545dcf9801f0099c268fb781567fe7b61 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Wed, 20 Dec 2023 10:50:54 -0700 Subject: [PATCH 049/448] Fixing invalid variable deform kokkos --- src/KOKKOS/fix_deform_kokkos.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/KOKKOS/fix_deform_kokkos.cpp b/src/KOKKOS/fix_deform_kokkos.cpp index 104ac41188..7a2b3e2481 100644 --- a/src/KOKKOS/fix_deform_kokkos.cpp +++ b/src/KOKKOS/fix_deform_kokkos.cpp @@ -120,11 +120,11 @@ void FixDeformKokkos::end_of_step() } else if (set[i].style == WIGGLE) { double delt = (update->ntimestep - update->beginstep) * update->dt; set[i].lo_target = set[i].lo_start - - 0.5*set[i].amplitude * sin(TWOPI*delt/set[i].tperiod); + 0.5*set[i].amplitude * sin(MY_2PI*delt/set[i].tperiod); set[i].hi_target = set[i].hi_start + - 0.5*set[i].amplitude * sin(TWOPI*delt/set[i].tperiod); - h_rate[i] = TWOPI/set[i].tperiod * set[i].amplitude * - cos(TWOPI*delt/set[i].tperiod); + 0.5*set[i].amplitude * sin(MY_2PI*delt/set[i].tperiod); + h_rate[i] = MY_2PI/set[i].tperiod * set[i].amplitude * + cos(MY_2PI*delt/set[i].tperiod); h_ratelo[i] = -0.5*h_rate[i]; } else if (set[i].style == VARIABLE) { double del = input->variable->compute_equal(set[i].hvar); @@ -212,9 +212,9 @@ void FixDeformKokkos::end_of_step() } else if (set[i].style == WIGGLE) { double delt = (update->ntimestep - update->beginstep) * update->dt; set[i].tilt_target = set[i].tilt_start + - set[i].amplitude * sin(TWOPI*delt/set[i].tperiod); - h_rate[i] = TWOPI/set[i].tperiod * set[i].amplitude * - cos(TWOPI*delt/set[i].tperiod); + set[i].amplitude * sin(MY_2PI*delt/set[i].tperiod); + h_rate[i] = MY_2PI/set[i].tperiod * set[i].amplitude * + cos(MY_2PI*delt/set[i].tperiod); } else if (set[i].style == VARIABLE) { double delta_tilt = input->variable->compute_equal(set[i].hvar); set[i].tilt_target = set[i].tilt_start + delta_tilt; From d61c379b0caa0b50b6fac0942c7bba191c4b4143 Mon Sep 17 00:00:00 2001 From: Ludwig Ahrens Date: Thu, 21 Dec 2023 14:28:54 +0100 Subject: [PATCH 050/448] Error formatting --- src/ELECTRODE/fix_electrode_conp.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/ELECTRODE/fix_electrode_conp.cpp b/src/ELECTRODE/fix_electrode_conp.cpp index a87642182b..091840148d 100644 --- a/src/ELECTRODE/fix_electrode_conp.cpp +++ b/src/ELECTRODE/fix_electrode_conp.cpp @@ -522,11 +522,9 @@ void FixElectrodeConp::setup_post_neighbor() if (qtotal_var_style == VarStyle::EQUAL) { const char *var_name = qtotal_var_name.c_str(); int var_id = input->variable->find(var_name); - if (var_id < 0) - error->all(FLERR, fmt::format("Variable '{}' for fix electrode does not exist", var_name)); + if (var_id < 0) error->all(FLERR, "Variable '{}' for fix electrode does not exist", var_name); if (!input->variable->equalstyle(var_id)) - error->all(FLERR, - fmt::format("Variable '{}' for fix electrode is not equal-style", var_name)); + error->all(FLERR, "Variable '{}' for fix electrode is not equal-style", var_name); qtotal_var_id = var_id; } From 0562c3113879f38f8fc6db7afb88830ecb3ae10c Mon Sep 17 00:00:00 2001 From: Mitch Murphy Date: Tue, 2 Jan 2024 04:51:10 -0500 Subject: [PATCH 051/448] added pair/lj/charmmfsw/coul/long/kk and dihedral/charmmfsw/kk so that lammps scripts generated by charmm-gui.org can be run without tweaks --- src/KOKKOS/Install.sh | 4 + src/KOKKOS/dihedral_charmmfsw_kokkos.cpp | 991 ++++++++++++++++++ src/KOKKOS/dihedral_charmmfsw_kokkos.h | 267 +++++ .../pair_lj_charmmfsw_coul_long_kokkos.cpp | 941 +++++++++++++++++ .../pair_lj_charmmfsw_coul_long_kokkos.h | 230 ++++ 5 files changed, 2433 insertions(+) create mode 100644 src/KOKKOS/dihedral_charmmfsw_kokkos.cpp create mode 100644 src/KOKKOS/dihedral_charmmfsw_kokkos.h create mode 100644 src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp create mode 100644 src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index af80420d7a..462c0cbe57 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -106,6 +106,8 @@ action compute_temp_kokkos.cpp action compute_temp_kokkos.h action dihedral_charmm_kokkos.cpp dihedral_charmm.cpp action dihedral_charmm_kokkos.h dihedral_charmm.h +action dihedral_charmmfsw_kokkos.cpp dihedral_charmmfsw.cpp +action dihedral_charmmfsw_kokkos.h dihedral_charmmfsw.h action dihedral_class2_kokkos.cpp dihedral_class2.cpp action dihedral_class2_kokkos.h dihedral_class2.h action dihedral_harmonic_kokkos.cpp dihedral_harmonic.cpp @@ -310,6 +312,8 @@ action pair_lj_charmm_coul_charmm_kokkos.cpp pair_lj_charmm_coul_charmm.cpp action pair_lj_charmm_coul_charmm_kokkos.h pair_lj_charmm_coul_charmm.h action pair_lj_charmm_coul_long_kokkos.cpp pair_lj_charmm_coul_long.cpp action pair_lj_charmm_coul_long_kokkos.h pair_lj_charmm_coul_long.h +action pair_lj_charmmfsw_coul_long_kokkos.cpp pair_lj_charmmfsw_coul_long.cpp +action pair_lj_charmmfsw_coul_long_kokkos.h pair_lj_charmmfsw_coul_long.h action pair_lj_class2_coul_cut_kokkos.cpp pair_lj_class2_coul_cut.cpp action pair_lj_class2_coul_cut_kokkos.h pair_lj_class2_coul_cut.h action pair_lj_class2_coul_long_kokkos.cpp pair_lj_class2_coul_long.cpp diff --git a/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp b/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp new file mode 100644 index 0000000000..facb723580 --- /dev/null +++ b/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp @@ -0,0 +1,991 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + + Contributing authors: + + - Stan Moore (SNL) original DihedralCharmmfswKokkos + + - Mitch Murphy (alphataubio) - DihedralCharmmfswKokkos update (2023/12) + + Based on serial dihedral_charmmfsw.cpp lj-fsw sections (force-switched) + provided by Robert Meissner and Lucio Colombi Ciacchi of Bremen + University, Germany, with additional assistance from + Robert A. Latour, Clemson University. + +------------------------------------------------------------------------- */ + + +/* ---------------------------------------------------------------------- + + *** DRAFT VERSION 1 (lots of comments to be removed just before merge) *** + + (1) first draft version of DihedralCharmmfswKokkos exactly + same as DihedralCharmmfswKokkos but with new class name + + method: track changes from serial kspace dihedral_charmm to + dihedral_charmmfsw and apply to DihedralCharmmfswKokkos + + % diff dihedral_charmm.cpp dihedral_charmmfsw.cpp + +------------------------------------------------------------------------- */ + +/* + 18c21 + < #include "dihedral_charmm.h" + --- + > #include "dihedral_charmmfsw.h" + + */ + +#include "dihedral_charmmfsw_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "error.h" +#include "force.h" +#include "kokkos.h" +#include "math_const.h" +#include "memory_kokkos.h" +#include "neighbor_kokkos.h" +#include "pair.h" + +#include + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define TOLERANCE 0.05 + +/* ---------------------------------------------------------------------- */ + +/* + + 40c43 + < DihedralCharmm::DihedralCharmm(LAMMPS *_lmp) : Dihedral(_lmp) + --- + > DihedralCharmmfsw::DihedralCharmmfsw(LAMMPS *_lmp) : Dihedral(_lmp) + + */ + +template +DihedralCharmmfswKokkos::DihedralCharmmfswKokkos(LAMMPS *lmp) : DihedralCharmmfsw(lmp) +{ + atomKK = (AtomKokkos *) atom; + neighborKK = (NeighborKokkos *) neighbor; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK | TYPE_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + + k_warning_flag = Kokkos::DualView("Dihedral:warning_flag"); + d_warning_flag = k_warning_flag.template view(); + h_warning_flag = k_warning_flag.h_view; + + centroidstressflag = CENTROID_NOTAVAIL; +} + +/* ---------------------------------------------------------------------- */ + +/* + + 48c51 + < DihedralCharmm::~DihedralCharmm() + --- + > DihedralCharmmfsw::~DihedralCharmmfsw() + + */ + +template +DihedralCharmmfswKokkos::~DihedralCharmmfswKokkos() +{ + if (!copymode) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); + } +} + +/* ---------------------------------------------------------------------- */ + +/* + + 73c76 + < double delx, dely, delz, rsq, r2inv, r6inv; + --- + > double delx, dely, delz, rsq, r2inv, r6inv, r; + 255a259,264 + > // modifying coul and LJ force and energies to apply + > // force_shift and force_switch as in CHARMM pairwise + > // LJ interactions between 1-4 atoms should usually be + > // for r < cut_inner, so switching not applied + > + > r = sqrt(rsq); + 258c267 + < else + --- + > else if (dihedflag) + 259a269,270 + > else + > forcecoul = qqrd2e * q[i1] * q[i4] * (sqrt(r2inv) - r * cut_coulinv14 * cut_coulinv14); + 264,265c275,284 + < ecoul = weight[type] * forcecoul; + < evdwl = r6inv * (lj14_3[itype][jtype] * r6inv - lj14_4[itype][jtype]); + --- + > if (dihedflag) + > ecoul = weight[type] * forcecoul; + > else + > ecoul = weight[type] * qqrd2e * q[i1] * q[i4] * + > (sqrt(r2inv) + r * cut_coulinv14 * cut_coulinv14 - 2.0 * cut_coulinv14); + > evdwl14_12 = r6inv * lj14_3[itype][jtype] * r6inv - + > lj14_3[itype][jtype] * cut_lj_inner6inv * cut_lj6inv; + > evdwl14_6 = + > -lj14_4[itype][jtype] * r6inv + lj14_4[itype][jtype] * cut_lj_inner3inv * cut_lj3inv; + > evdwl = evdwl14_12 + evdwl14_6; + + */ + + +/* + + 63c66 + < void DihedralCharmm::compute(int eflag, int vflag) + --- + > void DihedralCharmmfsw::compute(int eflag, int vflag) + + */ + +template +void DihedralCharmmfswKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + if (lmp->kokkos->neighflag == FULL) + error->all(FLERR,"Dihedral_style charmm/kk requires half neighbor list"); + + ev_init(eflag,vflag,0); + + // ensure pair->ev_tally() will use 1-4 virial contribution + + if (weightflag && vflag_global == VIRIAL_FDOTR) + force->pair->vflag_either = force->pair->vflag_global = 1; + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + //if(k_eatom.extent(0)destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"dihedral:eatom"); + d_eatom = k_eatom.template view(); + k_eatom_pair = Kokkos::DualView("dihedral:eatom_pair",maxeatom); + d_eatom_pair = k_eatom_pair.template view(); + //} + } + if (vflag_atom) { + //if(k_vatom.extent(0)destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"dihedral:vatom"); + d_vatom = k_vatom.template view(); + k_vatom_pair = Kokkos::DualView("dihedral:vatom_pair",maxvatom); + d_vatom_pair = k_vatom_pair.template view(); + //} + } + + x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + q = atomKK->k_q.view(); + atomtype = atomKK->k_type.view(); + neighborKK->k_dihedrallist.template sync(); + dihedrallist = neighborKK->k_dihedrallist.view(); + int ndihedrallist = neighborKK->ndihedrallist; + nlocal = atom->nlocal; + newton_bond = force->newton_bond; + qqrd2e = force->qqrd2e; + + h_warning_flag() = 0; + k_warning_flag.template modify(); + k_warning_flag.template sync(); + + copymode = 1; + + // loop over neighbors of my atoms + + EVM_FLOAT evm; + + if (evflag) { + if (newton_bond) { + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ndihedrallist),*this,evm); + } else { + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ndihedrallist),*this,evm); + } + } else { + if (newton_bond) { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,ndihedrallist),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,ndihedrallist),*this); + } + } + + // error check + + k_warning_flag.template modify(); + k_warning_flag.template sync(); + if (h_warning_flag()) + error->warning(FLERR,"Dihedral problem"); + + if (eflag_global) { + energy += evm.emol; + force->pair->eng_vdwl += evm.evdwl; + force->pair->eng_coul += evm.ecoul; + } + if (vflag_global) { + virial[0] += evm.v[0]; + virial[1] += evm.v[1]; + virial[2] += evm.v[2]; + virial[3] += evm.v[3]; + virial[4] += evm.v[4]; + virial[5] += evm.v[5]; + + force->pair->virial[0] += evm.vp[0]; + force->pair->virial[1] += evm.vp[1]; + force->pair->virial[2] += evm.vp[2]; + force->pair->virial[3] += evm.vp[3]; + force->pair->virial[4] += evm.vp[4]; + force->pair->virial[5] += evm.vp[5]; + } + + // don't yet have dualviews for eatom and vatom in pair_kokkos, + // so need to manually copy these to pair style + + int n = nlocal; + if (newton_bond) n += atom->nghost; + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + + k_eatom_pair.template modify(); + k_eatom_pair.template sync(); + for (int i = 0; i < n; i++) + force->pair->eatom[i] += k_eatom_pair.h_view(i); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + + k_vatom_pair.template modify(); + k_vatom_pair.template sync(); + for (int i = 0; i < n; i++) { + force->pair->vatom[i][0] += k_vatom_pair.h_view(i,0); + force->pair->vatom[i][1] += k_vatom_pair.h_view(i,1); + force->pair->vatom[i][2] += k_vatom_pair.h_view(i,2); + force->pair->vatom[i][3] += k_vatom_pair.h_view(i,3); + force->pair->vatom[i][4] += k_vatom_pair.h_view(i,4); + force->pair->vatom[i][5] += k_vatom_pair.h_view(i,5); + } + } + + copymode = 0; +} + +template +template +KOKKOS_INLINE_FUNCTION +void DihedralCharmmfswKokkos::operator()(TagDihedralCharmmCompute, const int &n, EVM_FLOAT& evm) const { + + // The f array is atomic + Kokkos::View::value,Kokkos::MemoryTraits > a_f = f; + + const int i1 = dihedrallist(n,0); + const int i2 = dihedrallist(n,1); + const int i3 = dihedrallist(n,2); + const int i4 = dihedrallist(n,3); + const int type = dihedrallist(n,4); + + // 1st bond + + const F_FLOAT vb1x = x(i1,0) - x(i2,0); + const F_FLOAT vb1y = x(i1,1) - x(i2,1); + const F_FLOAT vb1z = x(i1,2) - x(i2,2); + + // 2nd bond + + const F_FLOAT vb2x = x(i3,0) - x(i2,0); + const F_FLOAT vb2y = x(i3,1) - x(i2,1); + const F_FLOAT vb2z = x(i3,2) - x(i2,2); + + const F_FLOAT vb2xm = -vb2x; + const F_FLOAT vb2ym = -vb2y; + const F_FLOAT vb2zm = -vb2z; + + // 3rd bond + + const F_FLOAT vb3x = x(i4,0) - x(i3,0); + const F_FLOAT vb3y = x(i4,1) - x(i3,1); + const F_FLOAT vb3z = x(i4,2) - x(i3,2); + + const F_FLOAT ax = vb1y*vb2zm - vb1z*vb2ym; + const F_FLOAT ay = vb1z*vb2xm - vb1x*vb2zm; + const F_FLOAT az = vb1x*vb2ym - vb1y*vb2xm; + const F_FLOAT bx = vb3y*vb2zm - vb3z*vb2ym; + const F_FLOAT by = vb3z*vb2xm - vb3x*vb2zm; + const F_FLOAT bz = vb3x*vb2ym - vb3y*vb2xm; + + const F_FLOAT rasq = ax*ax + ay*ay + az*az; + const F_FLOAT rbsq = bx*bx + by*by + bz*bz; + const F_FLOAT rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm; + const F_FLOAT rg = sqrt(rgsq); + + F_FLOAT rginv,ra2inv,rb2inv; + rginv = ra2inv = rb2inv = 0.0; + if (rg > 0) rginv = 1.0/rg; + if (rasq > 0) ra2inv = 1.0/rasq; + if (rbsq > 0) rb2inv = 1.0/rbsq; + const F_FLOAT rabinv = sqrt(ra2inv*rb2inv); + + F_FLOAT c = (ax*bx + ay*by + az*bz)*rabinv; + F_FLOAT s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z); + + // error check + + if ((c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) && !d_warning_flag()) + d_warning_flag() = 1; + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + const int m = d_multiplicity[type]; + F_FLOAT p = 1.0; + F_FLOAT ddf1,df1; + ddf1 = df1 = 0.0; + + for (int i = 0; i < m; i++) { + ddf1 = p*c - df1*s; + df1 = p*s + df1*c; + p = ddf1; + } + + p = p*d_cos_shift[type] + df1*d_sin_shift[type]; + df1 = df1*d_cos_shift[type] - ddf1*d_sin_shift[type]; + df1 *= -m; + p += 1.0; + + if (m == 0) { + p = 1.0 + d_cos_shift[type]; + df1 = 0.0; + } + + E_FLOAT edihedral = 0.0; + if (eflag) edihedral = d_k[type] * p; + + const F_FLOAT fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm; + const F_FLOAT hg = vb3x*vb2xm + vb3y*vb2ym + vb3z*vb2zm; + const F_FLOAT fga = fg*ra2inv*rginv; + const F_FLOAT hgb = hg*rb2inv*rginv; + const F_FLOAT gaa = -ra2inv*rg; + const F_FLOAT gbb = rb2inv*rg; + + const F_FLOAT dtfx = gaa*ax; + const F_FLOAT dtfy = gaa*ay; + const F_FLOAT dtfz = gaa*az; + const F_FLOAT dtgx = fga*ax - hgb*bx; + const F_FLOAT dtgy = fga*ay - hgb*by; + const F_FLOAT dtgz = fga*az - hgb*bz; + const F_FLOAT dthx = gbb*bx; + const F_FLOAT dthy = gbb*by; + const F_FLOAT dthz = gbb*bz; + + const F_FLOAT df = -d_k[type] * df1; + + const F_FLOAT sx2 = df*dtgx; + const F_FLOAT sy2 = df*dtgy; + const F_FLOAT sz2 = df*dtgz; + + F_FLOAT f1[3],f2[3],f3[3],f4[3]; + f1[0] = df*dtfx; + f1[1] = df*dtfy; + f1[2] = df*dtfz; + + f2[0] = sx2 - f1[0]; + f2[1] = sy2 - f1[1]; + f2[2] = sz2 - f1[2]; + + f4[0] = df*dthx; + f4[1] = df*dthy; + f4[2] = df*dthz; + + f3[0] = -sx2 - f4[0]; + f3[1] = -sy2 - f4[1]; + f3[2] = -sz2 - f4[2]; + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + a_f(i1,0) += f1[0]; + a_f(i1,1) += f1[1]; + a_f(i1,2) += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + a_f(i2,0) += f2[0]; + a_f(i2,1) += f2[1]; + a_f(i2,2) += f2[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + a_f(i3,0) += f3[0]; + a_f(i3,1) += f3[1]; + a_f(i3,2) += f3[2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + a_f(i4,0) += f4[0]; + a_f(i4,1) += f4[1]; + a_f(i4,2) += f4[2]; + } + + if (EVFLAG) + ev_tally(evm,i1,i2,i3,i4,edihedral,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z); + + // 1-4 LJ and Coulomb interactions + // tally energy/virial in pair, using newton_bond as newton flag + + if (d_weight[type] > 0.0) { + const int itype = atomtype[i1]; + const int jtype = atomtype[i4]; + + const F_FLOAT delx = x(i1,0) - x(i4,0); + const F_FLOAT dely = x(i1,1) - x(i4,1); + const F_FLOAT delz = x(i1,2) - x(i4,2); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + const F_FLOAT r2inv = 1.0/rsq; + const F_FLOAT r6inv = r2inv*r2inv*r2inv; + + F_FLOAT forcecoul; + if (implicit) forcecoul = qqrd2e * q[i1]*q[i4]*r2inv; + else forcecoul = qqrd2e * q[i1]*q[i4]*sqrt(r2inv); + const F_FLOAT forcelj = r6inv * (d_lj14_1(itype,jtype)*r6inv - d_lj14_2(itype,jtype)); + const F_FLOAT fpair = d_weight[type] * (forcelj+forcecoul)*r2inv; + + F_FLOAT ecoul = 0.0; + F_FLOAT evdwl = 0.0; + if (eflag) { + ecoul = d_weight[type] * forcecoul; + evdwl = r6inv * (d_lj14_3(itype,jtype)*r6inv - d_lj14_4(itype,jtype)); + evdwl *= d_weight[type]; + } + + if (newton_bond || i1 < nlocal) { + a_f(i1,0) += delx*fpair; + a_f(i1,1) += dely*fpair; + a_f(i1,2) += delz*fpair; + } + if (newton_bond || i4 < nlocal) { + a_f(i4,0) -= delx*fpair; + a_f(i4,1) -= dely*fpair; + a_f(i4,2) -= delz*fpair; + } + + if (EVFLAG) ev_tally(evm,i1,i4,evdwl,ecoul,fpair,delx,dely,delz); + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void DihedralCharmmfswKokkos::operator()(TagDihedralCharmmCompute, const int &n) const { + EVM_FLOAT evm; + this->template operator()(TagDihedralCharmmCompute(), n, evm); +} + +/* ---------------------------------------------------------------------- */ + +/* + + 288c307 + < void DihedralCharmm::allocate() + --- + > void DihedralCharmmfsw::allocate() + + */ + +template +void DihedralCharmmfswKokkos::allocate() +{ + DihedralCharmmfsw::allocate(); +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more types +------------------------------------------------------------------------- */ + +/* + + 308c327 + < void DihedralCharmm::coeff(int narg, char **arg) + --- + > void DihedralCharmmfsw::coeff(int narg, char **arg) + + */ + +template +void DihedralCharmmfswKokkos::coeff(int narg, char **arg) +{ + DihedralCharmmfsw::coeff(narg, arg); + + int nd = atom->ndihedraltypes; + typename AT::tdual_ffloat_1d k_k("DihedralCharmm::k",nd+1); + typename AT::tdual_ffloat_1d k_multiplicity("DihedralCharmm::multiplicity",nd+1); + typename AT::tdual_ffloat_1d k_shift("DihedralCharmm::shift",nd+1); + typename AT::tdual_ffloat_1d k_cos_shift("DihedralCharmm::cos_shift",nd+1); + typename AT::tdual_ffloat_1d k_sin_shift("DihedralCharmm::sin_shift",nd+1); + typename AT::tdual_ffloat_1d k_weight("DihedralCharmm::weight",nd+1); + + d_k = k_k.template view(); + d_multiplicity = k_multiplicity.template view(); + d_shift = k_shift.template view(); + d_cos_shift = k_cos_shift.template view(); + d_sin_shift = k_sin_shift.template view(); + d_weight = k_weight.template view(); + + int n = atom->ndihedraltypes; + for (int i = 1; i <= n; i++) { + k_k.h_view[i] = k[i]; + k_multiplicity.h_view[i] = multiplicity[i]; + k_shift.h_view[i] = shift[i]; + k_cos_shift.h_view[i] = cos_shift[i]; + k_sin_shift.h_view[i] = sin_shift[i]; + k_weight.h_view[i] = weight[i]; + } + + k_k.template modify(); + k_multiplicity.template modify(); + k_shift.template modify(); + k_cos_shift.template modify(); + k_sin_shift.template modify(); + k_weight.template modify(); + + k_k.template sync(); + k_multiplicity.template sync(); + k_shift.template sync(); + k_cos_shift.template sync(); + k_sin_shift.template sync(); + k_weight.template sync(); +} + +/* ---------------------------------------------------------------------- + error check and initialize all values needed for force computation +------------------------------------------------------------------------- */ + +/* + + 350c369 + < void DihedralCharmm::init_style() + --- + > void DihedralCharmmfsw::init_style() + 382a402,425 + > + > // constants for applying force switch (LJ) and force_shift (coul) + > // to 1/4 dihedral atoms to match CHARMM pairwise interactions + > + > int itmp; + > int *p_dihedflag = (int *) force->pair->extract("dihedflag", itmp); + > auto p_cutljinner = (double *) force->pair->extract("cut_lj_inner", itmp); + > auto p_cutlj = (double *) force->pair->extract("cut_lj", itmp); + > auto p_cutcoul = (double *) force->pair->extract("cut_coul", itmp); + > + > if (p_cutcoul == nullptr || p_cutljinner == nullptr || p_cutlj == nullptr || + > p_dihedflag == nullptr) + > error->all(FLERR, "Dihedral charmmfsw is incompatible with Pair style"); + > + > dihedflag = *p_dihedflag; + > cut_coul14 = *p_cutcoul; + > cut_lj_inner14 = *p_cutljinner; + > cut_lj14 = *p_cutlj; + > + > cut_coulinv14 = 1 / cut_coul14; + > cut_lj_inner3inv = (1 / cut_lj_inner14) * (1 / cut_lj_inner14) * (1 / cut_lj_inner14); + > cut_lj_inner6inv = cut_lj_inner3inv * cut_lj_inner3inv; + > cut_lj3inv = (1 / cut_lj14) * (1 / cut_lj14) * (1 / cut_lj14); + > cut_lj6inv = cut_lj3inv * cut_lj3inv; + + */ + +template +void DihedralCharmmfswKokkos::init_style() +{ + DihedralCharmmfsw::init_style(); + + int n = atom->ntypes; + DAT::tdual_ffloat_2d k_lj14_1("DihedralCharmm:lj14_1",n+1,n+1); + DAT::tdual_ffloat_2d k_lj14_2("DihedralCharmm:lj14_2",n+1,n+1); + DAT::tdual_ffloat_2d k_lj14_3("DihedralCharmm:lj14_3",n+1,n+1); + DAT::tdual_ffloat_2d k_lj14_4("DihedralCharmm:lj14_4",n+1,n+1); + + d_lj14_1 = k_lj14_1.template view(); + d_lj14_2 = k_lj14_2.template view(); + d_lj14_3 = k_lj14_3.template view(); + d_lj14_4 = k_lj14_4.template view(); + + + if (weightflag) { + int n = atom->ntypes; + for (int i = 1; i <= n; i++) { + for (int j = 1; j <= n; j++) { + k_lj14_1.h_view(i,j) = lj14_1[i][j]; + k_lj14_2.h_view(i,j) = lj14_2[i][j]; + k_lj14_3.h_view(i,j) = lj14_3[i][j]; + k_lj14_4.h_view(i,j) = lj14_4[i][j]; + } + } + } + + k_lj14_1.template modify(); + k_lj14_2.template modify(); + k_lj14_3.template modify(); + k_lj14_4.template modify(); + + k_lj14_1.template sync(); + k_lj14_2.template sync(); + k_lj14_3.template sync(); + k_lj14_4.template sync(); +} + +/* ---------------------------------------------------------------------- + proc 0 reads coeffs from restart file, bcasts them +------------------------------------------------------------------------- */ + +/* + + 402c445 + < void DihedralCharmm::read_restart(FILE *fp) + --- + > void DihedralCharmmfsw::read_restart(FILE *fp) + + */ +template +void DihedralCharmmfswKokkos::read_restart(FILE *fp) +{ + DihedralCharmmfsw::read_restart(fp); + + int nd = atom->ndihedraltypes; + typename AT::tdual_ffloat_1d k_k("DihedralCharmm::k",nd+1); + typename AT::tdual_ffloat_1d k_multiplicity("DihedralCharmm::multiplicity",nd+1); + typename AT::tdual_ffloat_1d k_shift("DihedralCharmm::shift",nd+1); + typename AT::tdual_ffloat_1d k_cos_shift("DihedralCharmm::cos_shift",nd+1); + typename AT::tdual_ffloat_1d k_sin_shift("DihedralCharmm::sin_shift",nd+1); + typename AT::tdual_ffloat_1d k_weight("DihedralCharmm::weight",nd+1); + + d_k = k_k.template view(); + d_multiplicity = k_multiplicity.template view(); + d_shift = k_shift.template view(); + d_cos_shift = k_cos_shift.template view(); + d_sin_shift = k_sin_shift.template view(); + d_weight = k_weight.template view(); + + int n = atom->ndihedraltypes; + for (int i = 1; i <= n; i++) { + k_k.h_view[i] = k[i]; + k_multiplicity.h_view[i] = multiplicity[i]; + k_shift.h_view[i] = shift[i]; + k_cos_shift.h_view[i] = cos_shift[i]; + k_sin_shift.h_view[i] = sin_shift[i]; + k_weight.h_view[i] = weight[i]; + } + + k_k.template modify(); + k_multiplicity.template modify(); + k_shift.template modify(); + k_cos_shift.template modify(); + k_sin_shift.template modify(); + k_weight.template modify(); + + k_k.template sync(); + k_multiplicity.template sync(); + k_shift.template sync(); + k_cos_shift.template sync(); + k_sin_shift.template sync(); + k_weight.template sync(); +} + +/* ---------------------------------------------------------------------- + tally energy and virial into global and per-atom accumulators + virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4 + = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4 + = vb1*f1 + vb2*f3 + (vb3+vb2)*f4 +------------------------------------------------------------------------- */ + +template +//template +KOKKOS_INLINE_FUNCTION +void DihedralCharmmfswKokkos::ev_tally(EVM_FLOAT &evm, const int i1, const int i2, const int i3, const int i4, + F_FLOAT &edihedral, F_FLOAT *f1, F_FLOAT *f3, F_FLOAT *f4, + const F_FLOAT &vb1x, const F_FLOAT &vb1y, const F_FLOAT &vb1z, + const F_FLOAT &vb2x, const F_FLOAT &vb2y, const F_FLOAT &vb2z, + const F_FLOAT &vb3x, const F_FLOAT &vb3y, const F_FLOAT &vb3z) const +{ + E_FLOAT edihedralquarter; + F_FLOAT v[6]; + + if (eflag_either) { + if (eflag_global) { + if (newton_bond) evm.emol += edihedral; + else { + edihedralquarter = 0.25*edihedral; + if (i1 < nlocal) evm.emol += edihedralquarter; + if (i2 < nlocal) evm.emol += edihedralquarter; + if (i3 < nlocal) evm.emol += edihedralquarter; + if (i4 < nlocal) evm.emol += edihedralquarter; + } + } + if (eflag_atom) { + edihedralquarter = 0.25*edihedral; + if (newton_bond || i1 < nlocal) d_eatom[i1] += edihedralquarter; + if (newton_bond || i2 < nlocal) d_eatom[i2] += edihedralquarter; + if (newton_bond || i3 < nlocal) d_eatom[i3] += edihedralquarter; + if (newton_bond || i4 < nlocal) d_eatom[i4] += edihedralquarter; + } + } + + if (vflag_either) { + v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0]; + v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1]; + v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2]; + v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1]; + v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2]; + v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2]; + + if (vflag_global) { + if (newton_bond) { + evm.v[0] += v[0]; + evm.v[1] += v[1]; + evm.v[2] += v[2]; + evm.v[3] += v[3]; + evm.v[4] += v[4]; + evm.v[5] += v[5]; + } else { + if (i1 < nlocal) { + evm.v[0] += 0.25*v[0]; + evm.v[1] += 0.25*v[1]; + evm.v[2] += 0.25*v[2]; + evm.v[3] += 0.25*v[3]; + evm.v[4] += 0.25*v[4]; + evm.v[5] += 0.25*v[5]; + } + if (i2 < nlocal) { + evm.v[0] += 0.25*v[0]; + evm.v[1] += 0.25*v[1]; + evm.v[2] += 0.25*v[2]; + evm.v[3] += 0.25*v[3]; + evm.v[4] += 0.25*v[4]; + evm.v[5] += 0.25*v[5]; + } + if (i3 < nlocal) { + evm.v[0] += 0.25*v[0]; + evm.v[1] += 0.25*v[1]; + evm.v[2] += 0.25*v[2]; + evm.v[3] += 0.25*v[3]; + evm.v[4] += 0.25*v[4]; + evm.v[5] += 0.25*v[5]; + } + if (i4 < nlocal) { + evm.v[0] += 0.25*v[0]; + evm.v[1] += 0.25*v[1]; + evm.v[2] += 0.25*v[2]; + evm.v[3] += 0.25*v[3]; + evm.v[4] += 0.25*v[4]; + evm.v[5] += 0.25*v[5]; + } + } + } + + if (vflag_atom) { + if (newton_bond || i1 < nlocal) { + d_vatom(i1,0) += 0.25*v[0]; + d_vatom(i1,1) += 0.25*v[1]; + d_vatom(i1,2) += 0.25*v[2]; + d_vatom(i1,3) += 0.25*v[3]; + d_vatom(i1,4) += 0.25*v[4]; + d_vatom(i1,5) += 0.25*v[5]; + } + if (newton_bond || i2 < nlocal) { + d_vatom(i2,0) += 0.25*v[0]; + d_vatom(i2,1) += 0.25*v[1]; + d_vatom(i2,2) += 0.25*v[2]; + d_vatom(i2,3) += 0.25*v[3]; + d_vatom(i2,4) += 0.25*v[4]; + d_vatom(i2,5) += 0.25*v[5]; + } + if (newton_bond || i3 < nlocal) { + d_vatom(i3,0) += 0.25*v[0]; + d_vatom(i3,1) += 0.25*v[1]; + d_vatom(i3,2) += 0.25*v[2]; + d_vatom(i3,3) += 0.25*v[3]; + d_vatom(i3,4) += 0.25*v[4]; + d_vatom(i3,5) += 0.25*v[5]; + } + if (newton_bond || i4 < nlocal) { + d_vatom(i4,0) += 0.25*v[0]; + d_vatom(i4,1) += 0.25*v[1]; + d_vatom(i4,2) += 0.25*v[2]; + d_vatom(i4,3) += 0.25*v[3]; + d_vatom(i4,4) += 0.25*v[4]; + d_vatom(i4,5) += 0.25*v[5]; + } + } + } +} + +/* ---------------------------------------------------------------------- + tally eng_vdwl and virial into global and per-atom accumulators + need i < nlocal test since called by bond_quartic and dihedral_charmm +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void DihedralCharmmfswKokkos::ev_tally(EVM_FLOAT &evm, const int i, const int j, + const F_FLOAT &evdwl, const F_FLOAT &ecoul, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const +{ + E_FLOAT evdwlhalf,ecoulhalf,epairhalf; + F_FLOAT v[6]; + + + if (eflag_either) { + if (eflag_global) { + if (newton_bond) { + evm.evdwl += evdwl; + evm.ecoul += ecoul; + } else { + evdwlhalf = 0.5*evdwl; + ecoulhalf = 0.5*ecoul; + if (i < nlocal) { + evm.evdwl += evdwlhalf; + evm.ecoul += ecoulhalf; + } + if (j < nlocal) { + evm.evdwl += evdwlhalf; + evm.ecoul += ecoulhalf; + } + } + } + if (eflag_atom) { + epairhalf = 0.5 * (evdwl + ecoul); + if (newton_bond || i < nlocal) d_eatom_pair[i] += epairhalf; + if (newton_bond || j < nlocal) d_eatom_pair[j] += epairhalf; + } + } + + if (vflag_either) { + v[0] = delx*delx*fpair; + v[1] = dely*dely*fpair; + v[2] = delz*delz*fpair; + v[3] = delx*dely*fpair; + v[4] = delx*delz*fpair; + v[5] = dely*delz*fpair; + + if (vflag_global) { + if (newton_bond) { + evm.vp[0] += v[0]; + evm.vp[1] += v[1]; + evm.vp[2] += v[2]; + evm.vp[3] += v[3]; + evm.vp[4] += v[4]; + evm.vp[5] += v[5]; + } else { + if (i < nlocal) { + evm.vp[0] += 0.5*v[0]; + evm.vp[1] += 0.5*v[1]; + evm.vp[2] += 0.5*v[2]; + evm.vp[3] += 0.5*v[3]; + evm.vp[4] += 0.5*v[4]; + evm.vp[5] += 0.5*v[5]; + } + if (j < nlocal) { + evm.vp[0] += 0.5*v[0]; + evm.vp[1] += 0.5*v[1]; + evm.vp[2] += 0.5*v[2]; + evm.vp[3] += 0.5*v[3]; + evm.vp[4] += 0.5*v[4]; + evm.vp[5] += 0.5*v[5]; + } + } + } + + if (vflag_atom) { + if (newton_bond || i < nlocal) { + d_vatom_pair(i,0) += 0.5*v[0]; + d_vatom_pair(i,1) += 0.5*v[1]; + d_vatom_pair(i,2) += 0.5*v[2]; + d_vatom_pair(i,3) += 0.5*v[3]; + d_vatom_pair(i,4) += 0.5*v[4]; + d_vatom_pair(i,5) += 0.5*v[5]; + } + if (newton_bond || j < nlocal) { + d_vatom_pair(j,0) += 0.5*v[0]; + d_vatom_pair(j,1) += 0.5*v[1]; + d_vatom_pair(j,2) += 0.5*v[2]; + d_vatom_pair(j,3) += 0.5*v[3]; + d_vatom_pair(j,4) += 0.5*v[4]; + d_vatom_pair(j,5) += 0.5*v[5]; + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +namespace LAMMPS_NS { +template class DihedralCharmmfswKokkos; +#ifdef LMP_KOKKOS_GPU +template class DihedralCharmmfswKokkos; +#endif +} + + + +/* + + + 355c374 + < error->all(FLERR, "Dihedral style charmm must be set to same r-RESPA level as 'pair'"); + --- + > error->all(FLERR, "Dihedral style charmmfsw must be set to same r-RESPA level as 'pair'"); + 357c376 + < error->all(FLERR, "Dihedral style charmm must be set to same r-RESPA level as 'outer'"); + --- + > error->all(FLERR, "Dihedral style charmmfsw must be set to same r-RESPA level as 'outer'"); + 373c392 + < error->all(FLERR, "Dihedral charmm is incompatible with Pair style"); + --- + > error->all(FLERR, "Dihedral charmmfsw is incompatible with Pair style"); + 380c399 + < error->all(FLERR, "Dihedral charmm is incompatible with Pair style"); + --- + > error->all(FLERR, "Dihedral charmmfsw is incompatible with Pair style"); + + 389c432 + < void DihedralCharmm::write_restart(FILE *fp) + --- + > void DihedralCharmmfsw::write_restart(FILE *fp) + 430c473 + < void DihedralCharmm::write_data(FILE *fp) + --- + > void DihedralCharmmfsw::write_data(FILE *fp) + + */ + +// nothing to do for all these, inherited from DihedralCharmmfsw diff --git a/src/KOKKOS/dihedral_charmmfsw_kokkos.h b/src/KOKKOS/dihedral_charmmfsw_kokkos.h new file mode 100644 index 0000000000..413945826f --- /dev/null +++ b/src/KOKKOS/dihedral_charmmfsw_kokkos.h @@ -0,0 +1,267 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + + *** DRAFT VERSION 1 (lots of comments to be removed just before merge) *** + + (1) first draft version of DihedralCharmmfswKokkos exactly + same as DihedralCharmmKokkos but with new class name + + (2) second draft version: nothing changed in header file + + method: track changes from serial kspace dihedral_charmm to + dihedral_charmmfsw and apply to DihedralCharmmKokkos + + % diff dihedral_charmm.h dihedral_charmmfsw.h + +------------------------------------------------------------------------- */ + +/* + + 16c16 + < DihedralStyle(charmm,DihedralCharmm); + --- + > DihedralStyle(charmmfsw,DihedralCharmmfsw); + + */ + +#ifdef DIHEDRAL_CLASS +// clang-format off +DihedralStyle(charmmfsw/kk,DihedralCharmmfswKokkos); +DihedralStyle(charmmfsw/kk/device,DihedralCharmmfswKokkos); +DihedralStyle(charmmfsw/kk/host,DihedralCharmmfswKokkos); +// clang-format on +#else + +/* + + 20,21c20,21 + < #ifndef LMP_DIHEDRAL_CHARMM_H + < #define LMP_DIHEDRAL_CHARMM_H + --- + > #ifndef LMP_DIHEDRAL_CHARMMFSW_H + > #define LMP_DIHEDRAL_CHARMMFSW_H + + */ + +// clang-format off +#ifndef LMP_DIHEDRAL_CHARMMFSW_KOKKOS_H +#define LMP_DIHEDRAL_CHARMMFSW_KOKKOS_H + +#include "dihedral_charmmfsw.h" +#include "kokkos_type.h" +#include "dihedral_charmm_kokkos.h" + +/* + + s_EVM_FLOAT and TagDihedralCharmmCompute conflict because style_dihedral.h + includes both dihedral_charmm_kokkos.h and dihedral_charmmfsw_kokkos.h + so comment out definitions in here and include dihedral_charmm_kokkos.h + in dihedral_charmmfsw_kokkos.h: + + In file included from /Users/mitch/Dropbox/lammps/lammps/src/force.cpp:18: + In file included from /Users/mitch/Dropbox/lammps/lammps/build/styles/style_dihedral.h:4: + /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmmfsw_kokkos.h:65:8: error: redefinition of 's_EVM_FLOAT' + struct s_EVM_FLOAT { + ^ + /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmm_kokkos.h:31:8: note: previous definition is here + struct s_EVM_FLOAT { + ^ + In file included from /Users/mitch/Dropbox/lammps/lammps/src/force.cpp:18: + In file included from /Users/mitch/Dropbox/lammps/lammps/build/styles/style_dihedral.h:4: + /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmmfsw_kokkos.h:104:8: error: redefinition of 'TagDihedralCharmmCompute' + struct TagDihedralCharmmCompute{}; + ^ + /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmm_kokkos.h:70:8: note: previous definition is here + struct TagDihedralCharmmCompute{}; + ^ + In file included from /Users/mitch/Dropbox/lammps/lammps/src/lammps.cpp:23: + In file included from /Users/mitch/Dropbox/lammps/lammps/build/styles/style_dihedral.h:4: + /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmmfsw_kokkos.h:65:8: error: redefinition of 's_EVM_FLOAT' + struct s_EVM_FLOAT { + ^ + /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmm_kokkos.h:31:8: note: previous definition is here + struct s_EVM_FLOAT { + ^ + In file included from /Users/mitch/Dropbox/lammps/lammps/src/lammps.cpp:23: + In file included from /Users/mitch/Dropbox/lammps/lammps/build/styles/style_dihedral.h:4: + /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmmfsw_kokkos.h:104:8: error: redefinition of 'TagDihedralCharmmCompute' + struct TagDihedralCharmmCompute{}; + ^ + /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmm_kokkos.h:70:8: note: previous definition is here + struct TagDihedralCharmmCompute{}; + ^ + + */ + +namespace LAMMPS_NS { + +/* +struct s_EVM_FLOAT { + E_FLOAT evdwl; + E_FLOAT ecoul; + E_FLOAT emol; + F_FLOAT v[6]; + F_FLOAT vp[6]; + KOKKOS_INLINE_FUNCTION + s_EVM_FLOAT() { + evdwl = 0; + ecoul = 0; + emol = 0; + v[0] = 0; v[1] = 0; v[2] = 0; + v[3] = 0; v[4] = 0; v[5] = 0; + vp[0] = 0; vp[1] = 0; vp[2] = 0; + vp[3] = 0; vp[4] = 0; vp[5] = 0; + } + + KOKKOS_INLINE_FUNCTION + void operator+=(const s_EVM_FLOAT &rhs) { + evdwl += rhs.evdwl; + ecoul += rhs.ecoul; + emol += rhs.emol; + v[0] += rhs.v[0]; + v[1] += rhs.v[1]; + v[2] += rhs.v[2]; + v[3] += rhs.v[3]; + v[4] += rhs.v[4]; + v[5] += rhs.v[5]; + vp[0] += rhs.vp[0]; + vp[1] += rhs.vp[1]; + vp[2] += rhs.vp[2]; + vp[3] += rhs.vp[3]; + vp[4] += rhs.vp[4]; + vp[5] += rhs.vp[5]; + } +}; +typedef struct s_EVM_FLOAT EVM_FLOAT; + +template +struct TagDihedralCharmmCompute{}; + +*/ + +/* + 27c27 + < class DihedralCharmm : public Dihedral { + --- + > class DihedralCharmmfsw : public Dihedral { + 29,30c29,30 + < DihedralCharmm(class LAMMPS *); + < ~DihedralCharmm() override; + --- + > DihedralCharmmfsw(class LAMMPS *); + > ~DihedralCharmmfsw() override; + + */ + +template +class DihedralCharmmfswKokkos : public DihedralCharmmfsw { + public: + typedef DeviceType device_type; + typedef EVM_FLOAT value_type; + typedef ArrayTypes AT; + + DihedralCharmmfswKokkos(class LAMMPS *); + ~DihedralCharmmfswKokkos() override; + void compute(int, int) override; + void coeff(int, char **) override; + void init_style() override; + void read_restart(FILE *) override; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagDihedralCharmmCompute, const int&, EVM_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagDihedralCharmmCompute, const int&) const; + + //template + KOKKOS_INLINE_FUNCTION + void ev_tally(EVM_FLOAT &evm, const int i1, const int i2, const int i3, const int i4, + F_FLOAT &edihedral, F_FLOAT *f1, F_FLOAT *f3, F_FLOAT *f4, + const F_FLOAT &vb1x, const F_FLOAT &vb1y, const F_FLOAT &vb1z, + const F_FLOAT &vb2x, const F_FLOAT &vb2y, const F_FLOAT &vb2z, + const F_FLOAT &vb3x, const F_FLOAT &vb3y, const F_FLOAT &vb3z) const; + + KOKKOS_INLINE_FUNCTION + void ev_tally(EVM_FLOAT &evm, const int i, const int j, + const F_FLOAT &evdwl, const F_FLOAT &ecoul, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const; + + protected: + + class NeighborKokkos *neighborKK; + + typename AT::t_x_array_randomread x; + typename AT::t_int_1d_randomread atomtype; + typename AT::t_ffloat_1d_randomread q; + typename AT::t_f_array f; + typename AT::t_int_2d dihedrallist; + + typedef typename KKDevice::value KKDeviceType; + Kokkos::DualView k_eatom; + Kokkos::DualView k_vatom; + Kokkos::View > d_eatom; + Kokkos::View > d_vatom; + + Kokkos::DualView k_eatom_pair; + Kokkos::DualView k_vatom_pair; + Kokkos::View > d_eatom_pair; + Kokkos::View > d_vatom_pair; + + int nlocal,newton_bond; + int eflag,vflag; + double qqrd2e; + + Kokkos::DualView k_warning_flag; + typename Kokkos::DualView::t_dev d_warning_flag; + typename Kokkos::DualView::t_host h_warning_flag; + + typename AT::t_ffloat_2d d_lj14_1; + typename AT::t_ffloat_2d d_lj14_2; + typename AT::t_ffloat_2d d_lj14_3; + typename AT::t_ffloat_2d d_lj14_4; + + typename AT::t_ffloat_1d d_k; + typename AT::t_ffloat_1d d_multiplicity; + typename AT::t_ffloat_1d d_shift; + typename AT::t_ffloat_1d d_sin_shift; + typename AT::t_ffloat_1d d_cos_shift; + typename AT::t_ffloat_1d d_weight; + + void allocate() override; +}; + +} + +#endif +#endif + + + +/* + + 38a39,43 + > int implicit, weightflag, dihedflag; + > double cut_lj_inner14, cut_lj14, cut_coul14; + > double evdwl14_12, evdwl14_6, cut_coulinv14; + > double cut_lj_inner3inv, cut_lj_inner6inv, cut_lj3inv, cut_lj6inv; + > + 42d46 + < int implicit, weightflag; + + */ + +// nothing to do here, inherited from DihedralCharmmfsw diff --git a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp new file mode 100644 index 0000000000..88efec5fda --- /dev/null +++ b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp @@ -0,0 +1,941 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + + Contributing authors: + + - Ray Shan (SNL) - original PairLJCharmmCoulLongKokkos + + - Mitch Murphy (alphataubio) - PairLJCharmmfswCoulLongKokkos update (2023/12) + + Based on serial kspace lj-fsw sections (force-switched) provided by + Robert Meissner and Lucio Colombi Ciacchi of Bremen University, Germany, + with additional assistance from Robert A. Latour, Clemson University + + ------------------------------------------------------------------------- */ + + + +/* ---------------------------------------------------------------------- + + *** DRAFT VERSION 1 (lots of comments to be removed just before merge) *** + + (1) first draft version of PairLJCharmmfswCoulLongKokkos almost exactly + same as PairLJCharmmCoulLongKokkos but with new class name + + method: track changes from serial kspace pair_lj_charmm_coul_long to + pair_lj_charmmfsw_coul_long and apply to PairLJCharmmCoulLongKokkos + + ISSUES: + + (A) charmm denom_lj_inv cache , is it to optimize code because division + is slower that multiplication ?? + + + + ------------------------------------------------------------------------- */ + + +/* + 19c23 + < #include "pair_lj_charmm_coul_long.h" + --- + > #include "pair_lj_charmmfsw_coul_long.h" + + */ + +#include "pair_lj_charmmfsw_coul_long_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "error.h" +#include "force.h" +#include "kokkos.h" +#include "memory_kokkos.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "neighbor.h" +#include "respa.h" +#include "update.h" + +#include +#include + +using namespace LAMMPS_NS; + + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +/* + 47c51 + < PairLJCharmmCoulLong::PairLJCharmmCoulLong(LAMMPS *lmp) : Pair(lmp) + --- + > PairLJCharmmfswCoulLong::PairLJCharmmfswCoulLong(LAMMPS *lmp) : Pair(lmp) + 55a60,72 + > + > // short-range/long-range flag accessed by DihedralCharmmfsw + > + > dihedflag = 1; + > + > // switch qqr2e from LAMMPS value to CHARMM value + > + > if (strcmp(update->unit_style,"real") == 0) { + > if ((comm->me == 0) && (force->qqr2e != force->qqr2e_charmm_real)) + > error->message(FLERR,"Switching to CHARMM coulomb energy" + > " conversion constant"); + > force->qqr2e = force->qqr2e_charmm_real; + > } + + */ + +// added superclass constructor to inherit from PairLJCharmmfswCoulLong + +template +PairLJCharmmfswCoulLongKokkos::PairLJCharmmfswCoulLongKokkos(LAMMPS *lmp):PairLJCharmmfswCoulLong(lmp) +{ + + // pair_lj_charmmfsw_coul_long_kokkos.cpp:112:28: error: qualified reference to 'PairLJCharmmfswCoulLong' is a constructor name rather than a type in this context + // ??? PairLJCharmmfswCoulLong::PairLJCharmmfswCoulLong(lmp); + + respa_enable = 0; + + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; +} + +/* ---------------------------------------------------------------------- */ + +/* + + 60c77 + < PairLJCharmmCoulLong::~PairLJCharmmCoulLong() + --- + > PairLJCharmmfswCoulLong::~PairLJCharmmfswCoulLong() + 61a79,87 + > // switch qqr2e back from CHARMM value to LAMMPS value + > + > if (update && strcmp(update->unit_style,"real") == 0) { + > if ((comm->me == 0) && (force->qqr2e == force->qqr2e_charmm_real)) + > error->message(FLERR,"Restoring original LAMMPS coulomb energy" + > " conversion constant"); + > force->qqr2e = force->qqr2e_lammps_real; + > } + > + + */ + +// added superclass constructor to inherit from PairLJCharmmfswCoulLong + +template +PairLJCharmmfswCoulLongKokkos::~PairLJCharmmfswCoulLongKokkos() +{ + + // pair_lj_charmmfsw_coul_long_kokkos.cpp:150:28: error: qualified reference to 'PairLJCharmmfswCoulLong' is a constructor name rather than a type in this context + // ??? PairLJCharmmfswCoulLong::PairLJCharmmfswCoulLong(); + + if (copymode) return; + + if (allocated) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_cutsq,cutsq); + } +} + +/* ---------------------------------------------------------------------- */ + +/* + 87c112 + < void PairLJCharmmCoulLong::compute(int eflag, int vflag) + --- + > void PairLJCharmmfswCoulLong::compute(int eflag, int vflag) + 90c115 + < double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + --- + > double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwl12,evdwl6,ecoul,fpair; + 92c117 + < double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + --- + > double r,rinv,r2inv,r3inv,r6inv,rsq,forcecoul,forcelj,factor_coul,factor_lj; + 94c119 + < double philj,switch1,switch2; + --- + > double switch1; + 96d120 + < double rsq; + 174,179c198,200 + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; + < switch2 = 12.0*rsq * (cut_ljsq-rsq) * + < (rsq-cut_lj_innersq) * denom_lj_inv; + < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + < forcelj = forcelj*switch1 + philj*switch2; + < } + --- + > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + > forcelj = forcelj*switch1; + > } + 205d225 + < evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); + 207,209c227,240 + < switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; + < evdwl *= switch1; + --- + > r = sqrt(rsq); + > rinv = 1.0/r; + > r3inv = rinv*rinv*rinv; + > evdwl12 = lj3[itype][jtype]*cut_lj6*denom_lj12 * + > (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv); + > evdwl6 = -lj4[itype][jtype]*cut_lj3*denom_lj6 * + > (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv); + > evdwl = evdwl12 + evdwl6; + > } else { + > evdwl12 = r6inv*lj3[itype][jtype]*r6inv - + > lj3[itype][jtype]*cut_lj_inner6inv*cut_lj6inv; + > evdwl6 = -lj4[itype][jtype]*r6inv + + > lj4[itype][jtype]*cut_lj_inner3inv*cut_lj3inv; + > evdwl = evdwl12 + evdwl6; + + */ + +template +void PairLJCharmmfswCoulLongKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + ev_init(eflag,vflag,0); + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.view(); + } + if (vflag_atom) { + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom"); + d_vatom = k_vatom.view(); + } + + atomKK->sync(execution_space,datamask_read); + k_cutsq.template sync(); + k_params.template sync(); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + x = atomKK->k_x.view(); + c_x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + q = atomKK->k_q.view(); + type = atomKK->k_type.view(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + special_lj[0] = force->special_lj[0]; + special_lj[1] = force->special_lj[1]; + special_lj[2] = force->special_lj[2]; + special_lj[3] = force->special_lj[3]; + special_coul[0] = force->special_coul[0]; + special_coul[1] = force->special_coul[1]; + special_coul[2] = force->special_coul[2]; + special_coul[3] = force->special_coul[3]; + qqrd2e = force->qqrd2e; + newton_pair = force->newton_pair; + + // loop over neighbors of my atoms + + copymode = 1; + + EV_FLOAT ev; + if (ncoultablebits) + ev = pair_compute,CoulLongTable<1> > + (this,(NeighListKokkos*)list); + else + ev = pair_compute,CoulLongTable<0> > + (this,(NeighListKokkos*)list); + + + if (eflag) { + eng_vdwl += ev.evdwl; + eng_coul += ev.ecoul; + } + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] += ev.v[5]; + } + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + copymode = 0; +} + +/* ---------------------------------------------------------------------- + compute LJ CHARMM pair force between atoms i and j + ---------------------------------------------------------------------- */ +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJCharmmfswCoulLongKokkos:: +compute_fpair(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/, + const int& itype, const int& jtype) const { + const F_FLOAT r2inv = 1.0/rsq; + const F_FLOAT r6inv = r2inv*r2inv*r2inv; + F_FLOAT forcelj, switch1, switch2, englj; + + forcelj = r6inv * + ((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv - + (STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2)); + + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + switch2 = 12.0*rsq * (cut_ljsq-rsq) * (rsq-cut_lj_innersq) / denom_lj; + englj = r6inv * + ((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv - + (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)); + forcelj = forcelj*switch1 + englj*switch2; + } + + return forcelj*r2inv; +} + +/* ---------------------------------------------------------------------- + compute LJ CHARMM pair potential energy between atoms i and j + ---------------------------------------------------------------------- */ +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJCharmmfswCoulLongKokkos:: +compute_evdwl(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/, + const int& itype, const int& jtype) const { + const F_FLOAT r2inv = 1.0/rsq; + const F_FLOAT r6inv = r2inv*r2inv*r2inv; + F_FLOAT englj, switch1; + + englj = r6inv * + ((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv - + (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)); + + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + englj *= switch1; + } + + return englj; + +} + +/* ---------------------------------------------------------------------- + compute coulomb pair force between atoms i and j + ---------------------------------------------------------------------- */ +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJCharmmfswCoulLongKokkos:: +compute_fcoul(const F_FLOAT& rsq, const int& /*i*/, const int&j, + const int& /*itype*/, const int& /*jtype*/, + const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const { + if (Specialisation::DoTable && rsq > tabinnersq) { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits; + const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable]; + const F_FLOAT table = d_ftable[itable] + fraction*d_dftable[itable]; + F_FLOAT forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable]; + const F_FLOAT prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + return forcecoul/rsq; + } else { + const F_FLOAT r = sqrt(rsq); + const F_FLOAT grij = g_ewald * r; + const F_FLOAT expm2 = exp(-grij*grij); + const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij); + const F_FLOAT rinv = 1.0/r; + const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + const F_FLOAT prefactor = qqrd2e * qtmp*q[j]*rinv; + F_FLOAT forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + + return forcecoul*rinv*rinv; + } +} + +/* ---------------------------------------------------------------------- + compute coulomb pair potential energy between atoms i and j + ---------------------------------------------------------------------- */ +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJCharmmfswCoulLongKokkos:: +compute_ecoul(const F_FLOAT& rsq, const int& /*i*/, const int&j, + const int& /*itype*/, const int& /*jtype*/, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const { + if (Specialisation::DoTable && rsq > tabinnersq) { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits; + const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable]; + const F_FLOAT table = d_etable[itable] + fraction*d_detable[itable]; + F_FLOAT ecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable]; + const F_FLOAT prefactor = qtmp*q[j] * table; + ecoul -= (1.0-factor_coul)*prefactor; + } + return ecoul; + } else { + const F_FLOAT r = sqrt(rsq); + const F_FLOAT grij = g_ewald * r; + const F_FLOAT expm2 = exp(-grij*grij); + const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij); + const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + const F_FLOAT prefactor = qqrd2e * qtmp*q[j]/r; + F_FLOAT ecoul = prefactor * erfc; + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + return ecoul; + } +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +template +void PairLJCharmmfswCoulLongKokkos::allocate() +{ + PairLJCharmmfswCoulLong::allocate(); + + int n = atom->ntypes; + + memory->destroy(cutsq); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + d_cutsq = k_cutsq.template view(); + + d_cut_ljsq = typename AT::t_ffloat_2d("pair:cut_ljsq",n+1,n+1); + + d_cut_coulsq = typename AT::t_ffloat_2d("pair:cut_coulsq",n+1,n+1); + + k_params = Kokkos::DualView("PairLJCharmmCoulLong::params",n+1,n+1); + params = k_params.template view(); +} + +template +void PairLJCharmmfswCoulLongKokkos::init_tables(double cut_coul, double *cut_respa) +{ + Pair::init_tables(cut_coul,cut_respa); + + typedef typename ArrayTypes::t_ffloat_1d table_type; + typedef typename ArrayTypes::t_ffloat_1d host_table_type; + + int ntable = 1; + for (int i = 0; i < ncoultablebits; i++) ntable *= 2; + + + // Copy rtable and drtable + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + for (int i = 0; i < ntable; i++) { + h_table(i) = rtable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_rtable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + for (int i = 0; i < ntable; i++) { + h_table(i) = drtable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_drtable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + // Copy ftable and dftable + for (int i = 0; i < ntable; i++) { + h_table(i) = ftable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_ftable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + for (int i = 0; i < ntable; i++) { + h_table(i) = dftable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_dftable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + // Copy ctable and dctable + for (int i = 0; i < ntable; i++) { + h_table(i) = ctable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_ctable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + for (int i = 0; i < ntable; i++) { + h_table(i) = dctable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_dctable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + // Copy etable and detable + for (int i = 0; i < ntable; i++) { + h_table(i) = etable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_etable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + for (int i = 0; i < ntable; i++) { + h_table(i) = detable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_detable = d_table; + } +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +/* + 682c733 + < void PairLJCharmmCoulLong::init_style() + --- + > void PairLJCharmmfswCoulLong::init_style() + 686c737 + < "Pair style lj/charmm/coul/long requires atom attribute q"); + --- + > "Pair style lj/charmmfsw/coul/long requires atom attribute q"); + 688c739 + < // request regular or rRESPA neighbor list + --- + > // request regular or rRESPA neighbor lists + 705a757,766 + > cut_ljinv = 1.0/cut_lj; + > cut_lj_innerinv = 1.0/cut_lj_inner; + > cut_lj3 = cut_lj * cut_lj * cut_lj; + > cut_lj3inv = cut_ljinv * cut_ljinv * cut_ljinv; + > cut_lj_inner3inv = cut_lj_innerinv * cut_lj_innerinv * cut_lj_innerinv; + > cut_lj_inner3 = cut_lj_inner * cut_lj_inner * cut_lj_inner; + > cut_lj6 = cut_ljsq * cut_ljsq * cut_ljsq; + > cut_lj6inv = cut_lj3inv * cut_lj3inv; + > cut_lj_inner6inv = cut_lj_inner3inv * cut_lj_inner3inv; + > cut_lj_inner6 = cut_lj_innersq * cut_lj_innersq * cut_lj_innersq; + 709,711c770,773 + < denom_lj = ( (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) * + < (cut_ljsq-cut_lj_innersq) ); + < denom_lj_inv = 1.0 / denom_lj; + --- + > denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) * + > (cut_ljsq-cut_lj_innersq); + > denom_lj12 = 1.0/(cut_lj6 - cut_lj_inner6); + > denom_lj6 = 1.0/(cut_lj3 - cut_lj_inner3); + 718,730d779 + < cut_in_off = cut_respa[0]; + < cut_in_on = cut_respa[1]; + < cut_out_on = cut_respa[2]; + < cut_out_off = cut_respa[3]; + < + < cut_in_diff = cut_in_on - cut_in_off; + < cut_out_diff = cut_out_off - cut_out_on; + < cut_in_diff_inv = 1.0 / (cut_in_diff); + < cut_out_diff_inv = 1.0 / (cut_out_diff); + < cut_in_off_sq = cut_in_off*cut_in_off; + < cut_in_on_sq = cut_in_on*cut_in_on; + < cut_out_on_sq = cut_out_on*cut_out_on; + < cut_out_off_sq = cut_out_off*cut_out_off; + + */ + +template +void PairLJCharmmfswCoulLongKokkos::init_style() +{ + PairLJCharmmfswCoulLong::init_style(); + + Kokkos::deep_copy(d_cut_ljsq,cut_ljsq); + Kokkos::deep_copy(d_cut_coulsq,cut_coulsq); + + // error if rRESPA with inner levels + + if (update->whichflag == 1 && utils::strmatch(update->integrate_style,"^respa")) { + int respa = 0; + if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; + if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; + if (respa) + error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle"); + } + + // adjust neighbor list request for KOKKOS + + neighflag = lmp->kokkos->neighflag; + auto request = neighbor->find_request(this); + request->set_kokkos_host(std::is_same_v && + !std::is_same_v); + request->set_kokkos_device(std::is_same_v); + if (neighflag == FULL) request->enable_full(); +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +template +double PairLJCharmmfswCoulLongKokkos::init_one(int i, int j) +{ + double cutone = PairLJCharmmfswCoulLong::init_one(i,j); + + k_params.h_view(i,j).lj1 = lj1[i][j]; + k_params.h_view(i,j).lj2 = lj2[i][j]; + k_params.h_view(i,j).lj3 = lj3[i][j]; + k_params.h_view(i,j).lj4 = lj4[i][j]; + //k_params.h_view(i,j).offset = offset[i][j]; + k_params.h_view(i,j).cut_ljsq = cut_ljsq; + k_params.h_view(i,j).cut_coulsq = cut_coulsq; + + k_params.h_view(j,i) = k_params.h_view(i,j); + if (i(); + k_params.template modify(); + + return cutone; +} + +namespace LAMMPS_NS { +template class PairLJCharmmfswCoulLongKokkos; +#ifdef LMP_KOKKOS_GPU +template class PairLJCharmmfswCoulLongKokkos; +#endif +} + + + + +/* + 80d105 + < memory->destroy(offset); + 598c650 + < void PairLJCharmmCoulLong::allocate() + --- + > void PairLJCharmmfswCoulLong::allocate() + 622d673 + < memory->create(offset,n+1,n+1,"pair:offset"); + 631c682 + < void PairLJCharmmCoulLong::settings(int narg, char **arg) + --- + > void PairLJCharmmfswCoulLong::settings(int narg, char **arg) + 645c696 + < void PairLJCharmmCoulLong::coeff(int narg, char **arg) + --- + > void PairLJCharmmfswCoulLong::coeff(int narg, char **arg) + 752c801 + < double PairLJCharmmCoulLong::init_one(int i, int j) + --- + > double PairLJCharmmfswCoulLong::init_one(int i, int j) + 790c839 + < void PairLJCharmmCoulLong::write_restart(FILE *fp) + --- + > void PairLJCharmmfswCoulLong::write_restart(FILE *fp) + 811c860 + < void PairLJCharmmCoulLong::read_restart(FILE *fp) + --- + > void PairLJCharmmfswCoulLong::read_restart(FILE *fp) + 842c891 + < void PairLJCharmmCoulLong::write_restart_settings(FILE *fp) + --- + > void PairLJCharmmfswCoulLong::write_restart_settings(FILE *fp) + 857c906 + < void PairLJCharmmCoulLong::read_restart_settings(FILE *fp) + --- + > void PairLJCharmmfswCoulLong::read_restart_settings(FILE *fp) + 882c931 + < void PairLJCharmmCoulLong::write_data(FILE *fp) + --- + > void PairLJCharmmfswCoulLong::write_data(FILE *fp) + 893c942 + < void PairLJCharmmCoulLong::write_data_all(FILE *fp) + --- + > void PairLJCharmmfswCoulLong::write_data_all(FILE *fp) + 903c952 + < double PairLJCharmmCoulLong::single(int i, int j, int itype, int jtype, + --- + > double PairLJCharmmfswCoulLong::single(int i, int j, int itype, int jtype, + 908,909c957,958 + < double r2inv,r6inv,r,grij,expm2,t,erfc,prefactor; + < double switch1,switch2,fraction,table,forcecoul,forcelj,phicoul,philj; + --- + > double r,rinv,r2inv,r3inv,r6inv,grij,expm2,t,erfc,prefactor; + > double switch1,fraction,table,forcecoul,forcelj,phicoul,philj,philj12,philj6; + 911a961,962 + > r = sqrt(rsq); + > rinv = 1.0/r; + 939c990,991 + < r6inv = r2inv*r2inv*r2inv; + --- + > r3inv = rinv*rinv*rinv; + > r6inv = r3inv*r3inv; + 943,947c995,996 + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; + < switch2 = 12.0*rsq * (cut_ljsq-rsq) * + < (rsq-cut_lj_innersq) * denom_lj_inv; + < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + < forcelj = forcelj*switch1 + philj*switch2; + --- + > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + > forcelj = forcelj*switch1; + 965d1013 + < philj = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); + 967,969c1015,1025 + < switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; + < philj *= switch1; + --- + > philj12 = lj3[itype][jtype]*cut_lj6*denom_lj12 * + > (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv); + > philj6 = -lj4[itype][jtype]*cut_lj3*denom_lj6 * + > (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv); + > philj = philj12 + philj6; + > } else { + > philj12 = r6inv*lj3[itype][jtype]*r6inv - + > lj3[itype][jtype]*cut_lj_inner6inv*cut_lj6inv; + > philj6 = -lj4[itype][jtype]*r6inv + + > lj4[itype][jtype]*cut_lj_inner3inv*cut_lj3inv; + > philj = philj12 + philj6; + 979c1035 + < void *PairLJCharmmCoulLong::extract(const char *str, int &dim) + --- + > void *PairLJCharmmfswCoulLong::extract(const char *str, int &dim) + 988a1045,1047 + > + > // info extracted by dihedral_charmmfsw + > + 989a1049,1051 + > if (strcmp(str,"cut_lj_inner") == 0) return (void *) &cut_lj_inner; + > if (strcmp(str,"cut_lj") == 0) return (void *) &cut_lj; + > if (strcmp(str,"dihedflag") == 0) return (void *) &dihedflag; + + + */ + +// nothing to do for all these, inherited from PairLJCharmmfswCoulLong + + + + +/* + + 226c257 + < void PairLJCharmmCoulLong::compute_inner() + --- + > void PairLJCharmmfswCoulLong::compute_inner() + 248a280,286 + > double cut_out_on = cut_respa[0]; + > double cut_out_off = cut_respa[1]; + > + > double cut_out_diff = cut_out_off - cut_out_on; + > double cut_out_on_sq = cut_out_on*cut_out_on; + > double cut_out_off_sq = cut_out_off*cut_out_off; + > + 284c322 + < rsw = (sqrt(rsq) - cut_out_on)*cut_out_diff_inv; + --- + > rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff; + 303c341 + < void PairLJCharmmCoulLong::compute_middle() + --- + > void PairLJCharmmfswCoulLong::compute_middle() + 308c346 + < double philj,switch1,switch2; + --- + > double switch1; + 326a365,376 + > double cut_in_off = cut_respa[0]; + > double cut_in_on = cut_respa[1]; + > double cut_out_on = cut_respa[2]; + > double cut_out_off = cut_respa[3]; + > + > double cut_in_diff = cut_in_on - cut_in_off; + > double cut_out_diff = cut_out_off - cut_out_on; + > double cut_in_off_sq = cut_in_off*cut_in_off; + > double cut_in_on_sq = cut_in_on*cut_in_on; + > double cut_out_on_sq = cut_out_on*cut_out_on; + > double cut_out_off_sq = cut_out_off*cut_out_off; + > + 361,365c411,412 + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; + < switch2 = 12.0*rsq * (cut_ljsq-rsq) * + < (rsq-cut_lj_innersq) * denom_lj_inv; + < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + < forcelj = forcelj*switch1 + philj*switch2; + --- + > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + > forcelj = forcelj*switch1; + 370c417 + < rsw = (sqrt(rsq) - cut_in_off)*cut_in_diff_inv; + --- + > rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff; + 374c421 + < rsw = (sqrt(rsq) - cut_out_on)*cut_out_diff_inv; + --- + > rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff; + 393c440 + < void PairLJCharmmCoulLong::compute_outer(int eflag, int vflag) + --- + > void PairLJCharmmfswCoulLong::compute_outer(int eflag, int vflag) + 396c443 + < double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + --- + > double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwl6,evdwl12,ecoul,fpair; + 398c445 + < double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + --- + > double r,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + 400c447 + < double philj,switch1,switch2; + --- + > double switch1; + 422a470,476 + > double cut_in_off = cut_respa[2]; + > double cut_in_on = cut_respa[3]; + > + > double cut_in_diff = cut_in_on - cut_in_off; + > double cut_in_off_sq = cut_in_off*cut_in_off; + > double cut_in_on_sq = cut_in_on*cut_in_on; + > + 448a503 + > r6inv = r2inv*r2inv*r2inv; + 489d543 + < r6inv = r2inv*r2inv*r2inv; + 493,497c547,548 + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; + < switch2 = 12.0*rsq * (cut_ljsq-rsq) * + < (rsq-cut_lj_innersq) * denom_lj_inv; + < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + < forcelj = forcelj*switch1 + philj*switch2; + --- + > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + > forcelj = forcelj*switch1; + 533d583 + < r6inv = r2inv*r2inv*r2inv; + 536,538c586,598 + < switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; + < evdwl *= switch1; + --- + > rinv = sqrt(r2inv); + > r3inv = r2inv*rinv; + > evdwl12 = lj3[itype][jtype]*cut_lj6*denom_lj12 * + > (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv); + > evdwl6 = -lj4[itype][jtype]*cut_lj3*denom_lj6 * + > (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv); + > evdwl = evdwl12 + evdwl6; + > } else { + > evdwl12 = r6inv*lj3[itype][jtype]*r6inv - + > lj3[itype][jtype]*cut_lj_inner6inv*cut_lj6inv; + > evdwl6 = -lj4[itype][jtype]*r6inv + + > lj4[itype][jtype]*cut_lj_inner3inv*cut_lj3inv; + > evdwl = evdwl12 + evdwl6; + 561d620 + < r6inv = r2inv*r2inv*r2inv; + 565,569c624,625 + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; + < switch2 = 12.0*rsq * (cut_ljsq-rsq) * + < (rsq-cut_lj_innersq) * denom_lj_inv; + < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + < forcelj = forcelj*switch1 + philj*switch2; + --- + > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + > forcelj = forcelj*switch1; + 572d627 + < r6inv = r2inv*r2inv*r2inv; + 576,580c631,632 + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; + < switch2 = 12.0*rsq * (cut_ljsq-rsq) * + < (rsq-cut_lj_innersq) * denom_lj_inv; + < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + < forcelj = forcelj*switch1 + philj*switch2; + --- + > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + > forcelj = forcelj*switch1; + + */ + +// kokkos doesnt support respa, so ignore compute_inner / compute_middle / compute_outer diff --git a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h new file mode 100644 index 0000000000..e9a6b5486f --- /dev/null +++ b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h @@ -0,0 +1,230 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + + +/* ---------------------------------------------------------------------- + + *** DRAFT VERSION 1 (lots of comments to be removed just before merge) *** + + (1) first draft version of PairLJCharmmfswCoulLongKokkos exactly + same as PairLJCharmmCoulLongKokkos but with new class name + + method: track changes from serial kspace pair_lj_charmm_coul_long to + pair_lj_charmmfsw_coul_long and apply to PairLJCharmmfswCoulLongKokkos + + % diff pair_lj_charmm_coul_long.h pair_lj_charmmfsw_coul_long.h + + +------------------------------------------------------------------------- */ + +/* + 16c16 + < PairStyle(lj/charmm/coul/long,PairLJCharmmCoulLong); + --- + > PairStyle(lj/charmmfsw/coul/long,PairLJCharmmfswCoulLong); + + */ + +#ifdef PAIR_CLASS +// clang-format off +PairStyle(lj/charmmfsw/coul/long/kk,PairLJCharmmfswCoulLongKokkos); +PairStyle(lj/charmmfsw/coul/long/kk/device,PairLJCharmmfswCoulLongKokkos); +PairStyle(lj/charmmfsw/coul/long/kk/host,PairLJCharmmfswCoulLongKokkos); +// clang-format on +#else + +/* + + 20,21c20,21 + < #ifndef LMP_PAIR_LJ_CHARMM_COUL_LONG_H + < #define LMP_PAIR_LJ_CHARMM_COUL_LONG_H + --- + > #ifndef LMP_PAIR_LJ_CHARMMFSW_COUL_LONG_H + > #define LMP_PAIR_LJ_CHARMMFSW_COUL_LONG_H + + */ + +// clang-format off +#ifndef LMP_PAIR_LJ_CHARMMFSW_COUL_LONG_KOKKOS_H +#define LMP_PAIR_LJ_CHARMMFSW_COUL_LONG_KOKKOS_H + +#include "pair_kokkos.h" +#include "pair_lj_charmmfsw_coul_long.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +/* + + 27c27 +< class PairLJCharmmCoulLong : public Pair { +--- +> class PairLJCharmmfswCoulLong : public Pair { + + */ + +template +class PairLJCharmmfswCoulLongKokkos : public PairLJCharmmfswCoulLong { + public: + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; + enum {COUL_FLAG=1}; + typedef DeviceType device_type; + typedef ArrayTypes AT; + + /* + + 29,30c29,30 + < PairLJCharmmCoulLong(class LAMMPS *); + < ~PairLJCharmmCoulLong() override; + --- + > PairLJCharmmfswCoulLong(class LAMMPS *); + > ~PairLJCharmmfswCoulLong() override; + + */ + + PairLJCharmmfswCoulLongKokkos(class LAMMPS *); + ~PairLJCharmmfswCoulLongKokkos() override; + + void compute(int, int) override; + + void init_tables(double cut_coul, double *cut_respa) override; + void init_style() override; + double init_one(int, int) override; + + protected: + + /* + 52c52,54 + < double cut_lj_inner, cut_lj; + --- + > int dihedflag; + > + > double cut_lj_inner, cut_lj, cut_ljinv, cut_lj_innerinv; + 53a56,57 + > double cut_lj3inv, cut_lj_inner3inv, cut_lj3, cut_lj_inner3; + > double cut_lj6inv, cut_lj_inner6inv, cut_lj6, cut_lj_inner6; + 56,60c60 + < double cut_in_off, cut_in_on, cut_out_off, cut_out_on; + < double cut_in_diff, cut_out_diff; + < double cut_in_diff_inv, cut_out_diff_inv; + < double cut_in_off_sq, cut_in_on_sq, cut_out_off_sq, cut_out_on_sq; + < double denom_lj, denom_lj_inv; + --- + > double denom_lj, denom_lj12, denom_lj6; + + */ + + // almost nothing to do here, inherited from PairLJCharmmfswCoulLong + // only temporarily need cut_lj_innersq, denom_coul protected variables + // (removed from pair_lj_charmm_coul_long to pair_lj_charmmfsw_coul_long) + // to compile draft version 1, can be removed by draft version 2 + + + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const; + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, + const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const; + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const; + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const; + + Kokkos::DualView k_params; + typename Kokkos::DualView::t_dev_const_um params; + // hardwired to space for 12 atom types + params_lj_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + + F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + typename AT::t_x_array_randomread x; + typename AT::t_x_array c_x; + typename AT::t_f_array f; + typename AT::t_int_1d_randomread type; + typename AT::t_float_1d_randomread q; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + typename AT::t_efloat_1d d_eatom; + typename AT::t_virial_array d_vatom; + + int newton_pair; + + typename AT::tdual_ffloat_2d k_cutsq; + typename AT::t_ffloat_2d d_cutsq; + typename AT::t_ffloat_2d d_cut_ljsq; + typename AT::t_ffloat_2d d_cut_coulsq; + + typename AT::t_ffloat_1d_randomread + d_rtable, d_drtable, d_ftable, d_dftable, + d_ctable, d_dctable, d_etable, d_detable; + + int neighflag; + int nlocal,nall,eflag,vflag; + + double special_coul[4]; + double special_lj[4]; + double qqrd2e; + + void allocate() override; + + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairLJCharmmfswCoulLongKokkos*, + NeighListKokkos*); + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairLJCharmmfswCoulLongKokkos*, + NeighListKokkos*); + friend void pair_virial_fdotr_compute(PairLJCharmmfswCoulLongKokkos*); + +}; + +} + +#endif +#endif + From 3a1d3bb64d604f48e76a930e88eefd9de062b472 Mon Sep 17 00:00:00 2001 From: Mitch Murphy Date: Wed, 3 Jan 2024 02:42:15 -0500 Subject: [PATCH 052/448] second draft... applied changes to compute methods --- src/KOKKOS/dihedral_charmmfsw_kokkos.cpp | 95 ++++---- src/KOKKOS/dihedral_charmmfsw_kokkos.h | 28 +-- .../pair_lj_charmmfsw_coul_long_kokkos.cpp | 206 +++++++++--------- 3 files changed, 169 insertions(+), 160 deletions(-) diff --git a/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp b/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp index facb723580..b309f3d97f 100644 --- a/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp +++ b/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp @@ -18,7 +18,7 @@ - Stan Moore (SNL) original DihedralCharmmfswKokkos - - Mitch Murphy (alphataubio) - DihedralCharmmfswKokkos update (2023/12) + - Mitch Murphy (alphataubio) - DihedralCharmmfswKokkos update (2024/01) Based on serial dihedral_charmmfsw.cpp lj-fsw sections (force-switched) provided by Robert Meissner and Lucio Colombi Ciacchi of Bremen @@ -138,21 +138,7 @@ DihedralCharmmfswKokkos::~DihedralCharmmfswKokkos() 259a269,270 > else > forcecoul = qqrd2e * q[i1] * q[i4] * (sqrt(r2inv) - r * cut_coulinv14 * cut_coulinv14); - 264,265c275,284 - < ecoul = weight[type] * forcecoul; - < evdwl = r6inv * (lj14_3[itype][jtype] * r6inv - lj14_4[itype][jtype]); - --- - > if (dihedflag) - > ecoul = weight[type] * forcecoul; - > else - > ecoul = weight[type] * qqrd2e * q[i1] * q[i4] * - > (sqrt(r2inv) + r * cut_coulinv14 * cut_coulinv14 - 2.0 * cut_coulinv14); - > evdwl14_12 = r6inv * lj14_3[itype][jtype] * r6inv - - > lj14_3[itype][jtype] * cut_lj_inner6inv * cut_lj6inv; - > evdwl14_6 = - > -lj14_4[itype][jtype] * r6inv + lj14_4[itype][jtype] * cut_lj_inner3inv * cut_lj3inv; - > evdwl = evdwl14_12 + evdwl14_6; - + */ @@ -225,15 +211,15 @@ void DihedralCharmmfswKokkos::compute(int eflag_in, int vflag_in) if (evflag) { if (newton_bond) { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ndihedrallist),*this,evm); + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ndihedrallist),*this,evm); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ndihedrallist),*this,evm); + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ndihedrallist),*this,evm); } } else { if (newton_bond) { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,ndihedrallist),*this); + Kokkos::parallel_for(Kokkos::RangePolicy >(0,ndihedrallist),*this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,ndihedrallist),*this); + Kokkos::parallel_for(Kokkos::RangePolicy >(0,ndihedrallist),*this); } } @@ -303,7 +289,7 @@ void DihedralCharmmfswKokkos::compute(int eflag_in, int vflag_in) template template KOKKOS_INLINE_FUNCTION -void DihedralCharmmfswKokkos::operator()(TagDihedralCharmmCompute, const int &n, EVM_FLOAT& evm) const { +void DihedralCharmmfswKokkos::operator()(TagDihedralCharmmfswCompute, const int &n, EVM_FLOAT& evm) const { // The f array is atomic Kokkos::View::value,Kokkos::MemoryTraits > a_f = f; @@ -480,11 +466,38 @@ void DihedralCharmmfswKokkos::operator()(TagDihedralCharmmCompute if (dihedflag) + > ecoul = weight[type] * forcecoul; + > else + > ecoul = weight[type] * qqrd2e * q[i1] * q[i4] * + > (sqrt(r2inv) + r * cut_coulinv14 * cut_coulinv14 - 2.0 * cut_coulinv14); + > evdwl14_12 = r6inv * lj14_3[itype][jtype] * r6inv - + > lj14_3[itype][jtype] * cut_lj_inner6inv * cut_lj6inv; + > evdwl14_6 = + > -lj14_4[itype][jtype] * r6inv + lj14_4[itype][jtype] * cut_lj_inner3inv * cut_lj3inv; + > evdwl = evdwl14_12 + evdwl14_6; + */ + + const F_FLOAT r = sqrt(rsq); F_FLOAT ecoul = 0.0; F_FLOAT evdwl = 0.0; + F_FLOAT evdwl14_12, evdwl14_6; if (eflag) { - ecoul = d_weight[type] * forcecoul; - evdwl = r6inv * (d_lj14_3(itype,jtype)*r6inv - d_lj14_4(itype,jtype)); + if (dihedflag) + ecoul = d_weight[type] * forcecoul; + else + ecoul = d_weight[type] * qqrd2e * q[i1] * q[i4] * + (sqrt(r2inv) + r * cut_coulinv14 * cut_coulinv14 - 2.0 * cut_coulinv14); + evdwl14_12 = r6inv * d_lj14_3(itype,jtype) * r6inv - + d_lj14_3(itype,jtype) * cut_lj_inner6inv * cut_lj6inv; + evdwl14_6 = + -d_lj14_4(itype,jtype) * r6inv + d_lj14_4(itype,jtype) * cut_lj_inner3inv * cut_lj3inv; + evdwl = evdwl14_12 + evdwl14_6; evdwl *= d_weight[type]; } @@ -506,9 +519,9 @@ void DihedralCharmmfswKokkos::operator()(TagDihedralCharmmCompute template KOKKOS_INLINE_FUNCTION -void DihedralCharmmfswKokkos::operator()(TagDihedralCharmmCompute, const int &n) const { +void DihedralCharmmfswKokkos::operator()(TagDihedralCharmmfswCompute, const int &n) const { EVM_FLOAT evm; - this->template operator()(TagDihedralCharmmCompute(), n, evm); + this->template operator()(TagDihedralCharmmfswCompute(), n, evm); } /* ---------------------------------------------------------------------- */ @@ -547,12 +560,12 @@ void DihedralCharmmfswKokkos::coeff(int narg, char **arg) DihedralCharmmfsw::coeff(narg, arg); int nd = atom->ndihedraltypes; - typename AT::tdual_ffloat_1d k_k("DihedralCharmm::k",nd+1); - typename AT::tdual_ffloat_1d k_multiplicity("DihedralCharmm::multiplicity",nd+1); - typename AT::tdual_ffloat_1d k_shift("DihedralCharmm::shift",nd+1); - typename AT::tdual_ffloat_1d k_cos_shift("DihedralCharmm::cos_shift",nd+1); - typename AT::tdual_ffloat_1d k_sin_shift("DihedralCharmm::sin_shift",nd+1); - typename AT::tdual_ffloat_1d k_weight("DihedralCharmm::weight",nd+1); + typename AT::tdual_ffloat_1d k_k("DihedralCharmmfsw::k",nd+1); + typename AT::tdual_ffloat_1d k_multiplicity("DihedralCharmmfsw::multiplicity",nd+1); + typename AT::tdual_ffloat_1d k_shift("DihedralCharmmfsw::shift",nd+1); + typename AT::tdual_ffloat_1d k_cos_shift("DihedralCharmmfsw::cos_shift",nd+1); + typename AT::tdual_ffloat_1d k_sin_shift("DihedralCharmmfsw::sin_shift",nd+1); + typename AT::tdual_ffloat_1d k_weight("DihedralCharmmfsw::weight",nd+1); d_k = k_k.template view(); d_multiplicity = k_multiplicity.template view(); @@ -630,10 +643,10 @@ void DihedralCharmmfswKokkos::init_style() DihedralCharmmfsw::init_style(); int n = atom->ntypes; - DAT::tdual_ffloat_2d k_lj14_1("DihedralCharmm:lj14_1",n+1,n+1); - DAT::tdual_ffloat_2d k_lj14_2("DihedralCharmm:lj14_2",n+1,n+1); - DAT::tdual_ffloat_2d k_lj14_3("DihedralCharmm:lj14_3",n+1,n+1); - DAT::tdual_ffloat_2d k_lj14_4("DihedralCharmm:lj14_4",n+1,n+1); + DAT::tdual_ffloat_2d k_lj14_1("DihedralCharmmfsw:lj14_1",n+1,n+1); + DAT::tdual_ffloat_2d k_lj14_2("DihedralCharmmfsw:lj14_2",n+1,n+1); + DAT::tdual_ffloat_2d k_lj14_3("DihedralCharmmfsw:lj14_3",n+1,n+1); + DAT::tdual_ffloat_2d k_lj14_4("DihedralCharmmfsw:lj14_4",n+1,n+1); d_lj14_1 = k_lj14_1.template view(); d_lj14_2 = k_lj14_2.template view(); @@ -682,12 +695,12 @@ void DihedralCharmmfswKokkos::read_restart(FILE *fp) DihedralCharmmfsw::read_restart(fp); int nd = atom->ndihedraltypes; - typename AT::tdual_ffloat_1d k_k("DihedralCharmm::k",nd+1); - typename AT::tdual_ffloat_1d k_multiplicity("DihedralCharmm::multiplicity",nd+1); - typename AT::tdual_ffloat_1d k_shift("DihedralCharmm::shift",nd+1); - typename AT::tdual_ffloat_1d k_cos_shift("DihedralCharmm::cos_shift",nd+1); - typename AT::tdual_ffloat_1d k_sin_shift("DihedralCharmm::sin_shift",nd+1); - typename AT::tdual_ffloat_1d k_weight("DihedralCharmm::weight",nd+1); + typename AT::tdual_ffloat_1d k_k("DihedralCharmmfsw::k",nd+1); + typename AT::tdual_ffloat_1d k_multiplicity("DihedralCharmmfsw::multiplicity",nd+1); + typename AT::tdual_ffloat_1d k_shift("DihedralCharmmfsw::shift",nd+1); + typename AT::tdual_ffloat_1d k_cos_shift("DihedralCharmmfsw::cos_shift",nd+1); + typename AT::tdual_ffloat_1d k_sin_shift("DihedralCharmmfsw::sin_shift",nd+1); + typename AT::tdual_ffloat_1d k_weight("DihedralCharmmfsw::weight",nd+1); d_k = k_k.template view(); d_multiplicity = k_multiplicity.template view(); diff --git a/src/KOKKOS/dihedral_charmmfsw_kokkos.h b/src/KOKKOS/dihedral_charmmfsw_kokkos.h index 413945826f..8b57b28d0c 100644 --- a/src/KOKKOS/dihedral_charmmfsw_kokkos.h +++ b/src/KOKKOS/dihedral_charmmfsw_kokkos.h @@ -78,14 +78,6 @@ DihedralStyle(charmmfsw/kk/host,DihedralCharmmfswKokkos); /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmm_kokkos.h:31:8: note: previous definition is here struct s_EVM_FLOAT { ^ - In file included from /Users/mitch/Dropbox/lammps/lammps/src/force.cpp:18: - In file included from /Users/mitch/Dropbox/lammps/lammps/build/styles/style_dihedral.h:4: - /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmmfsw_kokkos.h:104:8: error: redefinition of 'TagDihedralCharmmCompute' - struct TagDihedralCharmmCompute{}; - ^ - /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmm_kokkos.h:70:8: note: previous definition is here - struct TagDihedralCharmmCompute{}; - ^ In file included from /Users/mitch/Dropbox/lammps/lammps/src/lammps.cpp:23: In file included from /Users/mitch/Dropbox/lammps/lammps/build/styles/style_dihedral.h:4: /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmmfsw_kokkos.h:65:8: error: redefinition of 's_EVM_FLOAT' @@ -94,14 +86,6 @@ DihedralStyle(charmmfsw/kk/host,DihedralCharmmfswKokkos); /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmm_kokkos.h:31:8: note: previous definition is here struct s_EVM_FLOAT { ^ - In file included from /Users/mitch/Dropbox/lammps/lammps/src/lammps.cpp:23: - In file included from /Users/mitch/Dropbox/lammps/lammps/build/styles/style_dihedral.h:4: - /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmmfsw_kokkos.h:104:8: error: redefinition of 'TagDihedralCharmmCompute' - struct TagDihedralCharmmCompute{}; - ^ - /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmm_kokkos.h:70:8: note: previous definition is here - struct TagDihedralCharmmCompute{}; - ^ */ @@ -146,10 +130,12 @@ struct s_EVM_FLOAT { }; typedef struct s_EVM_FLOAT EVM_FLOAT; -template -struct TagDihedralCharmmCompute{}; + */ + +template +struct TagDihedralCharmmfswCompute{}; + -*/ /* 27c27 @@ -181,11 +167,11 @@ class DihedralCharmmfswKokkos : public DihedralCharmmfsw { template KOKKOS_INLINE_FUNCTION - void operator()(TagDihedralCharmmCompute, const int&, EVM_FLOAT&) const; + void operator()(TagDihedralCharmmfswCompute, const int&, EVM_FLOAT&) const; template KOKKOS_INLINE_FUNCTION - void operator()(TagDihedralCharmmCompute, const int&) const; + void operator()(TagDihedralCharmmfswCompute, const int&) const; //template KOKKOS_INLINE_FUNCTION diff --git a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp index 88efec5fda..7c1da2479f 100644 --- a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp @@ -18,7 +18,7 @@ - Ray Shan (SNL) - original PairLJCharmmCoulLongKokkos - - Mitch Murphy (alphataubio) - PairLJCharmmfswCoulLongKokkos update (2023/12) + - Mitch Murphy (alphataubio) - PairLJCharmmfswCoulLongKokkos update (2024/01) Based on serial kspace lj-fsw sections (force-switched) provided by Robert Meissner and Lucio Colombi Ciacchi of Bremen University, Germany, @@ -113,9 +113,6 @@ using namespace LAMMPS_NS; template PairLJCharmmfswCoulLongKokkos::PairLJCharmmfswCoulLongKokkos(LAMMPS *lmp):PairLJCharmmfswCoulLong(lmp) { - - // pair_lj_charmmfsw_coul_long_kokkos.cpp:112:28: error: qualified reference to 'PairLJCharmmfswCoulLong' is a constructor name rather than a type in this context - // ??? PairLJCharmmfswCoulLong::PairLJCharmmfswCoulLong(lmp); respa_enable = 0; @@ -147,15 +144,10 @@ PairLJCharmmfswCoulLongKokkos::PairLJCharmmfswCoulLongKokkos(LAMMPS */ -// added superclass constructor to inherit from PairLJCharmmfswCoulLong - template PairLJCharmmfswCoulLongKokkos::~PairLJCharmmfswCoulLongKokkos() { - // pair_lj_charmmfsw_coul_long_kokkos.cpp:150:28: error: qualified reference to 'PairLJCharmmfswCoulLong' is a constructor name rather than a type in this context - // ??? PairLJCharmmfswCoulLong::PairLJCharmmfswCoulLong(); - if (copymode) return; if (allocated) { @@ -186,39 +178,7 @@ PairLJCharmmfswCoulLongKokkos::~PairLJCharmmfswCoulLongKokkos() > double switch1; 96d120 < double rsq; - 174,179c198,200 - < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < switch2 = 12.0*rsq * (cut_ljsq-rsq) * - < (rsq-cut_lj_innersq) * denom_lj_inv; - < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); - < forcelj = forcelj*switch1 + philj*switch2; - < } - --- - > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; - > forcelj = forcelj*switch1; - > } - 205d225 - < evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); - 207,209c227,240 - < switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * - < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < evdwl *= switch1; - --- - > r = sqrt(rsq); - > rinv = 1.0/r; - > r3inv = rinv*rinv*rinv; - > evdwl12 = lj3[itype][jtype]*cut_lj6*denom_lj12 * - > (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv); - > evdwl6 = -lj4[itype][jtype]*cut_lj3*denom_lj6 * - > (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv); - > evdwl = evdwl12 + evdwl6; - > } else { - > evdwl12 = r6inv*lj3[itype][jtype]*r6inv - - > lj3[itype][jtype]*cut_lj_inner6inv*cut_lj6inv; - > evdwl6 = -lj4[itype][jtype]*r6inv + - > lj4[itype][jtype]*cut_lj_inner3inv*cut_lj3inv; - > evdwl = evdwl12 + evdwl6; - + */ template @@ -320,20 +280,32 @@ compute_fpair(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/, const int& itype, const int& jtype) const { const F_FLOAT r2inv = 1.0/rsq; const F_FLOAT r6inv = r2inv*r2inv*r2inv; - F_FLOAT forcelj, switch1, switch2, englj; + F_FLOAT forcelj, switch1; forcelj = r6inv * ((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv - (STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2)); if (rsq > cut_lj_innersq) { + + /* + 174,179c198,200 + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; + < switch2 = 12.0*rsq * (cut_ljsq-rsq) * + < (rsq-cut_lj_innersq) * denom_lj_inv; + < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + < forcelj = forcelj*switch1 + philj*switch2; + < } + --- + > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + > forcelj = forcelj*switch1; + > } + + */ + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; - switch2 = 12.0*rsq * (cut_ljsq-rsq) * (rsq-cut_lj_innersq) / denom_lj; - englj = r6inv * - ((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv - - (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)); - forcelj = forcelj*switch1 + englj*switch2; + forcelj = forcelj*switch1; } return forcelj*r2inv; @@ -350,20 +322,52 @@ compute_evdwl(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/, const int& itype, const int& jtype) const { const F_FLOAT r2inv = 1.0/rsq; const F_FLOAT r6inv = r2inv*r2inv*r2inv; - F_FLOAT englj, switch1; + const F_FLOAT r = sqrt(rsq); + const F_FLOAT rinv = 1.0/r; + const F_FLOAT r3inv = rinv*rinv*rinv; + F_FLOAT englj, englj12, englj6; + + /* + 205d225 + < evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); + 207,209c227,240 + < switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; + < evdwl *= switch1; + --- + > r = sqrt(rsq); + > rinv = 1.0/r; + > r3inv = rinv*rinv*rinv; + > evdwl12 = lj3[itype][jtype]*cut_lj6*denom_lj12 * + > (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv); + > evdwl6 = -lj4[itype][jtype]*cut_lj3*denom_lj6 * + > (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv); + > evdwl = evdwl12 + evdwl6; + > } else { + > evdwl12 = r6inv*lj3[itype][jtype]*r6inv - + > lj3[itype][jtype]*cut_lj_inner6inv*cut_lj6inv; + > evdwl6 = -lj4[itype][jtype]*r6inv + + > lj4[itype][jtype]*cut_lj_inner3inv*cut_lj3inv; + > evdwl = evdwl12 + evdwl6; + + */ - englj = r6inv * - ((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv - - (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)); if (rsq > cut_lj_innersq) { - switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * - (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; - englj *= switch1; + englj12 = (STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*cut_lj6* + denom_lj12 * (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv); + englj6 = -(STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)* + cut_lj3*denom_lj6 * (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv); + englj = englj12 + englj6; + } else { + englj12 = r6inv*lj3[itype][jtype]*r6inv - + lj3[itype][jtype]*cut_lj_inner6inv*cut_lj6inv; + englj6 = -(STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)*r6inv + + (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)* + cut_lj_inner3inv*cut_lj3inv; + englj = englj12 + englj6; } - return englj; - } /* ---------------------------------------------------------------------- @@ -458,7 +462,7 @@ void PairLJCharmmfswCoulLongKokkos::allocate() d_cut_coulsq = typename AT::t_ffloat_2d("pair:cut_coulsq",n+1,n+1); - k_params = Kokkos::DualView("PairLJCharmmCoulLong::params",n+1,n+1); + k_params = Kokkos::DualView("PairLJCharmmfswCoulLong::params",n+1,n+1); params = k_params.template view(); } @@ -574,49 +578,11 @@ void PairLJCharmmfswCoulLongKokkos::init_tables(double cut_coul, dou < void PairLJCharmmCoulLong::init_style() --- > void PairLJCharmmfswCoulLong::init_style() - 686c737 - < "Pair style lj/charmm/coul/long requires atom attribute q"); - --- - > "Pair style lj/charmmfsw/coul/long requires atom attribute q"); - 688c739 + 688c739 < // request regular or rRESPA neighbor list --- > // request regular or rRESPA neighbor lists - 705a757,766 - > cut_ljinv = 1.0/cut_lj; - > cut_lj_innerinv = 1.0/cut_lj_inner; - > cut_lj3 = cut_lj * cut_lj * cut_lj; - > cut_lj3inv = cut_ljinv * cut_ljinv * cut_ljinv; - > cut_lj_inner3inv = cut_lj_innerinv * cut_lj_innerinv * cut_lj_innerinv; - > cut_lj_inner3 = cut_lj_inner * cut_lj_inner * cut_lj_inner; - > cut_lj6 = cut_ljsq * cut_ljsq * cut_ljsq; - > cut_lj6inv = cut_lj3inv * cut_lj3inv; - > cut_lj_inner6inv = cut_lj_inner3inv * cut_lj_inner3inv; - > cut_lj_inner6 = cut_lj_innersq * cut_lj_innersq * cut_lj_innersq; - 709,711c770,773 - < denom_lj = ( (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) * - < (cut_ljsq-cut_lj_innersq) ); - < denom_lj_inv = 1.0 / denom_lj; - --- - > denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) * - > (cut_ljsq-cut_lj_innersq); - > denom_lj12 = 1.0/(cut_lj6 - cut_lj_inner6); - > denom_lj6 = 1.0/(cut_lj3 - cut_lj_inner3); - 718,730d779 - < cut_in_off = cut_respa[0]; - < cut_in_on = cut_respa[1]; - < cut_out_on = cut_respa[2]; - < cut_out_off = cut_respa[3]; - < - < cut_in_diff = cut_in_on - cut_in_off; - < cut_out_diff = cut_out_off - cut_out_on; - < cut_in_diff_inv = 1.0 / (cut_in_diff); - < cut_out_diff_inv = 1.0 / (cut_out_diff); - < cut_in_off_sq = cut_in_off*cut_in_off; - < cut_in_on_sq = cut_in_on*cut_in_on; - < cut_out_on_sq = cut_out_on*cut_out_on; - < cut_out_off_sq = cut_out_off*cut_out_off; - + */ template @@ -689,6 +655,8 @@ template class PairLJCharmmfswCoulLongKokkos; + + /* 80d105 < memory->destroy(offset); @@ -706,6 +674,48 @@ template class PairLJCharmmfswCoulLongKokkos; < void PairLJCharmmCoulLong::coeff(int narg, char **arg) --- > void PairLJCharmmfswCoulLong::coeff(int narg, char **arg) + + 686c737 + < "Pair style lj/charmm/coul/long requires atom attribute q"); + --- + > "Pair style lj/charmmfsw/coul/long requires atom attribute q"); + + 705a757,766 + > cut_ljinv = 1.0/cut_lj; + > cut_lj_innerinv = 1.0/cut_lj_inner; + > cut_lj3 = cut_lj * cut_lj * cut_lj; + > cut_lj3inv = cut_ljinv * cut_ljinv * cut_ljinv; + > cut_lj_inner3inv = cut_lj_innerinv * cut_lj_innerinv * cut_lj_innerinv; + > cut_lj_inner3 = cut_lj_inner * cut_lj_inner * cut_lj_inner; + > cut_lj6 = cut_ljsq * cut_ljsq * cut_ljsq; + > cut_lj6inv = cut_lj3inv * cut_lj3inv; + > cut_lj_inner6inv = cut_lj_inner3inv * cut_lj_inner3inv; + > cut_lj_inner6 = cut_lj_innersq * cut_lj_innersq * cut_lj_innersq; + 709,711c770,773 + < denom_lj = ( (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) * + < (cut_ljsq-cut_lj_innersq) ); + < denom_lj_inv = 1.0 / denom_lj; + --- + > denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) * + > (cut_ljsq-cut_lj_innersq); + > denom_lj12 = 1.0/(cut_lj6 - cut_lj_inner6); + > denom_lj6 = 1.0/(cut_lj3 - cut_lj_inner3); + 718,730d779 + < cut_in_off = cut_respa[0]; + < cut_in_on = cut_respa[1]; + < cut_out_on = cut_respa[2]; + < cut_out_off = cut_respa[3]; + < + < cut_in_diff = cut_in_on - cut_in_off; + < cut_out_diff = cut_out_off - cut_out_on; + < cut_in_diff_inv = 1.0 / (cut_in_diff); + < cut_out_diff_inv = 1.0 / (cut_out_diff); + < cut_in_off_sq = cut_in_off*cut_in_off; + < cut_in_on_sq = cut_in_on*cut_in_on; + < cut_out_on_sq = cut_out_on*cut_out_on; + < cut_out_off_sq = cut_out_off*cut_out_off; + + 752c801 < double PairLJCharmmCoulLong::init_one(int i, int j) --- From 163805bc33176f58f9ed579f2fac2f7ed8734ce2 Mon Sep 17 00:00:00 2001 From: Mitch Murphy Date: Wed, 3 Jan 2024 15:18:34 -0500 Subject: [PATCH 053/448] removed scaffolding comments and fixed "(STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)" in compute_evdwl --- src/KOKKOS/dihedral_charmmfsw_kokkos.cpp | 117 ----- src/KOKKOS/dihedral_charmmfsw_kokkos.h | 138 +----- .../pair_lj_charmmfsw_coul_long_kokkos.cpp | 450 +----------------- .../pair_lj_charmmfsw_coul_long_kokkos.h | 84 +--- 4 files changed, 7 insertions(+), 782 deletions(-) diff --git a/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp b/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp index b309f3d97f..831e7d9b22 100644 --- a/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp +++ b/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp @@ -27,29 +27,6 @@ ------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - - *** DRAFT VERSION 1 (lots of comments to be removed just before merge) *** - - (1) first draft version of DihedralCharmmfswKokkos exactly - same as DihedralCharmmfswKokkos but with new class name - - method: track changes from serial kspace dihedral_charmm to - dihedral_charmmfsw and apply to DihedralCharmmfswKokkos - - % diff dihedral_charmm.cpp dihedral_charmmfsw.cpp - -------------------------------------------------------------------------- */ - -/* - 18c21 - < #include "dihedral_charmm.h" - --- - > #include "dihedral_charmmfsw.h" - - */ - #include "dihedral_charmmfsw_kokkos.h" #include "atom_kokkos.h" @@ -526,15 +503,6 @@ void DihedralCharmmfswKokkos::operator()(TagDihedralCharmmfswCompute /* ---------------------------------------------------------------------- */ -/* - - 288c307 - < void DihedralCharmm::allocate() - --- - > void DihedralCharmmfsw::allocate() - - */ - template void DihedralCharmmfswKokkos::allocate() { @@ -545,15 +513,6 @@ void DihedralCharmmfswKokkos::allocate() set coeffs for one or more types ------------------------------------------------------------------------- */ -/* - - 308c327 - < void DihedralCharmm::coeff(int narg, char **arg) - --- - > void DihedralCharmmfsw::coeff(int narg, char **arg) - - */ - template void DihedralCharmmfswKokkos::coeff(int narg, char **arg) { @@ -603,40 +562,6 @@ void DihedralCharmmfswKokkos::coeff(int narg, char **arg) error check and initialize all values needed for force computation ------------------------------------------------------------------------- */ -/* - - 350c369 - < void DihedralCharmm::init_style() - --- - > void DihedralCharmmfsw::init_style() - 382a402,425 - > - > // constants for applying force switch (LJ) and force_shift (coul) - > // to 1/4 dihedral atoms to match CHARMM pairwise interactions - > - > int itmp; - > int *p_dihedflag = (int *) force->pair->extract("dihedflag", itmp); - > auto p_cutljinner = (double *) force->pair->extract("cut_lj_inner", itmp); - > auto p_cutlj = (double *) force->pair->extract("cut_lj", itmp); - > auto p_cutcoul = (double *) force->pair->extract("cut_coul", itmp); - > - > if (p_cutcoul == nullptr || p_cutljinner == nullptr || p_cutlj == nullptr || - > p_dihedflag == nullptr) - > error->all(FLERR, "Dihedral charmmfsw is incompatible with Pair style"); - > - > dihedflag = *p_dihedflag; - > cut_coul14 = *p_cutcoul; - > cut_lj_inner14 = *p_cutljinner; - > cut_lj14 = *p_cutlj; - > - > cut_coulinv14 = 1 / cut_coul14; - > cut_lj_inner3inv = (1 / cut_lj_inner14) * (1 / cut_lj_inner14) * (1 / cut_lj_inner14); - > cut_lj_inner6inv = cut_lj_inner3inv * cut_lj_inner3inv; - > cut_lj3inv = (1 / cut_lj14) * (1 / cut_lj14) * (1 / cut_lj14); - > cut_lj6inv = cut_lj3inv * cut_lj3inv; - - */ - template void DihedralCharmmfswKokkos::init_style() { @@ -681,14 +606,6 @@ void DihedralCharmmfswKokkos::init_style() proc 0 reads coeffs from restart file, bcasts them ------------------------------------------------------------------------- */ -/* - - 402c445 - < void DihedralCharmm::read_restart(FILE *fp) - --- - > void DihedralCharmmfsw::read_restart(FILE *fp) - - */ template void DihedralCharmmfswKokkos::read_restart(FILE *fp) { @@ -968,37 +885,3 @@ template class DihedralCharmmfswKokkos; #endif } - - -/* - - - 355c374 - < error->all(FLERR, "Dihedral style charmm must be set to same r-RESPA level as 'pair'"); - --- - > error->all(FLERR, "Dihedral style charmmfsw must be set to same r-RESPA level as 'pair'"); - 357c376 - < error->all(FLERR, "Dihedral style charmm must be set to same r-RESPA level as 'outer'"); - --- - > error->all(FLERR, "Dihedral style charmmfsw must be set to same r-RESPA level as 'outer'"); - 373c392 - < error->all(FLERR, "Dihedral charmm is incompatible with Pair style"); - --- - > error->all(FLERR, "Dihedral charmmfsw is incompatible with Pair style"); - 380c399 - < error->all(FLERR, "Dihedral charmm is incompatible with Pair style"); - --- - > error->all(FLERR, "Dihedral charmmfsw is incompatible with Pair style"); - - 389c432 - < void DihedralCharmm::write_restart(FILE *fp) - --- - > void DihedralCharmmfsw::write_restart(FILE *fp) - 430c473 - < void DihedralCharmm::write_data(FILE *fp) - --- - > void DihedralCharmmfsw::write_data(FILE *fp) - - */ - -// nothing to do for all these, inherited from DihedralCharmmfsw diff --git a/src/KOKKOS/dihedral_charmmfsw_kokkos.h b/src/KOKKOS/dihedral_charmmfsw_kokkos.h index 8b57b28d0c..c3842ca01d 100644 --- a/src/KOKKOS/dihedral_charmmfsw_kokkos.h +++ b/src/KOKKOS/dihedral_charmmfsw_kokkos.h @@ -11,31 +11,6 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ -/* ---------------------------------------------------------------------- - - *** DRAFT VERSION 1 (lots of comments to be removed just before merge) *** - - (1) first draft version of DihedralCharmmfswKokkos exactly - same as DihedralCharmmKokkos but with new class name - - (2) second draft version: nothing changed in header file - - method: track changes from serial kspace dihedral_charmm to - dihedral_charmmfsw and apply to DihedralCharmmKokkos - - % diff dihedral_charmm.h dihedral_charmmfsw.h - -------------------------------------------------------------------------- */ - -/* - - 16c16 - < DihedralStyle(charmm,DihedralCharmm); - --- - > DihedralStyle(charmmfsw,DihedralCharmmfsw); - - */ - #ifdef DIHEDRAL_CLASS // clang-format off DihedralStyle(charmmfsw/kk,DihedralCharmmfswKokkos); @@ -44,17 +19,6 @@ DihedralStyle(charmmfsw/kk/host,DihedralCharmmfswKokkos); // clang-format on #else -/* - - 20,21c20,21 - < #ifndef LMP_DIHEDRAL_CHARMM_H - < #define LMP_DIHEDRAL_CHARMM_H - --- - > #ifndef LMP_DIHEDRAL_CHARMMFSW_H - > #define LMP_DIHEDRAL_CHARMMFSW_H - - */ - // clang-format off #ifndef LMP_DIHEDRAL_CHARMMFSW_KOKKOS_H #define LMP_DIHEDRAL_CHARMMFSW_KOKKOS_H @@ -62,95 +26,17 @@ DihedralStyle(charmmfsw/kk/host,DihedralCharmmfswKokkos); #include "dihedral_charmmfsw.h" #include "kokkos_type.h" #include "dihedral_charmm_kokkos.h" - -/* - s_EVM_FLOAT and TagDihedralCharmmCompute conflict because style_dihedral.h - includes both dihedral_charmm_kokkos.h and dihedral_charmmfsw_kokkos.h - so comment out definitions in here and include dihedral_charmm_kokkos.h - in dihedral_charmmfsw_kokkos.h: - - In file included from /Users/mitch/Dropbox/lammps/lammps/src/force.cpp:18: - In file included from /Users/mitch/Dropbox/lammps/lammps/build/styles/style_dihedral.h:4: - /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmmfsw_kokkos.h:65:8: error: redefinition of 's_EVM_FLOAT' - struct s_EVM_FLOAT { - ^ - /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmm_kokkos.h:31:8: note: previous definition is here - struct s_EVM_FLOAT { - ^ - In file included from /Users/mitch/Dropbox/lammps/lammps/src/lammps.cpp:23: - In file included from /Users/mitch/Dropbox/lammps/lammps/build/styles/style_dihedral.h:4: - /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmmfsw_kokkos.h:65:8: error: redefinition of 's_EVM_FLOAT' - struct s_EVM_FLOAT { - ^ - /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmm_kokkos.h:31:8: note: previous definition is here - struct s_EVM_FLOAT { - ^ - - */ - namespace LAMMPS_NS { -/* -struct s_EVM_FLOAT { - E_FLOAT evdwl; - E_FLOAT ecoul; - E_FLOAT emol; - F_FLOAT v[6]; - F_FLOAT vp[6]; - KOKKOS_INLINE_FUNCTION - s_EVM_FLOAT() { - evdwl = 0; - ecoul = 0; - emol = 0; - v[0] = 0; v[1] = 0; v[2] = 0; - v[3] = 0; v[4] = 0; v[5] = 0; - vp[0] = 0; vp[1] = 0; vp[2] = 0; - vp[3] = 0; vp[4] = 0; vp[5] = 0; - } - - KOKKOS_INLINE_FUNCTION - void operator+=(const s_EVM_FLOAT &rhs) { - evdwl += rhs.evdwl; - ecoul += rhs.ecoul; - emol += rhs.emol; - v[0] += rhs.v[0]; - v[1] += rhs.v[1]; - v[2] += rhs.v[2]; - v[3] += rhs.v[3]; - v[4] += rhs.v[4]; - v[5] += rhs.v[5]; - vp[0] += rhs.vp[0]; - vp[1] += rhs.vp[1]; - vp[2] += rhs.vp[2]; - vp[3] += rhs.vp[3]; - vp[4] += rhs.vp[4]; - vp[5] += rhs.vp[5]; - } -}; -typedef struct s_EVM_FLOAT EVM_FLOAT; - - */ +// s_EVM_FLOAT definition in here conflicted because style_dihedral.h +// includes both dihedral_charmm_kokkos.h and dihedral_charmmfsw_kokkos.h +// so remove definition of s_EVM_FLOAT in here and include +// dihedral_charmm_kokkos.h template struct TagDihedralCharmmfswCompute{}; - - -/* - 27c27 - < class DihedralCharmm : public Dihedral { - --- - > class DihedralCharmmfsw : public Dihedral { - 29,30c29,30 - < DihedralCharmm(class LAMMPS *); - < ~DihedralCharmm() override; - --- - > DihedralCharmmfsw(class LAMMPS *); - > ~DihedralCharmmfsw() override; - - */ - template class DihedralCharmmfswKokkos : public DihedralCharmmfsw { public: @@ -235,19 +121,3 @@ class DihedralCharmmfswKokkos : public DihedralCharmmfsw { #endif #endif - - -/* - - 38a39,43 - > int implicit, weightflag, dihedflag; - > double cut_lj_inner14, cut_lj14, cut_coul14; - > double evdwl14_12, evdwl14_6, cut_coulinv14; - > double cut_lj_inner3inv, cut_lj_inner6inv, cut_lj3inv, cut_lj6inv; - > - 42d46 - < int implicit, weightflag; - - */ - -// nothing to do here, inherited from DihedralCharmmfsw diff --git a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp index 7c1da2479f..191626fc9f 100644 --- a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp @@ -27,35 +27,6 @@ ------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - - *** DRAFT VERSION 1 (lots of comments to be removed just before merge) *** - - (1) first draft version of PairLJCharmmfswCoulLongKokkos almost exactly - same as PairLJCharmmCoulLongKokkos but with new class name - - method: track changes from serial kspace pair_lj_charmm_coul_long to - pair_lj_charmmfsw_coul_long and apply to PairLJCharmmCoulLongKokkos - - ISSUES: - - (A) charmm denom_lj_inv cache , is it to optimize code because division - is slower that multiplication ?? - - - - ------------------------------------------------------------------------- */ - - -/* - 19c23 - < #include "pair_lj_charmm_coul_long.h" - --- - > #include "pair_lj_charmmfsw_coul_long.h" - - */ - #include "pair_lj_charmmfsw_coul_long_kokkos.h" #include "atom_kokkos.h" @@ -86,30 +57,6 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -/* - 47c51 - < PairLJCharmmCoulLong::PairLJCharmmCoulLong(LAMMPS *lmp) : Pair(lmp) - --- - > PairLJCharmmfswCoulLong::PairLJCharmmfswCoulLong(LAMMPS *lmp) : Pair(lmp) - 55a60,72 - > - > // short-range/long-range flag accessed by DihedralCharmmfsw - > - > dihedflag = 1; - > - > // switch qqr2e from LAMMPS value to CHARMM value - > - > if (strcmp(update->unit_style,"real") == 0) { - > if ((comm->me == 0) && (force->qqr2e != force->qqr2e_charmm_real)) - > error->message(FLERR,"Switching to CHARMM coulomb energy" - > " conversion constant"); - > force->qqr2e = force->qqr2e_charmm_real; - > } - - */ - -// added superclass constructor to inherit from PairLJCharmmfswCoulLong - template PairLJCharmmfswCoulLongKokkos::PairLJCharmmfswCoulLongKokkos(LAMMPS *lmp):PairLJCharmmfswCoulLong(lmp) { @@ -125,25 +72,6 @@ PairLJCharmmfswCoulLongKokkos::PairLJCharmmfswCoulLongKokkos(LAMMPS /* ---------------------------------------------------------------------- */ -/* - - 60c77 - < PairLJCharmmCoulLong::~PairLJCharmmCoulLong() - --- - > PairLJCharmmfswCoulLong::~PairLJCharmmfswCoulLong() - 61a79,87 - > // switch qqr2e back from CHARMM value to LAMMPS value - > - > if (update && strcmp(update->unit_style,"real") == 0) { - > if ((comm->me == 0) && (force->qqr2e == force->qqr2e_charmm_real)) - > error->message(FLERR,"Restoring original LAMMPS coulomb energy" - > " conversion constant"); - > force->qqr2e = force->qqr2e_lammps_real; - > } - > - - */ - template PairLJCharmmfswCoulLongKokkos::~PairLJCharmmfswCoulLongKokkos() { @@ -159,28 +87,6 @@ PairLJCharmmfswCoulLongKokkos::~PairLJCharmmfswCoulLongKokkos() /* ---------------------------------------------------------------------- */ -/* - 87c112 - < void PairLJCharmmCoulLong::compute(int eflag, int vflag) - --- - > void PairLJCharmmfswCoulLong::compute(int eflag, int vflag) - 90c115 - < double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; - --- - > double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwl12,evdwl6,ecoul,fpair; - 92c117 - < double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; - --- - > double r,rinv,r2inv,r3inv,r6inv,rsq,forcecoul,forcelj,factor_coul,factor_lj; - 94c119 - < double philj,switch1,switch2; - --- - > double switch1; - 96d120 - < double rsq; - - */ - template void PairLJCharmmfswCoulLongKokkos::compute(int eflag_in, int vflag_in) { @@ -287,22 +193,6 @@ compute_fpair(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/, (STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2)); if (rsq > cut_lj_innersq) { - - /* - 174,179c198,200 - < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < switch2 = 12.0*rsq * (cut_ljsq-rsq) * - < (rsq-cut_lj_innersq) * denom_lj_inv; - < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); - < forcelj = forcelj*switch1 + philj*switch2; - < } - --- - > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; - > forcelj = forcelj*switch1; - > } - - */ - switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; forcelj = forcelj*switch1; @@ -327,32 +217,6 @@ compute_evdwl(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/, const F_FLOAT r3inv = rinv*rinv*rinv; F_FLOAT englj, englj12, englj6; - /* - 205d225 - < evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); - 207,209c227,240 - < switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * - < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < evdwl *= switch1; - --- - > r = sqrt(rsq); - > rinv = 1.0/r; - > r3inv = rinv*rinv*rinv; - > evdwl12 = lj3[itype][jtype]*cut_lj6*denom_lj12 * - > (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv); - > evdwl6 = -lj4[itype][jtype]*cut_lj3*denom_lj6 * - > (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv); - > evdwl = evdwl12 + evdwl6; - > } else { - > evdwl12 = r6inv*lj3[itype][jtype]*r6inv - - > lj3[itype][jtype]*cut_lj_inner6inv*cut_lj6inv; - > evdwl6 = -lj4[itype][jtype]*r6inv + - > lj4[itype][jtype]*cut_lj_inner3inv*cut_lj3inv; - > evdwl = evdwl12 + evdwl6; - - */ - - if (rsq > cut_lj_innersq) { englj12 = (STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*cut_lj6* denom_lj12 * (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv); @@ -360,8 +224,8 @@ compute_evdwl(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/, cut_lj3*denom_lj6 * (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv); englj = englj12 + englj6; } else { - englj12 = r6inv*lj3[itype][jtype]*r6inv - - lj3[itype][jtype]*cut_lj_inner6inv*cut_lj6inv; + englj12 = r6inv*(STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv - + (STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*cut_lj_inner6inv*cut_lj6inv; englj6 = -(STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)*r6inv + (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)* cut_lj_inner3inv*cut_lj3inv; @@ -573,18 +437,6 @@ void PairLJCharmmfswCoulLongKokkos::init_tables(double cut_coul, dou init specific to this pair style ------------------------------------------------------------------------- */ -/* - 682c733 - < void PairLJCharmmCoulLong::init_style() - --- - > void PairLJCharmmfswCoulLong::init_style() - 688c739 - < // request regular or rRESPA neighbor list - --- - > // request regular or rRESPA neighbor lists - - */ - template void PairLJCharmmfswCoulLongKokkos::init_style() { @@ -651,301 +503,3 @@ template class PairLJCharmmfswCoulLongKokkos; template class PairLJCharmmfswCoulLongKokkos; #endif } - - - - - - -/* - 80d105 - < memory->destroy(offset); - 598c650 - < void PairLJCharmmCoulLong::allocate() - --- - > void PairLJCharmmfswCoulLong::allocate() - 622d673 - < memory->create(offset,n+1,n+1,"pair:offset"); - 631c682 - < void PairLJCharmmCoulLong::settings(int narg, char **arg) - --- - > void PairLJCharmmfswCoulLong::settings(int narg, char **arg) - 645c696 - < void PairLJCharmmCoulLong::coeff(int narg, char **arg) - --- - > void PairLJCharmmfswCoulLong::coeff(int narg, char **arg) - - 686c737 - < "Pair style lj/charmm/coul/long requires atom attribute q"); - --- - > "Pair style lj/charmmfsw/coul/long requires atom attribute q"); - - 705a757,766 - > cut_ljinv = 1.0/cut_lj; - > cut_lj_innerinv = 1.0/cut_lj_inner; - > cut_lj3 = cut_lj * cut_lj * cut_lj; - > cut_lj3inv = cut_ljinv * cut_ljinv * cut_ljinv; - > cut_lj_inner3inv = cut_lj_innerinv * cut_lj_innerinv * cut_lj_innerinv; - > cut_lj_inner3 = cut_lj_inner * cut_lj_inner * cut_lj_inner; - > cut_lj6 = cut_ljsq * cut_ljsq * cut_ljsq; - > cut_lj6inv = cut_lj3inv * cut_lj3inv; - > cut_lj_inner6inv = cut_lj_inner3inv * cut_lj_inner3inv; - > cut_lj_inner6 = cut_lj_innersq * cut_lj_innersq * cut_lj_innersq; - 709,711c770,773 - < denom_lj = ( (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) * - < (cut_ljsq-cut_lj_innersq) ); - < denom_lj_inv = 1.0 / denom_lj; - --- - > denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) * - > (cut_ljsq-cut_lj_innersq); - > denom_lj12 = 1.0/(cut_lj6 - cut_lj_inner6); - > denom_lj6 = 1.0/(cut_lj3 - cut_lj_inner3); - 718,730d779 - < cut_in_off = cut_respa[0]; - < cut_in_on = cut_respa[1]; - < cut_out_on = cut_respa[2]; - < cut_out_off = cut_respa[3]; - < - < cut_in_diff = cut_in_on - cut_in_off; - < cut_out_diff = cut_out_off - cut_out_on; - < cut_in_diff_inv = 1.0 / (cut_in_diff); - < cut_out_diff_inv = 1.0 / (cut_out_diff); - < cut_in_off_sq = cut_in_off*cut_in_off; - < cut_in_on_sq = cut_in_on*cut_in_on; - < cut_out_on_sq = cut_out_on*cut_out_on; - < cut_out_off_sq = cut_out_off*cut_out_off; - - - 752c801 - < double PairLJCharmmCoulLong::init_one(int i, int j) - --- - > double PairLJCharmmfswCoulLong::init_one(int i, int j) - 790c839 - < void PairLJCharmmCoulLong::write_restart(FILE *fp) - --- - > void PairLJCharmmfswCoulLong::write_restart(FILE *fp) - 811c860 - < void PairLJCharmmCoulLong::read_restart(FILE *fp) - --- - > void PairLJCharmmfswCoulLong::read_restart(FILE *fp) - 842c891 - < void PairLJCharmmCoulLong::write_restart_settings(FILE *fp) - --- - > void PairLJCharmmfswCoulLong::write_restart_settings(FILE *fp) - 857c906 - < void PairLJCharmmCoulLong::read_restart_settings(FILE *fp) - --- - > void PairLJCharmmfswCoulLong::read_restart_settings(FILE *fp) - 882c931 - < void PairLJCharmmCoulLong::write_data(FILE *fp) - --- - > void PairLJCharmmfswCoulLong::write_data(FILE *fp) - 893c942 - < void PairLJCharmmCoulLong::write_data_all(FILE *fp) - --- - > void PairLJCharmmfswCoulLong::write_data_all(FILE *fp) - 903c952 - < double PairLJCharmmCoulLong::single(int i, int j, int itype, int jtype, - --- - > double PairLJCharmmfswCoulLong::single(int i, int j, int itype, int jtype, - 908,909c957,958 - < double r2inv,r6inv,r,grij,expm2,t,erfc,prefactor; - < double switch1,switch2,fraction,table,forcecoul,forcelj,phicoul,philj; - --- - > double r,rinv,r2inv,r3inv,r6inv,grij,expm2,t,erfc,prefactor; - > double switch1,fraction,table,forcecoul,forcelj,phicoul,philj,philj12,philj6; - 911a961,962 - > r = sqrt(rsq); - > rinv = 1.0/r; - 939c990,991 - < r6inv = r2inv*r2inv*r2inv; - --- - > r3inv = rinv*rinv*rinv; - > r6inv = r3inv*r3inv; - 943,947c995,996 - < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < switch2 = 12.0*rsq * (cut_ljsq-rsq) * - < (rsq-cut_lj_innersq) * denom_lj_inv; - < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); - < forcelj = forcelj*switch1 + philj*switch2; - --- - > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; - > forcelj = forcelj*switch1; - 965d1013 - < philj = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); - 967,969c1015,1025 - < switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * - < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < philj *= switch1; - --- - > philj12 = lj3[itype][jtype]*cut_lj6*denom_lj12 * - > (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv); - > philj6 = -lj4[itype][jtype]*cut_lj3*denom_lj6 * - > (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv); - > philj = philj12 + philj6; - > } else { - > philj12 = r6inv*lj3[itype][jtype]*r6inv - - > lj3[itype][jtype]*cut_lj_inner6inv*cut_lj6inv; - > philj6 = -lj4[itype][jtype]*r6inv + - > lj4[itype][jtype]*cut_lj_inner3inv*cut_lj3inv; - > philj = philj12 + philj6; - 979c1035 - < void *PairLJCharmmCoulLong::extract(const char *str, int &dim) - --- - > void *PairLJCharmmfswCoulLong::extract(const char *str, int &dim) - 988a1045,1047 - > - > // info extracted by dihedral_charmmfsw - > - 989a1049,1051 - > if (strcmp(str,"cut_lj_inner") == 0) return (void *) &cut_lj_inner; - > if (strcmp(str,"cut_lj") == 0) return (void *) &cut_lj; - > if (strcmp(str,"dihedflag") == 0) return (void *) &dihedflag; - - - */ - -// nothing to do for all these, inherited from PairLJCharmmfswCoulLong - - - - -/* - - 226c257 - < void PairLJCharmmCoulLong::compute_inner() - --- - > void PairLJCharmmfswCoulLong::compute_inner() - 248a280,286 - > double cut_out_on = cut_respa[0]; - > double cut_out_off = cut_respa[1]; - > - > double cut_out_diff = cut_out_off - cut_out_on; - > double cut_out_on_sq = cut_out_on*cut_out_on; - > double cut_out_off_sq = cut_out_off*cut_out_off; - > - 284c322 - < rsw = (sqrt(rsq) - cut_out_on)*cut_out_diff_inv; - --- - > rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff; - 303c341 - < void PairLJCharmmCoulLong::compute_middle() - --- - > void PairLJCharmmfswCoulLong::compute_middle() - 308c346 - < double philj,switch1,switch2; - --- - > double switch1; - 326a365,376 - > double cut_in_off = cut_respa[0]; - > double cut_in_on = cut_respa[1]; - > double cut_out_on = cut_respa[2]; - > double cut_out_off = cut_respa[3]; - > - > double cut_in_diff = cut_in_on - cut_in_off; - > double cut_out_diff = cut_out_off - cut_out_on; - > double cut_in_off_sq = cut_in_off*cut_in_off; - > double cut_in_on_sq = cut_in_on*cut_in_on; - > double cut_out_on_sq = cut_out_on*cut_out_on; - > double cut_out_off_sq = cut_out_off*cut_out_off; - > - 361,365c411,412 - < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < switch2 = 12.0*rsq * (cut_ljsq-rsq) * - < (rsq-cut_lj_innersq) * denom_lj_inv; - < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); - < forcelj = forcelj*switch1 + philj*switch2; - --- - > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; - > forcelj = forcelj*switch1; - 370c417 - < rsw = (sqrt(rsq) - cut_in_off)*cut_in_diff_inv; - --- - > rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff; - 374c421 - < rsw = (sqrt(rsq) - cut_out_on)*cut_out_diff_inv; - --- - > rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff; - 393c440 - < void PairLJCharmmCoulLong::compute_outer(int eflag, int vflag) - --- - > void PairLJCharmmfswCoulLong::compute_outer(int eflag, int vflag) - 396c443 - < double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; - --- - > double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwl6,evdwl12,ecoul,fpair; - 398c445 - < double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; - --- - > double r,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; - 400c447 - < double philj,switch1,switch2; - --- - > double switch1; - 422a470,476 - > double cut_in_off = cut_respa[2]; - > double cut_in_on = cut_respa[3]; - > - > double cut_in_diff = cut_in_on - cut_in_off; - > double cut_in_off_sq = cut_in_off*cut_in_off; - > double cut_in_on_sq = cut_in_on*cut_in_on; - > - 448a503 - > r6inv = r2inv*r2inv*r2inv; - 489d543 - < r6inv = r2inv*r2inv*r2inv; - 493,497c547,548 - < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < switch2 = 12.0*rsq * (cut_ljsq-rsq) * - < (rsq-cut_lj_innersq) * denom_lj_inv; - < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); - < forcelj = forcelj*switch1 + philj*switch2; - --- - > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; - > forcelj = forcelj*switch1; - 533d583 - < r6inv = r2inv*r2inv*r2inv; - 536,538c586,598 - < switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * - < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < evdwl *= switch1; - --- - > rinv = sqrt(r2inv); - > r3inv = r2inv*rinv; - > evdwl12 = lj3[itype][jtype]*cut_lj6*denom_lj12 * - > (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv); - > evdwl6 = -lj4[itype][jtype]*cut_lj3*denom_lj6 * - > (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv); - > evdwl = evdwl12 + evdwl6; - > } else { - > evdwl12 = r6inv*lj3[itype][jtype]*r6inv - - > lj3[itype][jtype]*cut_lj_inner6inv*cut_lj6inv; - > evdwl6 = -lj4[itype][jtype]*r6inv + - > lj4[itype][jtype]*cut_lj_inner3inv*cut_lj3inv; - > evdwl = evdwl12 + evdwl6; - 561d620 - < r6inv = r2inv*r2inv*r2inv; - 565,569c624,625 - < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < switch2 = 12.0*rsq * (cut_ljsq-rsq) * - < (rsq-cut_lj_innersq) * denom_lj_inv; - < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); - < forcelj = forcelj*switch1 + philj*switch2; - --- - > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; - > forcelj = forcelj*switch1; - 572d627 - < r6inv = r2inv*r2inv*r2inv; - 576,580c631,632 - < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < switch2 = 12.0*rsq * (cut_ljsq-rsq) * - < (rsq-cut_lj_innersq) * denom_lj_inv; - < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); - < forcelj = forcelj*switch1 + philj*switch2; - --- - > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; - > forcelj = forcelj*switch1; - - */ - -// kokkos doesnt support respa, so ignore compute_inner / compute_middle / compute_outer diff --git a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h index e9a6b5486f..8fdb8543ed 100644 --- a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h +++ b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h @@ -11,29 +11,6 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - - *** DRAFT VERSION 1 (lots of comments to be removed just before merge) *** - - (1) first draft version of PairLJCharmmfswCoulLongKokkos exactly - same as PairLJCharmmCoulLongKokkos but with new class name - - method: track changes from serial kspace pair_lj_charmm_coul_long to - pair_lj_charmmfsw_coul_long and apply to PairLJCharmmfswCoulLongKokkos - - % diff pair_lj_charmm_coul_long.h pair_lj_charmmfsw_coul_long.h - - -------------------------------------------------------------------------- */ - -/* - 16c16 - < PairStyle(lj/charmm/coul/long,PairLJCharmmCoulLong); - --- - > PairStyle(lj/charmmfsw/coul/long,PairLJCharmmfswCoulLong); - - */ #ifdef PAIR_CLASS // clang-format off @@ -43,17 +20,6 @@ PairStyle(lj/charmmfsw/coul/long/kk/host,PairLJCharmmfswCoulLongKokkos #ifndef LMP_PAIR_LJ_CHARMMFSW_COUL_LONG_H - > #define LMP_PAIR_LJ_CHARMMFSW_COUL_LONG_H - - */ - // clang-format off #ifndef LMP_PAIR_LJ_CHARMMFSW_COUL_LONG_KOKKOS_H #define LMP_PAIR_LJ_CHARMMFSW_COUL_LONG_KOKKOS_H @@ -64,15 +30,6 @@ PairStyle(lj/charmmfsw/coul/long/kk/host,PairLJCharmmfswCoulLongKokkos class PairLJCharmmfswCoulLong : public Pair { - - */ - template class PairLJCharmmfswCoulLongKokkos : public PairLJCharmmfswCoulLong { public: @@ -80,18 +37,7 @@ class PairLJCharmmfswCoulLongKokkos : public PairLJCharmmfswCoulLong { enum {COUL_FLAG=1}; typedef DeviceType device_type; typedef ArrayTypes AT; - - /* - - 29,30c29,30 - < PairLJCharmmCoulLong(class LAMMPS *); - < ~PairLJCharmmCoulLong() override; - --- - > PairLJCharmmfswCoulLong(class LAMMPS *); - > ~PairLJCharmmfswCoulLong() override; - - */ - + PairLJCharmmfswCoulLongKokkos(class LAMMPS *); ~PairLJCharmmfswCoulLongKokkos() override; @@ -103,34 +49,6 @@ class PairLJCharmmfswCoulLongKokkos : public PairLJCharmmfswCoulLong { protected: - /* - 52c52,54 - < double cut_lj_inner, cut_lj; - --- - > int dihedflag; - > - > double cut_lj_inner, cut_lj, cut_ljinv, cut_lj_innerinv; - 53a56,57 - > double cut_lj3inv, cut_lj_inner3inv, cut_lj3, cut_lj_inner3; - > double cut_lj6inv, cut_lj_inner6inv, cut_lj6, cut_lj_inner6; - 56,60c60 - < double cut_in_off, cut_in_on, cut_out_off, cut_out_on; - < double cut_in_diff, cut_out_diff; - < double cut_in_diff_inv, cut_out_diff_inv; - < double cut_in_off_sq, cut_in_on_sq, cut_out_off_sq, cut_out_on_sq; - < double denom_lj, denom_lj_inv; - --- - > double denom_lj, denom_lj12, denom_lj6; - - */ - - // almost nothing to do here, inherited from PairLJCharmmfswCoulLong - // only temporarily need cut_lj_innersq, denom_coul protected variables - // (removed from pair_lj_charmm_coul_long to pair_lj_charmmfsw_coul_long) - // to compile draft version 1, can be removed by draft version 2 - - - template KOKKOS_INLINE_FUNCTION F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, From 600eaf837b5d911bea54d224b6a3616accc2935d Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Wed, 3 Jan 2024 16:20:04 -0500 Subject: [PATCH 054/448] update preferred contact info --- src/REACTION/README | 3 ++- src/REACTION/fix_bond_react.cpp | 2 +- src/REACTION/fix_bond_react.h | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/REACTION/README b/src/REACTION/README index 99a5d604ec..b9199d6d47 100644 --- a/src/REACTION/README +++ b/src/REACTION/README @@ -25,4 +25,5 @@ The REACTER methodology is detailed in: https://doi.org/10.1021/acs.macromol.0c02012 This package was created by Jacob Gissinger -(jacob.r.gissinger@gmail.com) at the NASA Langley Research Center. +(jgissing@stevens.edu) while at the NASA Langley Research Center +and Stevens Institute of Technology. diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index 10a7023e17..e704160e93 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -13,7 +13,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- -Contributing Author: Jacob Gissinger (jacob.r.gissinger@gmail.com) +Contributing Author: Jacob Gissinger (jgissing@stevens.edu) ------------------------------------------------------------------------- */ #include "fix_bond_react.h" diff --git a/src/REACTION/fix_bond_react.h b/src/REACTION/fix_bond_react.h index 534261e11d..3d56c2fc7b 100644 --- a/src/REACTION/fix_bond_react.h +++ b/src/REACTION/fix_bond_react.h @@ -12,7 +12,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing Author: Jacob Gissinger (jacob.r.gissinger@gmail.com) + Contributing Author: Jacob Gissinger (jgissing@stevens.edu) ------------------------------------------------------------------------- */ #ifdef FIX_CLASS From a6b00a60b208b17cd25fe6ff2a1d7c0000718bc0 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Wed, 3 Jan 2024 20:31:22 -0500 Subject: [PATCH 055/448] additional check/warning for valid templates --- src/REACTION/fix_bond_react.cpp | 35 +++++++++++++++++++++++++++++---- src/REACTION/fix_bond_react.h | 2 +- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index e704160e93..00292438ec 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -2681,16 +2681,43 @@ void FixBondReact::find_landlocked_atoms(int myrxn) } // also, if atoms change number of bonds, but aren't landlocked, that could be bad + int warnflag = 0; if (comm->me == 0) for (int i = 0; i < twomol->natoms; i++) { if ((create_atoms[i][myrxn] == 0) && (twomol_nxspecial[i][0] != onemol_nxspecial[equivalences[i][1][myrxn]-1][0]) && - (landlocked_atoms[i][myrxn] == 0)) - error->warning(FLERR, "Fix bond/react: Atom affected by reaction {} is too close " - "to template edge",rxn_name[myrxn]); - break; + (landlocked_atoms[i][myrxn] == 0)) { + warnflag = 1; + break; + } } + // also, if an atom changes any of its bonds, but is not landlocked, that could be bad + int thereflag; + if (comm->me == 0) + for (int i = 0; i < twomol->natoms; i++) { + if (landlocked_atoms[i][myrxn] == 1) continue; + for (int j = 0; j < twomol_nxspecial[i][0]; j++) { + int oneneighID = equivalences[twomol_xspecial[i][j]-1][1][myrxn]; + int ii = equivalences[i][1][myrxn] - 1; + thereflag = 0; + for (int k = 0; k < onemol_nxspecial[ii][0]; k++) { + if (oneneighID == onemol_xspecial[ii][k]) { + thereflag = 1; + break; + } + } + if (thereflag == 0) { + warnflag = 1; + break; + } + } + if (warnflag == 1) break; + } + + if (comm->me == 0 && warnflag == 1) error->warning(FLERR, "Fix bond/react: Atom affected " + "by reaction {} is too close to template edge",rxn_name[myrxn]); + // finally, if a created atom is not landlocked, bad! for (int i = 0; i < twomol->natoms; i++) { if (create_atoms[i][myrxn] == 1 && landlocked_atoms[i][myrxn] == 0) { diff --git a/src/REACTION/fix_bond_react.h b/src/REACTION/fix_bond_react.h index 3d56c2fc7b..8c9fc9dce4 100644 --- a/src/REACTION/fix_bond_react.h +++ b/src/REACTION/fix_bond_react.h @@ -139,7 +139,7 @@ class FixBondReact : public Fix { int avail_guesses; // num of restore points available int *guess_branch; // used when there is more than two choices when guessing int **restore_pt; // contains info about restore points - tagint **restore; // contaings info about restore points + tagint **restore; // contains info about restore points int *pioneer_count; // counts pioneers int **edge; // atoms in molecule templates with incorrect valences From 13b6d40062b01a078dd86bd6fe517e15ac731178 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Wed, 3 Jan 2024 20:38:10 -0500 Subject: [PATCH 056/448] tiny_epoxy example correction not sure why this issue showed up in recent LAMMPS versions --- examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized b/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized index ea09d06893..7e0350cdb0 100644 --- a/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized +++ b/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized @@ -20,7 +20,8 @@ improper_style class2 special_bonds lj/coul 0 0 1 pair_modify tail yes mix sixthpower -read_data tiny_epoxy.data +read_data tiny_epoxy.data & + extra/special/per/atom 25 velocity all create 300.0 4928459 dist gaussian From de1f6eefd7c167e4980f4173ad1e0b65d48ea4c5 Mon Sep 17 00:00:00 2001 From: Mitch Murphy Date: Wed, 3 Jan 2024 23:00:07 -0500 Subject: [PATCH 057/448] // FIXME: // superclass destructor from KSPACE/pair_lj_charmmfsw_coul_long.cpp:81 // resets force->qqr2e = force->qqr2e_lammps_real at end of timestep 0 // causing ~E-6 errors for steps 1,2,... everywhere in this class when // running kokkos with openmp (and probably with GPUs also). // // WORKAROUND: for now until guidance from lammps devs is to // reset it back force->qqr2e = force->qqr2e_charmm_real here. --- src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp index 191626fc9f..7701f13768 100644 --- a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp @@ -76,6 +76,17 @@ template PairLJCharmmfswCoulLongKokkos::~PairLJCharmmfswCoulLongKokkos() { + // FIXME: + // superclass destructor from KSPACE/pair_lj_charmmfsw_coul_long.cpp:81 + // resets force->qqr2e = force->qqr2e_lammps_real at end of timestep 0 + // causing ~E-6 errors for steps 1,2,... everywhere in this class when + // running kokkos with openmp (and probably with GPUs also). + // + // WORKAROUND: for now until guidance from lammps devs is to + // reset it back force->qqr2e = force->qqr2e_charmm_real here. + + force->qqr2e = force->qqr2e_charmm_real; + if (copymode) return; if (allocated) { From c065d4bac626bd7fd2912cc1bcd368078a2f587e Mon Sep 17 00:00:00 2001 From: Mitch Murphy Date: Thu, 4 Jan 2024 00:50:06 -0500 Subject: [PATCH 058/448] // FIXME: destructor from this class resets // // force->qqr2e = force->qqr2e_lammps_real // // at end of timestep 0 causing ~E-6 errors for steps 1,2,... // everywhere in pair_lj_charmmfsw_coul_long_kokkos when // running kokkos with openmp (and probably with GPUs also). // // WORKAROUND: for now until guidance from lammps devs is to // comment out this line here (commit to be reversed later). //force->qqr2e = force->qqr2e_lammps_real; --- src/KSPACE/pair_lj_charmmfsw_coul_long.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp b/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp index b7635c49c7..83b7293178 100644 --- a/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp +++ b/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp @@ -82,7 +82,19 @@ PairLJCharmmfswCoulLong::~PairLJCharmmfswCoulLong() if ((comm->me == 0) && (force->qqr2e == force->qqr2e_charmm_real)) error->message(FLERR,"Restoring original LAMMPS coulomb energy" " conversion constant"); - force->qqr2e = force->qqr2e_lammps_real; + + // FIXME: destructor from this class resets + // + // force->qqr2e = force->qqr2e_lammps_real + // + // at end of timestep 0 causing ~E-6 errors for steps 1,2,... + // everywhere in pair_lj_charmmfsw_coul_long_kokkos when + // running kokkos with openmp (and probably with GPUs also). + // + // WORKAROUND: for now until guidance from lammps devs is to + // comment out this line here (commit to be reversed later). + + //force->qqr2e = force->qqr2e_lammps_real; } if (copymode) return; From ac9afb26dd7ea27b40bbcd2bd44fcdf1ae3932ae Mon Sep 17 00:00:00 2001 From: Evangelos Voyiatzis Date: Thu, 4 Jan 2024 17:38:14 +0200 Subject: [PATCH 059/448] aesthetic optimization for xscale, yscale and zscale --- src/fix_indent.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/fix_indent.cpp b/src/fix_indent.cpp index d3e80ecd79..28dcac08d0 100644 --- a/src/fix_indent.cpp +++ b/src/fix_indent.cpp @@ -82,13 +82,9 @@ FixIndent::FixIndent(LAMMPS *lmp, int narg, char **arg) : // setup scaling - double xscale,yscale,zscale; - if (scaleflag) { - xscale = domain->lattice->xlattice; - yscale = domain->lattice->ylattice; - zscale = domain->lattice->zlattice; - } - else xscale = yscale = zscale = 1.0; + const double xscale { scaleflag ? domain->lattice->xlattice : 1.0}; + const double yscale { scaleflag ? domain->lattice->ylattice : 1.0}; + const double zscale { scaleflag ? domain->lattice->zlattice : 1.0}; // apply scaling factors to geometry From 4b4e796c190734090c80b2badd2023a66e08bb6c Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 4 Jan 2024 16:06:41 -0500 Subject: [PATCH 060/448] Revert " // FIXME:" This reverts commit de1f6eefd7c167e4980f4173ad1e0b65d48ea4c5. my first idea to reset back force->qqr2e = force->qqr2e_charmm_real didnt work because class destructor gets called first THEN superclass destructor gets called --- src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp index 7701f13768..191626fc9f 100644 --- a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp @@ -76,17 +76,6 @@ template PairLJCharmmfswCoulLongKokkos::~PairLJCharmmfswCoulLongKokkos() { - // FIXME: - // superclass destructor from KSPACE/pair_lj_charmmfsw_coul_long.cpp:81 - // resets force->qqr2e = force->qqr2e_lammps_real at end of timestep 0 - // causing ~E-6 errors for steps 1,2,... everywhere in this class when - // running kokkos with openmp (and probably with GPUs also). - // - // WORKAROUND: for now until guidance from lammps devs is to - // reset it back force->qqr2e = force->qqr2e_charmm_real here. - - force->qqr2e = force->qqr2e_charmm_real; - if (copymode) return; if (allocated) { From f9aafff9928c43f27728647ebcb5b5f245f0ad19 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 4 Jan 2024 22:45:53 -0500 Subject: [PATCH 061/448] must include fmt/ranges.h for fmt::join() --- src/KIM/kim_interactions.cpp | 2 ++ src/KIM/kim_param.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/KIM/kim_interactions.cpp b/src/KIM/kim_interactions.cpp index 1f4f84e648..ce550bf5da 100644 --- a/src/KIM/kim_interactions.cpp +++ b/src/KIM/kim_interactions.cpp @@ -70,6 +70,8 @@ #include "modify.h" #include "update.h" +#include "fmt/ranges.h" + #include #include diff --git a/src/KIM/kim_param.cpp b/src/KIM/kim_param.cpp index f72df81989..c50474fe67 100644 --- a/src/KIM/kim_param.cpp +++ b/src/KIM/kim_param.cpp @@ -68,6 +68,8 @@ #include "pair_kim.h" #include "variable.h" +#include "fmt/ranges.h" + #include #include #include From 01482e7a2e0324eabc3dc0b81809573f93ce89b4 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 4 Jan 2024 22:46:40 -0500 Subject: [PATCH 062/448] update fmtlib to version 10.2.1 --- src/fmt/args.h | 13 +- src/fmt/chrono.h | 272 ++++++++-------- src/fmt/color.h | 77 +++-- src/fmt/compile.h | 17 +- src/fmt/core.h | 647 +++++++++++++++++++------------------- src/fmt/format-inl.h | 161 +++++----- src/fmt/format.h | 718 ++++++++++++++++++++++--------------------- src/fmt/os.h | 64 ++-- src/fmt/ostream.h | 100 ++++-- src/fmt/printf.h | 34 +- src/fmt/ranges.h | 117 +++++-- src/fmt/std.h | 154 +++++++--- src/fmt/xchar.h | 26 +- src/fmtlib_os.cpp | 67 ++-- 14 files changed, 1384 insertions(+), 1083 deletions(-) diff --git a/src/fmt/args.h b/src/fmt/args.h index 2d684e7cc1..b77a2d0661 100644 --- a/src/fmt/args.h +++ b/src/fmt/args.h @@ -12,7 +12,7 @@ #include // std::unique_ptr #include -#include "core.h" +#include "format.h" // std_string_view FMT_BEGIN_NAMESPACE @@ -22,8 +22,9 @@ template struct is_reference_wrapper : std::false_type {}; template struct is_reference_wrapper> : std::true_type {}; -template const T& unwrap(const T& v) { return v; } -template const T& unwrap(const std::reference_wrapper& v) { +template auto unwrap(const T& v) -> const T& { return v; } +template +auto unwrap(const std::reference_wrapper& v) -> const T& { return static_cast(v); } @@ -50,7 +51,7 @@ class dynamic_arg_list { std::unique_ptr> head_; public: - template const T& push(const Arg& arg) { + template auto push(const Arg& arg) -> const T& { auto new_node = std::unique_ptr>(new typed_node(arg)); auto& value = new_node->value; new_node->next = std::move(head_); @@ -110,14 +111,14 @@ class dynamic_format_arg_store friend class basic_format_args; - unsigned long long get_types() const { + auto get_types() const -> unsigned long long { return detail::is_unpacked_bit | data_.size() | (named_info_.empty() ? 0ULL : static_cast(detail::has_named_args_bit)); } - const basic_format_arg* data() const { + auto data() const -> const basic_format_arg* { return named_info_.empty() ? data_.data() : data_.data() + 1; } diff --git a/src/fmt/chrono.h b/src/fmt/chrono.h index ff3e1445b9..9d54574e16 100644 --- a/src/fmt/chrono.h +++ b/src/fmt/chrono.h @@ -18,7 +18,7 @@ #include #include -#include "format.h" +#include "ostream.h" // formatbuf FMT_BEGIN_NAMESPACE @@ -72,7 +72,8 @@ template ::value && std::numeric_limits::is_signed == std::numeric_limits::is_signed)> -FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto lossless_integral_conversion(const From from, int& ec) + -> To { ec = 0; using F = std::numeric_limits; using T = std::numeric_limits; @@ -101,7 +102,8 @@ template ::value && std::numeric_limits::is_signed != std::numeric_limits::is_signed)> -FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto lossless_integral_conversion(const From from, int& ec) + -> To { ec = 0; using F = std::numeric_limits; using T = std::numeric_limits; @@ -133,7 +135,8 @@ FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { template ::value)> -FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto lossless_integral_conversion(const From from, int& ec) + -> To { ec = 0; return from; } // function @@ -154,7 +157,7 @@ FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { // clang-format on template ::value)> -FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto safe_float_conversion(const From from, int& ec) -> To { ec = 0; using T = std::numeric_limits; static_assert(std::is_floating_point::value, "From must be floating"); @@ -176,7 +179,7 @@ FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) { template ::value)> -FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto safe_float_conversion(const From from, int& ec) -> To { ec = 0; static_assert(std::is_floating_point::value, "From must be floating"); return from; @@ -188,8 +191,8 @@ FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) { template ::value), FMT_ENABLE_IF(std::is_integral::value)> -To safe_duration_cast(std::chrono::duration from, - int& ec) { +auto safe_duration_cast(std::chrono::duration from, + int& ec) -> To { using From = std::chrono::duration; ec = 0; // the basic idea is that we need to convert from count() in the from type @@ -240,8 +243,8 @@ To safe_duration_cast(std::chrono::duration from, template ::value), FMT_ENABLE_IF(std::is_floating_point::value)> -To safe_duration_cast(std::chrono::duration from, - int& ec) { +auto safe_duration_cast(std::chrono::duration from, + int& ec) -> To { using From = std::chrono::duration; ec = 0; if (std::isnan(from.count())) { @@ -321,12 +324,12 @@ To safe_duration_cast(std::chrono::duration from, namespace detail { template struct null {}; -inline null<> localtime_r FMT_NOMACRO(...) { return null<>(); } -inline null<> localtime_s(...) { return null<>(); } -inline null<> gmtime_r(...) { return null<>(); } -inline null<> gmtime_s(...) { return null<>(); } +inline auto localtime_r FMT_NOMACRO(...) -> null<> { return null<>(); } +inline auto localtime_s(...) -> null<> { return null<>(); } +inline auto gmtime_r(...) -> null<> { return null<>(); } +inline auto gmtime_s(...) -> null<> { return null<>(); } -inline const std::locale& get_classic_locale() { +inline auto get_classic_locale() -> const std::locale& { static const auto& locale = std::locale::classic(); return locale; } @@ -336,8 +339,6 @@ template struct codecvt_result { CodeUnit buf[max_size]; CodeUnit* end; }; -template -constexpr const size_t codecvt_result::max_size; template void write_codecvt(codecvt_result& out, string_view in_buf, @@ -408,8 +409,7 @@ inline void do_write(buffer& buf, const std::tm& time, auto&& format_buf = formatbuf>(buf); auto&& os = std::basic_ostream(&format_buf); os.imbue(loc); - using iterator = std::ostreambuf_iterator; - const auto& facet = std::use_facet>(loc); + const auto& facet = std::use_facet>(loc); auto end = facet.put(os, os, Char(' '), &time, format, modifier); if (end.failed()) FMT_THROW(format_error("failed to format time")); } @@ -432,6 +432,51 @@ auto write(OutputIt out, const std::tm& time, const std::locale& loc, return write_encoded_tm_str(out, string_view(buf.data(), buf.size()), loc); } +template +struct is_same_arithmetic_type + : public std::integral_constant::value && + std::is_integral::value) || + (std::is_floating_point::value && + std::is_floating_point::value)> { +}; + +template < + typename To, typename FromRep, typename FromPeriod, + FMT_ENABLE_IF(is_same_arithmetic_type::value)> +auto fmt_duration_cast(std::chrono::duration from) -> To { +#if FMT_SAFE_DURATION_CAST + // Throwing version of safe_duration_cast is only available for + // integer to integer or float to float casts. + int ec; + To to = safe_duration_cast::safe_duration_cast(from, ec); + if (ec) FMT_THROW(format_error("cannot format duration")); + return to; +#else + // Standard duration cast, may overflow. + return std::chrono::duration_cast(from); +#endif +} + +template < + typename To, typename FromRep, typename FromPeriod, + FMT_ENABLE_IF(!is_same_arithmetic_type::value)> +auto fmt_duration_cast(std::chrono::duration from) -> To { + // Mixed integer <-> float cast is not supported by safe_duration_cast. + return std::chrono::duration_cast(from); +} + +template +auto to_time_t( + std::chrono::time_point time_point) + -> std::time_t { + // Cannot use std::chrono::system_clock::to_time_t since this would first + // require a cast to std::chrono::system_clock::time_point, which could + // overflow. + return fmt_duration_cast>( + time_point.time_since_epoch()) + .count(); +} } // namespace detail FMT_BEGIN_EXPORT @@ -441,29 +486,29 @@ FMT_BEGIN_EXPORT expressed in local time. Unlike ``std::localtime``, this function is thread-safe on most platforms. */ -inline std::tm localtime(std::time_t time) { +inline auto localtime(std::time_t time) -> std::tm { struct dispatcher { std::time_t time_; std::tm tm_; dispatcher(std::time_t t) : time_(t) {} - bool run() { + auto run() -> bool { using namespace fmt::detail; return handle(localtime_r(&time_, &tm_)); } - bool handle(std::tm* tm) { return tm != nullptr; } + auto handle(std::tm* tm) -> bool { return tm != nullptr; } - bool handle(detail::null<>) { + auto handle(detail::null<>) -> bool { using namespace fmt::detail; return fallback(localtime_s(&tm_, &time_)); } - bool fallback(int res) { return res == 0; } + auto fallback(int res) -> bool { return res == 0; } #if !FMT_MSC_VERSION - bool fallback(detail::null<>) { + auto fallback(detail::null<>) -> bool { using namespace fmt::detail; std::tm* tm = std::localtime(&time_); if (tm) tm_ = *tm; @@ -480,8 +525,8 @@ inline std::tm localtime(std::time_t time) { #if FMT_USE_LOCAL_TIME template inline auto localtime(std::chrono::local_time time) -> std::tm { - return localtime(std::chrono::system_clock::to_time_t( - std::chrono::current_zone()->to_sys(time))); + return localtime( + detail::to_time_t(std::chrono::current_zone()->to_sys(time))); } #endif @@ -490,29 +535,29 @@ inline auto localtime(std::chrono::local_time time) -> std::tm { expressed in Coordinated Universal Time (UTC). Unlike ``std::gmtime``, this function is thread-safe on most platforms. */ -inline std::tm gmtime(std::time_t time) { +inline auto gmtime(std::time_t time) -> std::tm { struct dispatcher { std::time_t time_; std::tm tm_; dispatcher(std::time_t t) : time_(t) {} - bool run() { + auto run() -> bool { using namespace fmt::detail; return handle(gmtime_r(&time_, &tm_)); } - bool handle(std::tm* tm) { return tm != nullptr; } + auto handle(std::tm* tm) -> bool { return tm != nullptr; } - bool handle(detail::null<>) { + auto handle(detail::null<>) -> bool { using namespace fmt::detail; return fallback(gmtime_s(&tm_, &time_)); } - bool fallback(int res) { return res == 0; } + auto fallback(int res) -> bool { return res == 0; } #if !FMT_MSC_VERSION - bool fallback(detail::null<>) { + auto fallback(detail::null<>) -> bool { std::tm* tm = std::gmtime(&time_); if (tm) tm_ = *tm; return tm != nullptr; @@ -525,9 +570,11 @@ inline std::tm gmtime(std::time_t time) { return gt.tm_; } -inline std::tm gmtime( - std::chrono::time_point time_point) { - return gmtime(std::chrono::system_clock::to_time_t(time_point)); +template +inline auto gmtime( + std::chrono::time_point time_point) + -> std::tm { + return gmtime(detail::to_time_t(time_point)); } namespace detail { @@ -566,7 +613,8 @@ inline void write_digit2_separated(char* buf, unsigned a, unsigned b, } } -template FMT_CONSTEXPR inline const char* get_units() { +template +FMT_CONSTEXPR inline auto get_units() -> const char* { if (std::is_same::value) return "as"; if (std::is_same::value) return "fs"; if (std::is_same::value) return "ps"; @@ -584,8 +632,9 @@ template FMT_CONSTEXPR inline const char* get_units() { if (std::is_same::value) return "Ts"; if (std::is_same::value) return "Ps"; if (std::is_same::value) return "Es"; - if (std::is_same>::value) return "m"; + if (std::is_same>::value) return "min"; if (std::is_same>::value) return "h"; + if (std::is_same>::value) return "d"; return nullptr; } @@ -621,9 +670,8 @@ auto write_padding(OutputIt out, pad_type pad) -> OutputIt { // Parses a put_time-like format string and invokes handler actions. template -FMT_CONSTEXPR const Char* parse_chrono_format(const Char* begin, - const Char* end, - Handler&& handler) { +FMT_CONSTEXPR auto parse_chrono_format(const Char* begin, const Char* end, + Handler&& handler) -> const Char* { if (begin == end || *begin == '}') return begin; if (*begin != '%') FMT_THROW(format_error("invalid format")); auto ptr = begin; @@ -954,25 +1002,25 @@ struct tm_format_checker : null_chrono_spec_handler { FMT_CONSTEXPR void on_tz_name() {} }; -inline const char* tm_wday_full_name(int wday) { +inline auto tm_wday_full_name(int wday) -> const char* { static constexpr const char* full_name_list[] = { "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"}; return wday >= 0 && wday <= 6 ? full_name_list[wday] : "?"; } -inline const char* tm_wday_short_name(int wday) { +inline auto tm_wday_short_name(int wday) -> const char* { static constexpr const char* short_name_list[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}; return wday >= 0 && wday <= 6 ? short_name_list[wday] : "???"; } -inline const char* tm_mon_full_name(int mon) { +inline auto tm_mon_full_name(int mon) -> const char* { static constexpr const char* full_name_list[] = { "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"}; return mon >= 0 && mon <= 11 ? full_name_list[mon] : "?"; } -inline const char* tm_mon_short_name(int mon) { +inline auto tm_mon_short_name(int mon) -> const char* { static constexpr const char* short_name_list[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", @@ -1004,21 +1052,21 @@ inline void tzset_once() { // Converts value to Int and checks that it's in the range [0, upper). template ::value)> -inline Int to_nonnegative_int(T value, Int upper) { - FMT_ASSERT(std::is_unsigned::value || - (value >= 0 && to_unsigned(value) <= to_unsigned(upper)), - "invalid value"); - (void)upper; +inline auto to_nonnegative_int(T value, Int upper) -> Int { + if (!std::is_unsigned::value && + (value < 0 || to_unsigned(value) > to_unsigned(upper))) { + FMT_THROW(fmt::format_error("chrono value is out of range")); + } return static_cast(value); } template ::value)> -inline Int to_nonnegative_int(T value, Int upper) { +inline auto to_nonnegative_int(T value, Int upper) -> Int { if (value < 0 || value > static_cast(upper)) FMT_THROW(format_error("invalid value")); return static_cast(value); } -constexpr long long pow10(std::uint32_t n) { +constexpr auto pow10(std::uint32_t n) -> long long { return n == 0 ? 1 : 10 * pow10(n - 1); } @@ -1052,13 +1100,12 @@ void write_fractional_seconds(OutputIt& out, Duration d, int precision = -1) { std::chrono::seconds::rep>::type, std::ratio<1, detail::pow10(num_fractional_digits)>>; - const auto fractional = - d - std::chrono::duration_cast(d); + const auto fractional = d - fmt_duration_cast(d); const auto subseconds = std::chrono::treat_as_floating_point< typename subsecond_precision::rep>::value ? fractional.count() - : std::chrono::duration_cast(fractional).count(); + : fmt_duration_cast(fractional).count(); auto n = static_cast>(subseconds); const int num_digits = detail::count_digits(n); @@ -1109,11 +1156,11 @@ void write_floating_seconds(memory_buffer& buf, Duration duration, num_fractional_digits = 6; } - format_to(std::back_inserter(buf), FMT_STRING("{:.{}f}"), - std::fmod(val * static_cast(Duration::period::num) / - static_cast(Duration::period::den), - static_cast(60)), - num_fractional_digits); + fmt::format_to(std::back_inserter(buf), FMT_STRING("{:.{}f}"), + std::fmod(val * static_cast(Duration::period::num) / + static_cast(Duration::period::den), + static_cast(60)), + num_fractional_digits); } template (l); } - // Algorithm: - // https://en.wikipedia.org/wiki/ISO_week_date#Calculating_the_week_number_from_a_month_and_day_of_the_month_or_ordinal_date + // Algorithm: https://en.wikipedia.org/wiki/ISO_week_date. auto iso_year_weeks(long long curr_year) const noexcept -> int { const auto prev_year = curr_year - 1; const auto curr_p = @@ -1315,7 +1361,7 @@ class tm_writer { subsecs_(subsecs), tm_(tm) {} - OutputIt out() const { return out_; } + auto out() const -> OutputIt { return out_; } FMT_CONSTEXPR void on_text(const Char* begin, const Char* end) { out_ = copy_str(begin, end, out_); @@ -1579,6 +1625,7 @@ struct chrono_format_checker : null_chrono_spec_handler { template FMT_CONSTEXPR void on_text(const Char*, const Char*) {} + FMT_CONSTEXPR void on_day_of_year() {} FMT_CONSTEXPR void on_24_hour(numeric_system, pad_type) {} FMT_CONSTEXPR void on_12_hour(numeric_system, pad_type) {} FMT_CONSTEXPR void on_minute(numeric_system, pad_type) {} @@ -1597,16 +1644,16 @@ struct chrono_format_checker : null_chrono_spec_handler { template ::value&& has_isfinite::value)> -inline bool isfinite(T) { +inline auto isfinite(T) -> bool { return true; } template ::value)> -inline T mod(T x, int y) { +inline auto mod(T x, int y) -> T { return x % static_cast(y); } template ::value)> -inline T mod(T x, int y) { +inline auto mod(T x, int y) -> T { return std::fmod(x, static_cast(y)); } @@ -1621,49 +1668,38 @@ template struct make_unsigned_or_unchanged { using type = typename std::make_unsigned::type; }; -#if FMT_SAFE_DURATION_CAST -// throwing version of safe_duration_cast -template -To fmt_safe_duration_cast(std::chrono::duration from) { - int ec; - To to = safe_duration_cast::safe_duration_cast(from, ec); - if (ec) FMT_THROW(format_error("cannot format duration")); - return to; -} -#endif - template ::value)> -inline std::chrono::duration get_milliseconds( - std::chrono::duration d) { +inline auto get_milliseconds(std::chrono::duration d) + -> std::chrono::duration { // this may overflow and/or the result may not fit in the // target type. #if FMT_SAFE_DURATION_CAST using CommonSecondsType = typename std::common_type::type; - const auto d_as_common = fmt_safe_duration_cast(d); + const auto d_as_common = fmt_duration_cast(d); const auto d_as_whole_seconds = - fmt_safe_duration_cast(d_as_common); + fmt_duration_cast(d_as_common); // this conversion should be nonproblematic const auto diff = d_as_common - d_as_whole_seconds; const auto ms = - fmt_safe_duration_cast>(diff); + fmt_duration_cast>(diff); return ms; #else - auto s = std::chrono::duration_cast(d); - return std::chrono::duration_cast(d - s); + auto s = fmt_duration_cast(d); + return fmt_duration_cast(d - s); #endif } template ::value)> -OutputIt format_duration_value(OutputIt out, Rep val, int) { +auto format_duration_value(OutputIt out, Rep val, int) -> OutputIt { return write(out, val); } template ::value)> -OutputIt format_duration_value(OutputIt out, Rep val, int precision) { +auto format_duration_value(OutputIt out, Rep val, int precision) -> OutputIt { auto specs = format_specs(); specs.precision = precision; specs.type = precision >= 0 ? presentation_type::fixed_lower @@ -1672,12 +1708,12 @@ OutputIt format_duration_value(OutputIt out, Rep val, int precision) { } template -OutputIt copy_unit(string_view unit, OutputIt out, Char) { +auto copy_unit(string_view unit, OutputIt out, Char) -> OutputIt { return std::copy(unit.begin(), unit.end(), out); } template -OutputIt copy_unit(string_view unit, OutputIt out, wchar_t) { +auto copy_unit(string_view unit, OutputIt out, wchar_t) -> OutputIt { // This works when wchar_t is UTF-32 because units only contain characters // that have the same representation in UTF-16 and UTF-32. utf8_to_utf16 u(unit); @@ -1685,7 +1721,7 @@ OutputIt copy_unit(string_view unit, OutputIt out, wchar_t) { } template -OutputIt format_duration_unit(OutputIt out) { +auto format_duration_unit(OutputIt out) -> OutputIt { if (const char* unit = get_units()) return copy_unit(string_view(unit), out, Char()); *out++ = '['; @@ -1752,18 +1788,12 @@ struct chrono_formatter { // this may overflow and/or the result may not fit in the // target type. -#if FMT_SAFE_DURATION_CAST // might need checked conversion (rep!=Rep) - auto tmpval = std::chrono::duration(val); - s = fmt_safe_duration_cast(tmpval); -#else - s = std::chrono::duration_cast( - std::chrono::duration(val)); -#endif + s = fmt_duration_cast(std::chrono::duration(val)); } // returns true if nan or inf, writes to out. - bool handle_nan_inf() { + auto handle_nan_inf() -> bool { if (isfinite(val)) { return false; } @@ -1780,17 +1810,22 @@ struct chrono_formatter { return true; } - Rep hour() const { return static_cast(mod((s.count() / 3600), 24)); } + auto days() const -> Rep { return static_cast(s.count() / 86400); } + auto hour() const -> Rep { + return static_cast(mod((s.count() / 3600), 24)); + } - Rep hour12() const { + auto hour12() const -> Rep { Rep hour = static_cast(mod((s.count() / 3600), 12)); return hour <= 0 ? 12 : hour; } - Rep minute() const { return static_cast(mod((s.count() / 60), 60)); } - Rep second() const { return static_cast(mod(s.count(), 60)); } + auto minute() const -> Rep { + return static_cast(mod((s.count() / 60), 60)); + } + auto second() const -> Rep { return static_cast(mod(s.count(), 60)); } - std::tm time() const { + auto time() const -> std::tm { auto time = std::tm(); time.tm_hour = to_nonnegative_int(hour(), 24); time.tm_min = to_nonnegative_int(minute(), 60); @@ -1858,10 +1893,14 @@ struct chrono_formatter { void on_dec0_week_of_year(numeric_system) {} void on_dec1_week_of_year(numeric_system) {} void on_iso_week_of_year(numeric_system) {} - void on_day_of_year() {} void on_day_of_month(numeric_system) {} void on_day_of_month_space(numeric_system) {} + void on_day_of_year() { + if (handle_nan_inf()) return; + write(days(), 0); + } + void on_24_hour(numeric_system ns, pad_type pad) { if (handle_nan_inf()) return; @@ -1968,7 +2007,7 @@ class weekday { weekday() = default; explicit constexpr weekday(unsigned wd) noexcept : value(static_cast(wd != 7 ? wd : 0)) {} - constexpr unsigned c_encoding() const noexcept { return value; } + constexpr auto c_encoding() const noexcept -> unsigned { return value; } }; class year_month_day {}; @@ -2083,25 +2122,22 @@ struct formatter, period::num != 1 || period::den != 1 || std::is_floating_point::value)) { const auto epoch = val.time_since_epoch(); - auto subsecs = std::chrono::duration_cast( - epoch - std::chrono::duration_cast(epoch)); + auto subsecs = detail::fmt_duration_cast( + epoch - detail::fmt_duration_cast(epoch)); if (subsecs.count() < 0) { auto second = - std::chrono::duration_cast(std::chrono::seconds(1)); + detail::fmt_duration_cast(std::chrono::seconds(1)); if (epoch.count() < ((Duration::min)() + second).count()) FMT_THROW(format_error("duration is too small")); subsecs += second; val -= second; } - return formatter::do_format( - gmtime(std::chrono::time_point_cast(val)), ctx, - &subsecs); + return formatter::do_format(gmtime(val), ctx, &subsecs); } - return formatter::format( - gmtime(std::chrono::time_point_cast(val)), ctx); + return formatter::format(gmtime(val), ctx); } }; @@ -2120,17 +2156,13 @@ struct formatter, Char> if (period::num != 1 || period::den != 1 || std::is_floating_point::value) { const auto epoch = val.time_since_epoch(); - const auto subsecs = std::chrono::duration_cast( - epoch - std::chrono::duration_cast(epoch)); + const auto subsecs = detail::fmt_duration_cast( + epoch - detail::fmt_duration_cast(epoch)); - return formatter::do_format( - localtime(std::chrono::time_point_cast(val)), - ctx, &subsecs); + return formatter::do_format(localtime(val), ctx, &subsecs); } - return formatter::format( - localtime(std::chrono::time_point_cast(val)), - ctx); + return formatter::format(localtime(val), ctx); } }; #endif diff --git a/src/fmt/color.h b/src/fmt/color.h index 8697e1ca0b..464519e582 100644 --- a/src/fmt/color.h +++ b/src/fmt/color.h @@ -233,7 +233,7 @@ class text_style { FMT_CONSTEXPR text_style(emphasis em = emphasis()) noexcept : set_foreground_color(), set_background_color(), ems(em) {} - FMT_CONSTEXPR text_style& operator|=(const text_style& rhs) { + FMT_CONSTEXPR auto operator|=(const text_style& rhs) -> text_style& { if (!set_foreground_color) { set_foreground_color = rhs.set_foreground_color; foreground_color = rhs.foreground_color; @@ -257,29 +257,29 @@ class text_style { return *this; } - friend FMT_CONSTEXPR text_style operator|(text_style lhs, - const text_style& rhs) { + friend FMT_CONSTEXPR auto operator|(text_style lhs, const text_style& rhs) + -> text_style { return lhs |= rhs; } - FMT_CONSTEXPR bool has_foreground() const noexcept { + FMT_CONSTEXPR auto has_foreground() const noexcept -> bool { return set_foreground_color; } - FMT_CONSTEXPR bool has_background() const noexcept { + FMT_CONSTEXPR auto has_background() const noexcept -> bool { return set_background_color; } - FMT_CONSTEXPR bool has_emphasis() const noexcept { + FMT_CONSTEXPR auto has_emphasis() const noexcept -> bool { return static_cast(ems) != 0; } - FMT_CONSTEXPR detail::color_type get_foreground() const noexcept { + FMT_CONSTEXPR auto get_foreground() const noexcept -> detail::color_type { FMT_ASSERT(has_foreground(), "no foreground specified for this style"); return foreground_color; } - FMT_CONSTEXPR detail::color_type get_background() const noexcept { + FMT_CONSTEXPR auto get_background() const noexcept -> detail::color_type { FMT_ASSERT(has_background(), "no background specified for this style"); return background_color; } - FMT_CONSTEXPR emphasis get_emphasis() const noexcept { + FMT_CONSTEXPR auto get_emphasis() const noexcept -> emphasis { FMT_ASSERT(has_emphasis(), "no emphasis specified for this style"); return ems; } @@ -297,9 +297,11 @@ class text_style { } } - friend FMT_CONSTEXPR text_style fg(detail::color_type foreground) noexcept; + friend FMT_CONSTEXPR auto fg(detail::color_type foreground) noexcept + -> text_style; - friend FMT_CONSTEXPR text_style bg(detail::color_type background) noexcept; + friend FMT_CONSTEXPR auto bg(detail::color_type background) noexcept + -> text_style; detail::color_type foreground_color; detail::color_type background_color; @@ -309,16 +311,19 @@ class text_style { }; /** Creates a text style from the foreground (text) color. */ -FMT_CONSTEXPR inline text_style fg(detail::color_type foreground) noexcept { +FMT_CONSTEXPR inline auto fg(detail::color_type foreground) noexcept + -> text_style { return text_style(true, foreground); } /** Creates a text style from the background color. */ -FMT_CONSTEXPR inline text_style bg(detail::color_type background) noexcept { +FMT_CONSTEXPR inline auto bg(detail::color_type background) noexcept + -> text_style { return text_style(false, background); } -FMT_CONSTEXPR inline text_style operator|(emphasis lhs, emphasis rhs) noexcept { +FMT_CONSTEXPR inline auto operator|(emphasis lhs, emphasis rhs) noexcept + -> text_style { return text_style(lhs) | rhs; } @@ -384,8 +389,8 @@ template struct ansi_color_escape { } FMT_CONSTEXPR operator const Char*() const noexcept { return buffer; } - FMT_CONSTEXPR const Char* begin() const noexcept { return buffer; } - FMT_CONSTEXPR_CHAR_TRAITS const Char* end() const noexcept { + FMT_CONSTEXPR auto begin() const noexcept -> const Char* { return buffer; } + FMT_CONSTEXPR20 auto end() const noexcept -> const Char* { return buffer + std::char_traits::length(buffer); } @@ -400,25 +405,27 @@ template struct ansi_color_escape { out[2] = static_cast('0' + c % 10); out[3] = static_cast(delimiter); } - static FMT_CONSTEXPR bool has_emphasis(emphasis em, emphasis mask) noexcept { + static FMT_CONSTEXPR auto has_emphasis(emphasis em, emphasis mask) noexcept + -> bool { return static_cast(em) & static_cast(mask); } }; template -FMT_CONSTEXPR ansi_color_escape make_foreground_color( - detail::color_type foreground) noexcept { +FMT_CONSTEXPR auto make_foreground_color(detail::color_type foreground) noexcept + -> ansi_color_escape { return ansi_color_escape(foreground, "\x1b[38;2;"); } template -FMT_CONSTEXPR ansi_color_escape make_background_color( - detail::color_type background) noexcept { +FMT_CONSTEXPR auto make_background_color(detail::color_type background) noexcept + -> ansi_color_escape { return ansi_color_escape(background, "\x1b[48;2;"); } template -FMT_CONSTEXPR ansi_color_escape make_emphasis(emphasis em) noexcept { +FMT_CONSTEXPR auto make_emphasis(emphasis em) noexcept + -> ansi_color_escape { return ansi_color_escape(em); } @@ -427,9 +434,10 @@ template inline void reset_color(buffer& buffer) { buffer.append(reset_color.begin(), reset_color.end()); } -template struct styled_arg { +template struct styled_arg : detail::view { const T& value; text_style style; + styled_arg(const T& v, text_style s) : value(v), style(s) {} }; template @@ -510,9 +518,10 @@ void print(const text_style& ts, const S& format_str, const Args&... args) { } template > -inline std::basic_string vformat( +inline auto vformat( const text_style& ts, const S& format_str, - basic_format_args>> args) { + basic_format_args>> args) + -> std::basic_string { basic_memory_buffer buf; detail::vformat_to(buf, ts, detail::to_string_view(format_str), args); return fmt::to_string(buf); @@ -531,8 +540,8 @@ inline std::basic_string vformat( \endrst */ template > -inline std::basic_string format(const text_style& ts, const S& format_str, - const Args&... args) { +inline auto format(const text_style& ts, const S& format_str, + const Args&... args) -> std::basic_string { return fmt::vformat(ts, detail::to_string_view(format_str), fmt::make_format_args>(args...)); } @@ -542,9 +551,10 @@ inline std::basic_string format(const text_style& ts, const S& format_str, */ template ::value)> -OutputIt vformat_to( - OutputIt out, const text_style& ts, basic_string_view format_str, - basic_format_args>> args) { +auto vformat_to(OutputIt out, const text_style& ts, + basic_string_view format_str, + basic_format_args>> args) + -> OutputIt { auto&& buf = detail::get_buffer(out); detail::vformat_to(buf, ts, format_str, args); return detail::get_iterator(buf, out); @@ -562,9 +572,10 @@ OutputIt vformat_to( fmt::emphasis::bold | fg(fmt::color::red), "{}", 42); \endrst */ -template >::value&& - detail::is_string::value> +template < + typename OutputIt, typename S, typename... Args, + bool enable = detail::is_output_iterator>::value && + detail::is_string::value> inline auto format_to(OutputIt out, const text_style& ts, const S& format_str, Args&&... args) -> typename std::enable_if::type { diff --git a/src/fmt/compile.h b/src/fmt/compile.h index af76507f07..71fa69c67e 100644 --- a/src/fmt/compile.h +++ b/src/fmt/compile.h @@ -14,8 +14,8 @@ FMT_BEGIN_NAMESPACE namespace detail { template -FMT_CONSTEXPR inline counting_iterator copy_str(InputIt begin, InputIt end, - counting_iterator it) { +FMT_CONSTEXPR inline auto copy_str(InputIt begin, InputIt end, + counting_iterator it) -> counting_iterator { return it + (end - begin); } @@ -57,7 +57,7 @@ struct udl_compiled_string : compiled_string { #endif template -const T& first(const T& value, const Tail&...) { +auto first(const T& value, const Tail&...) -> const T& { return value; } @@ -489,18 +489,19 @@ FMT_CONSTEXPR OutputIt format_to(OutputIt out, const S&, Args&&... args) { template ::value)> -format_to_n_result format_to_n(OutputIt out, size_t n, - const S& format_str, Args&&... args) { +auto format_to_n(OutputIt out, size_t n, const S& format_str, Args&&... args) + -> format_to_n_result { using traits = detail::fixed_buffer_traits; auto buf = detail::iterator_buffer(out, n); - format_to(std::back_inserter(buf), format_str, std::forward(args)...); + fmt::format_to(std::back_inserter(buf), format_str, + std::forward(args)...); return {buf.out(), buf.count()}; } template ::value)> -FMT_CONSTEXPR20 size_t formatted_size(const S& format_str, - const Args&... args) { +FMT_CONSTEXPR20 auto formatted_size(const S& format_str, const Args&... args) + -> size_t { return fmt::format_to(detail::counting_iterator(), format_str, args...) .count(); } diff --git a/src/fmt/core.h b/src/fmt/core.h index 9f7de781bb..6a53b8c52c 100644 --- a/src/fmt/core.h +++ b/src/fmt/core.h @@ -8,17 +8,15 @@ #ifndef FMT_CORE_H_ #define FMT_CORE_H_ -#include // std::byte -#include // std::FILE -#include // std::strlen -#include -#include -#include // std::addressof -#include -#include +#include // std::byte +#include // std::FILE +#include // std::strlen +#include // CHAR_BIT +#include // std::string +#include // std::enable_if // The fmt library version in the form major * 10000 + minor * 100 + patch. -#define FMT_VERSION 100100 +#define FMT_VERSION 100200 #if defined(__clang__) && !defined(__ibmxl__) # define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__) @@ -58,6 +56,12 @@ # define FMT_MSC_WARNING(...) #endif +#ifdef _GLIBCXX_RELEASE +# define FMT_GLIBCXX_RELEASE _GLIBCXX_RELEASE +#else +# define FMT_GLIBCXX_RELEASE 0 +#endif + #ifdef _MSVC_LANG # define FMT_CPLUSPLUS _MSVC_LANG #else @@ -88,6 +92,20 @@ #define FMT_HAS_CPP17_ATTRIBUTE(attribute) \ (FMT_CPLUSPLUS >= 201703L && FMT_HAS_CPP_ATTRIBUTE(attribute)) +#ifndef FMT_DEPRECATED +# if FMT_HAS_CPP14_ATTRIBUTE(deprecated) || FMT_MSC_VERSION >= 1900 +# define FMT_DEPRECATED [[deprecated]] +# else +# if (defined(__GNUC__) && !defined(__LCC__)) || defined(__clang__) +# define FMT_DEPRECATED __attribute__((deprecated)) +# elif FMT_MSC_VERSION +# define FMT_DEPRECATED __declspec(deprecated) +# else +# define FMT_DEPRECATED /* deprecated */ +# endif +# endif +#endif + // Check if relaxed C++14 constexpr is supported. // GCC doesn't allow throw in constexpr until version 6 (bug 67371). #ifndef FMT_USE_CONSTEXPR @@ -105,30 +123,17 @@ # define FMT_CONSTEXPR #endif -#if ((FMT_CPLUSPLUS >= 202002L) && \ - (!defined(_GLIBCXX_RELEASE) || _GLIBCXX_RELEASE > 9)) || \ - (FMT_CPLUSPLUS >= 201709L && FMT_GCC_VERSION >= 1002) +#if (FMT_CPLUSPLUS >= 202002L || \ + (FMT_CPLUSPLUS >= 201709L && FMT_GCC_VERSION >= 1002)) && \ + ((!FMT_GLIBCXX_RELEASE || FMT_GLIBCXX_RELEASE >= 10) && \ + (!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION >= 10000) && \ + (!FMT_MSC_VERSION || FMT_MSC_VERSION >= 1928)) && \ + defined(__cpp_lib_is_constant_evaluated) # define FMT_CONSTEXPR20 constexpr #else # define FMT_CONSTEXPR20 #endif -// Check if constexpr std::char_traits<>::{compare,length} are supported. -#if defined(__GLIBCXX__) -# if FMT_CPLUSPLUS >= 201703L && defined(_GLIBCXX_RELEASE) && \ - _GLIBCXX_RELEASE >= 7 // GCC 7+ libstdc++ has _GLIBCXX_RELEASE. -# define FMT_CONSTEXPR_CHAR_TRAITS constexpr -# endif -#elif defined(_LIBCPP_VERSION) && FMT_CPLUSPLUS >= 201703L && \ - _LIBCPP_VERSION >= 4000 -# define FMT_CONSTEXPR_CHAR_TRAITS constexpr -#elif FMT_MSC_VERSION >= 1914 && FMT_CPLUSPLUS >= 201703L -# define FMT_CONSTEXPR_CHAR_TRAITS constexpr -#endif -#ifndef FMT_CONSTEXPR_CHAR_TRAITS -# define FMT_CONSTEXPR_CHAR_TRAITS -#endif - // Check if exceptions are disabled. #ifndef FMT_EXCEPTIONS # if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || \ @@ -191,33 +196,25 @@ # define FMT_END_EXPORT #endif +#if FMT_GCC_VERSION || FMT_CLANG_VERSION +# define FMT_VISIBILITY(value) __attribute__((visibility(value))) +#else +# define FMT_VISIBILITY(value) +#endif + #if !defined(FMT_HEADER_ONLY) && defined(_WIN32) -# ifdef FMT_LIB_EXPORT +# if defined(FMT_LIB_EXPORT) # define FMT_API __declspec(dllexport) # elif defined(FMT_SHARED) # define FMT_API __declspec(dllimport) # endif -#else -# if defined(FMT_LIB_EXPORT) || defined(FMT_SHARED) -# if defined(__GNUC__) || defined(__clang__) -# define FMT_API __attribute__((visibility("default"))) -# endif -# endif +#elif defined(FMT_LIB_EXPORT) || defined(FMT_SHARED) +# define FMT_API FMT_VISIBILITY("default") #endif #ifndef FMT_API # define FMT_API #endif -// libc++ supports string_view in pre-c++17. -#if FMT_HAS_INCLUDE() && \ - (FMT_CPLUSPLUS >= 201703L || defined(_LIBCPP_VERSION)) -# include -# define FMT_USE_STRING_VIEW -#elif FMT_HAS_INCLUDE("experimental/string_view") && FMT_CPLUSPLUS >= 201402L -# include -# define FMT_USE_EXPERIMENTAL_STRING_VIEW -#endif - #ifndef FMT_UNICODE # define FMT_UNICODE !FMT_MSC_VERSION #endif @@ -228,8 +225,9 @@ __apple_build_version__ >= 14000029L) && \ FMT_CPLUSPLUS >= 202002L) || \ (defined(__cpp_consteval) && \ - (!FMT_MSC_VERSION || _MSC_FULL_VER >= 193030704)) -// consteval is broken in MSVC before VS2022 and Apple clang before 14. + (!FMT_MSC_VERSION || FMT_MSC_VERSION >= 1929)) +// consteval is broken in MSVC before VS2019 version 16.10 and Apple clang +// before 14. # define FMT_CONSTEVAL consteval # define FMT_HAS_CONSTEVAL # else @@ -248,6 +246,15 @@ # endif #endif +// GCC < 5 requires this-> in decltype. +#ifndef FMT_DECLTYPE_THIS +# if FMT_GCC_VERSION && FMT_GCC_VERSION < 500 +# define FMT_DECLTYPE_THIS this-> +# else +# define FMT_DECLTYPE_THIS +# endif +#endif + // Enable minimal optimizations for more compact code in debug mode. FMT_GCC_PRAGMA("GCC push_options") #if !defined(__OPTIMIZE__) && !defined(__NVCOMPILER) && !defined(__LCC__) && \ @@ -269,20 +276,57 @@ template using remove_const_t = typename std::remove_const::type; template using remove_cvref_t = typename std::remove_cv>::type; -template struct type_identity { using type = T; }; +template struct type_identity { + using type = T; +}; template using type_identity_t = typename type_identity::type; template using underlying_t = typename std::underlying_type::type; -// Checks whether T is a container with contiguous storage. -template struct is_contiguous : std::false_type {}; -template -struct is_contiguous> : std::true_type {}; +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 500 +// A workaround for gcc 4.8 to make void_t work in a SFINAE context. +template struct void_t_impl { + using type = void; +}; +template using void_t = typename void_t_impl::type; +#else +template using void_t = void; +#endif struct monostate { constexpr monostate() {} }; +// An implementation of back_insert_iterator to avoid dependency on . +template class back_insert_iterator { + private: + Container* container_; + + friend auto get_container(back_insert_iterator it) -> Container& { + return *it.container_; + } + + public: + using difference_type = ptrdiff_t; + FMT_UNCHECKED_ITERATOR(back_insert_iterator); + + explicit back_insert_iterator(Container& c) : container_(&c) {} + + auto operator=(const typename Container::value_type& value) + -> back_insert_iterator& { + container_->push_back(value); + return *this; + } + auto operator*() -> back_insert_iterator& { return *this; } + auto operator++() -> back_insert_iterator& { return *this; } + auto operator++(int) -> back_insert_iterator { return *this; } +}; + +template +auto back_inserter(Container& c) -> back_insert_iterator { + return {c}; +} + // An enable_if helper to be used in template parameters which results in much // shorter symbols: https://godbolt.org/z/sWw4vP. Extra parentheses are needed // to workaround a bug in MSVC 2019 (see #1140 and #1186). @@ -310,10 +354,9 @@ template FMT_CONSTEXPR void ignore_unused(const T&...) {} constexpr FMT_INLINE auto is_constant_evaluated( bool default_value = false) noexcept -> bool { // Workaround for incompatibility between libstdc++ consteval-based -// std::is_constant_evaluated() implementation and clang-14. -// https://github.com/fmtlib/fmt/issues/3247 -#if FMT_CPLUSPLUS >= 202002L && defined(_GLIBCXX_RELEASE) && \ - _GLIBCXX_RELEASE >= 12 && \ +// std::is_constant_evaluated() implementation and clang-14: +// https://github.com/fmtlib/fmt/issues/3247. +#if FMT_CPLUSPLUS >= 202002L && FMT_GLIBCXX_RELEASE >= 12 && \ (FMT_CLANG_VERSION >= 1400 && FMT_CLANG_VERSION < 1500) ignore_unused(default_value); return __builtin_is_constant_evaluated(); @@ -346,15 +389,6 @@ FMT_NORETURN FMT_API void assert_fail(const char* file, int line, # endif #endif -#if defined(FMT_USE_STRING_VIEW) -template using std_string_view = std::basic_string_view; -#elif defined(FMT_USE_EXPERIMENTAL_STRING_VIEW) -template -using std_string_view = std::experimental::basic_string_view; -#else -template struct std_string_view {}; -#endif - #ifdef FMT_USE_INT128 // Do nothing. #elif defined(__SIZEOF_INT128__) && !defined(__NVCC__) && \ @@ -386,6 +420,15 @@ FMT_CONSTEXPR auto to_unsigned(Int value) -> return static_cast::type>(value); } +template +struct is_string_like : std::false_type {}; + +// A heuristic to detect std::string and std::string_view. +template +struct is_string_like().find_first_of( + typename T::value_type(), 0))>> : std::true_type { +}; + FMT_CONSTEXPR inline auto is_utf8() -> bool { FMT_MSC_WARNING(suppress : 4566) constexpr unsigned char section[] = "\u00A7"; @@ -394,8 +437,33 @@ FMT_CONSTEXPR inline auto is_utf8() -> bool { return FMT_UNICODE || (sizeof(section) == 3 && uchar(section[0]) == 0xC2 && uchar(section[1]) == 0xA7); } + +template FMT_CONSTEXPR auto length(const Char* s) -> size_t { + size_t len = 0; + while (*s++) ++len; + return len; +} + +template +FMT_CONSTEXPR auto compare(const Char* s1, const Char* s2, std::size_t n) + -> int { + for (; n != 0; ++s1, ++s2, --n) { + if (*s1 < *s2) return -1; + if (*s1 > *s2) return 1; + } + return 0; +} } // namespace detail +template +using basic_string = + std::basic_string, std::allocator>; + +// Checks whether T is a container with contiguous storage. +template struct is_contiguous : std::false_type {}; +template +struct is_contiguous> : std::true_type {}; + /** An implementation of ``std::basic_string_view`` for pre-C++17. It provides a subset of the API. ``fmt::basic_string_view`` is used for format strings even @@ -420,29 +488,25 @@ template class basic_string_view { : data_(s), size_(count) {} /** - \rst - Constructs a string reference object from a C string computing - the size with ``std::char_traits::length``. - \endrst + Constructs a string reference object from a C string. */ - FMT_CONSTEXPR_CHAR_TRAITS + FMT_CONSTEXPR20 FMT_INLINE basic_string_view(const Char* s) : data_(s), size_(detail::const_check(std::is_same::value && - !detail::is_constant_evaluated(true)) + !detail::is_constant_evaluated(false)) ? std::strlen(reinterpret_cast(s)) - : std::char_traits::length(s)) {} + : detail::length(s)) {} - /** Constructs a string reference from a ``std::basic_string`` object. */ - template - FMT_CONSTEXPR basic_string_view( - const std::basic_string& s) noexcept - : data_(s.data()), size_(s.size()) {} - - template >::value)> - FMT_CONSTEXPR basic_string_view(S s) noexcept + /** + Constructs a string reference from a ``std::basic_string`` or a + ``std::basic_string_view`` object. + */ + template ::value&& std::is_same< + typename S::value_type, Char>::value)> + FMT_CONSTEXPR basic_string_view(const S& s) noexcept : data_(s.data()), size_(s.size()) {} /** Returns a pointer to the string data. */ @@ -463,30 +527,28 @@ template class basic_string_view { size_ -= n; } - FMT_CONSTEXPR_CHAR_TRAITS bool starts_with( - basic_string_view sv) const noexcept { - return size_ >= sv.size_ && - std::char_traits::compare(data_, sv.data_, sv.size_) == 0; + FMT_CONSTEXPR auto starts_with(basic_string_view sv) const noexcept + -> bool { + return size_ >= sv.size_ && detail::compare(data_, sv.data_, sv.size_) == 0; } - FMT_CONSTEXPR_CHAR_TRAITS bool starts_with(Char c) const noexcept { - return size_ >= 1 && std::char_traits::eq(*data_, c); + FMT_CONSTEXPR auto starts_with(Char c) const noexcept -> bool { + return size_ >= 1 && *data_ == c; } - FMT_CONSTEXPR_CHAR_TRAITS bool starts_with(const Char* s) const { + FMT_CONSTEXPR auto starts_with(const Char* s) const -> bool { return starts_with(basic_string_view(s)); } // Lexicographically compare this string reference to other. - FMT_CONSTEXPR_CHAR_TRAITS auto compare(basic_string_view other) const -> int { + FMT_CONSTEXPR auto compare(basic_string_view other) const -> int { size_t str_size = size_ < other.size_ ? size_ : other.size_; - int result = std::char_traits::compare(data_, other.data_, str_size); + int result = detail::compare(data_, other.data_, str_size); if (result == 0) result = size_ == other.size_ ? 0 : (size_ < other.size_ ? -1 : 1); return result; } - FMT_CONSTEXPR_CHAR_TRAITS friend auto operator==(basic_string_view lhs, - basic_string_view rhs) - -> bool { + FMT_CONSTEXPR friend auto operator==(basic_string_view lhs, + basic_string_view rhs) -> bool { return lhs.compare(rhs) == 0; } friend auto operator!=(basic_string_view lhs, basic_string_view rhs) -> bool { @@ -526,21 +588,16 @@ template ::value)> FMT_INLINE auto to_string_view(const Char* s) -> basic_string_view { return s; } -template -inline auto to_string_view(const std::basic_string& s) - -> basic_string_view { - return s; +template ::value)> +inline auto to_string_view(const S& s) + -> basic_string_view { + return s; // std::basic_string[_view] } template constexpr auto to_string_view(basic_string_view s) -> basic_string_view { return s; } -template >::value)> -inline auto to_string_view(std_string_view s) -> basic_string_view { - return s; -} template ::value)> constexpr auto to_string_view(const S& s) -> basic_string_view { @@ -609,10 +666,10 @@ FMT_TYPE_CONSTANT(const Char*, cstring_type); FMT_TYPE_CONSTANT(basic_string_view, string_type); FMT_TYPE_CONSTANT(const void*, pointer_type); -constexpr bool is_integral_type(type t) { +constexpr auto is_integral_type(type t) -> bool { return t > type::none_type && t <= type::last_integer_type; } -constexpr bool is_arithmetic_type(type t) { +constexpr auto is_arithmetic_type(type t) -> bool { return t > type::none_type && t <= type::last_numeric_type; } @@ -635,21 +692,10 @@ enum { cstring_set = set(type::cstring_type), pointer_set = set(type::pointer_type) }; - -FMT_NORETURN FMT_API void throw_format_error(const char* message); - -struct error_handler { - constexpr error_handler() = default; - - // This function is intentionally not constexpr to give a compile-time error. - FMT_NORETURN void on_error(const char* message) { - throw_format_error(message); - } -}; } // namespace detail /** Throws ``format_error`` with a given message. */ -using detail::throw_format_error; +FMT_NORETURN FMT_API void throw_format_error(const char* message); /** String's character type. */ template using char_t = typename detail::char_t_impl::type; @@ -701,7 +747,7 @@ template class basic_format_parse_context { */ FMT_CONSTEXPR auto next_arg_id() -> int { if (next_arg_id_ < 0) { - detail::throw_format_error( + throw_format_error( "cannot switch from manual to automatic argument indexing"); return 0; } @@ -716,7 +762,7 @@ template class basic_format_parse_context { */ FMT_CONSTEXPR void check_arg_id(int id) { if (next_arg_id_ > 0) { - detail::throw_format_error( + throw_format_error( "cannot switch from automatic to manual argument indexing"); return; } @@ -769,35 +815,6 @@ class compile_parse_context : public basic_format_parse_context { } }; -// Extracts a reference to the container from back_insert_iterator. -template -inline auto get_container(std::back_insert_iterator it) - -> Container& { - using base = std::back_insert_iterator; - struct accessor : base { - accessor(base b) : base(b) {} - using base::container; - }; - return *accessor(it).container; -} - -template -FMT_CONSTEXPR auto copy_str(InputIt begin, InputIt end, OutputIt out) - -> OutputIt { - while (begin != end) *out++ = static_cast(*begin++); - return out; -} - -template , U>::value&& is_char::value)> -FMT_CONSTEXPR auto copy_str(T* begin, T* end, U* out) -> U* { - if (is_constant_evaluated()) return copy_str(begin, end, out); - auto size = to_unsigned(end - begin); - if (size > 0) memcpy(out, begin, size * sizeof(U)); - return out + size; -} - /** \rst A contiguous memory buffer with an optional growing ability. It is an internal @@ -810,13 +827,18 @@ template class buffer { size_t size_; size_t capacity_; + using grow_fun = void (*)(buffer& buf, size_t capacity); + grow_fun grow_; + protected: // Don't initialize ptr_ since it is not accessed to save a few cycles. FMT_MSC_WARNING(suppress : 26495) - buffer(size_t sz) noexcept : size_(sz), capacity_(sz) {} + FMT_CONSTEXPR buffer(grow_fun grow, size_t sz) noexcept + : size_(sz), capacity_(sz), grow_(grow) {} - FMT_CONSTEXPR20 buffer(T* p = nullptr, size_t sz = 0, size_t cap = 0) noexcept - : ptr_(p), size_(sz), capacity_(cap) {} + FMT_CONSTEXPR20 buffer(grow_fun grow, T* p = nullptr, size_t sz = 0, + size_t cap = 0) noexcept + : ptr_(p), size_(sz), capacity_(cap), grow_(grow) {} FMT_CONSTEXPR20 ~buffer() = default; buffer(buffer&&) = default; @@ -827,9 +849,6 @@ template class buffer { capacity_ = buf_capacity; } - /** Increases the buffer capacity to hold at least *capacity* elements. */ - virtual FMT_CONSTEXPR20 void grow(size_t capacity) = 0; - public: using value_type = T; using const_reference = const T&; @@ -868,7 +887,7 @@ template class buffer { // for at least one additional element either by increasing the capacity or by // flushing the buffer if it is full. FMT_CONSTEXPR20 void try_reserve(size_t new_capacity) { - if (new_capacity > capacity_) grow(new_capacity); + if (new_capacity > capacity_) grow_(*this, new_capacity); } FMT_CONSTEXPR20 void push_back(const T& value) { @@ -917,22 +936,25 @@ class iterator_buffer final : public Traits, public buffer { enum { buffer_size = 256 }; T data_[buffer_size]; - protected: - FMT_CONSTEXPR20 void grow(size_t) override { - if (this->size() == buffer_size) flush(); + static FMT_CONSTEXPR20 void grow(buffer& buf, size_t) { + if (buf.size() == buffer_size) static_cast(buf).flush(); } void flush() { auto size = this->size(); this->clear(); - out_ = copy_str(data_, data_ + this->limit(size), out_); + const T* begin = data_; + const T* end = begin + this->limit(size); + while (begin != end) *out_++ = *begin++; } public: explicit iterator_buffer(OutputIt out, size_t n = buffer_size) - : Traits(n), buffer(data_, 0, buffer_size), out_(out) {} + : Traits(n), buffer(grow, data_, 0, buffer_size), out_(out) {} iterator_buffer(iterator_buffer&& other) - : Traits(other), buffer(data_, 0, buffer_size), out_(other.out_) {} + : Traits(other), + buffer(grow, data_, 0, buffer_size), + out_(other.out_) {} ~iterator_buffer() { flush(); } auto out() -> OutputIt { @@ -951,9 +973,9 @@ class iterator_buffer final enum { buffer_size = 256 }; T data_[buffer_size]; - protected: - FMT_CONSTEXPR20 void grow(size_t) override { - if (this->size() == this->capacity()) flush(); + static FMT_CONSTEXPR20 void grow(buffer& buf, size_t) { + if (buf.size() == buf.capacity()) + static_cast(buf).flush(); } void flush() { @@ -967,10 +989,10 @@ class iterator_buffer final public: explicit iterator_buffer(T* out, size_t n = buffer_size) - : fixed_buffer_traits(n), buffer(out, 0, n), out_(out) {} + : fixed_buffer_traits(n), buffer(grow, out, 0, n), out_(out) {} iterator_buffer(iterator_buffer&& other) : fixed_buffer_traits(other), - buffer(std::move(other)), + buffer(static_cast(other)), out_(other.out_) { if (this->data() != out_) { this->set(data_, buffer_size); @@ -989,38 +1011,37 @@ class iterator_buffer final }; template class iterator_buffer final : public buffer { - protected: - FMT_CONSTEXPR20 void grow(size_t) override {} - public: - explicit iterator_buffer(T* out, size_t = 0) : buffer(out, 0, ~size_t()) {} + explicit iterator_buffer(T* out, size_t = 0) + : buffer([](buffer&, size_t) {}, out, 0, ~size_t()) {} auto out() -> T* { return &*this->end(); } }; // A buffer that writes to a container with the contiguous storage. template -class iterator_buffer, +class iterator_buffer, enable_if_t::value, typename Container::value_type>> final : public buffer { private: + using value_type = typename Container::value_type; Container& container_; - protected: - FMT_CONSTEXPR20 void grow(size_t capacity) override { - container_.resize(capacity); - this->set(&container_[0], capacity); + static FMT_CONSTEXPR20 void grow(buffer& buf, size_t capacity) { + auto& self = static_cast(buf); + self.container_.resize(capacity); + self.set(&self.container_[0], capacity); } public: explicit iterator_buffer(Container& c) - : buffer(c.size()), container_(c) {} - explicit iterator_buffer(std::back_insert_iterator out, size_t = 0) + : buffer(grow, c.size()), container_(c) {} + explicit iterator_buffer(back_insert_iterator out, size_t = 0) : iterator_buffer(get_container(out)) {} - auto out() -> std::back_insert_iterator { - return std::back_inserter(container_); + auto out() -> back_insert_iterator { + return fmt::back_inserter(container_); } }; @@ -1031,15 +1052,14 @@ template class counting_buffer final : public buffer { T data_[buffer_size]; size_t count_ = 0; - protected: - FMT_CONSTEXPR20 void grow(size_t) override { - if (this->size() != buffer_size) return; - count_ += this->size(); - this->clear(); + static FMT_CONSTEXPR20 void grow(buffer& buf, size_t) { + if (buf.size() != buffer_size) return; + static_cast(buf).count_ += buf.size(); + buf.clear(); } public: - counting_buffer() : buffer(data_, 0, buffer_size) {} + counting_buffer() : buffer(grow, data_, 0, buffer_size) {} auto count() -> size_t { return count_ + this->size(); } }; @@ -1053,7 +1073,7 @@ FMT_CONSTEXPR void basic_format_parse_context::do_check_arg_id(int id) { (!FMT_GCC_VERSION || FMT_GCC_VERSION >= 1200)) { using context = detail::compile_parse_context; if (id >= static_cast(this)->num_args()) - detail::throw_format_error("argument not found"); + throw_format_error("argument not found"); } } @@ -1085,18 +1105,29 @@ template using has_formatter = std::is_constructible>; -// An output iterator that appends to a buffer. -// It is used to reduce symbol sizes for the common case. -class appender : public std::back_insert_iterator> { - using base = std::back_insert_iterator>; +// An output iterator that appends to a buffer. It is used instead of +// back_insert_iterator to reduce symbol sizes for the common case. +class appender { + private: + detail::buffer* buffer_; + + friend auto get_container(appender app) -> detail::buffer& { + return *app.buffer_; + } public: - using std::back_insert_iterator>::back_insert_iterator; - appender(base it) noexcept : base(it) {} + using difference_type = ptrdiff_t; FMT_UNCHECKED_ITERATOR(appender); - auto operator++() noexcept -> appender& { return *this; } - auto operator++(int) noexcept -> appender { return *this; } + appender(detail::buffer& buf) : buffer_(&buf) {} + + auto operator=(char c) -> appender& { + buffer_->push_back(c); + return *this; + } + auto operator*() -> appender& { return *this; } + auto operator++() -> appender& { return *this; } + auto operator++(int) -> appender { return *this; } }; namespace detail { @@ -1119,7 +1150,7 @@ constexpr auto has_const_formatter() -> bool { template using buffer_appender = conditional_t::value, appender, - std::back_insert_iterator>>; + back_insert_iterator>>; // Maps an output iterator to a buffer. template @@ -1128,7 +1159,7 @@ auto get_buffer(OutputIt out) -> iterator_buffer { } template , Buf>::value)> -auto get_buffer(std::back_insert_iterator out) -> buffer& { +auto get_buffer(back_insert_iterator out) -> buffer& { return get_container(out); } @@ -1293,7 +1324,13 @@ template class value { template FMT_CONSTEXPR20 FMT_INLINE value(T& val) { using value_type = remove_const_t; - custom.value = const_cast(std::addressof(val)); + // T may overload operator& e.g. std::vector::reference in libc++. +#ifdef __cpp_if_constexpr + if constexpr (std::is_same::value) + custom.value = const_cast(&val); +#endif + if (!is_constant_evaluated()) + custom.value = const_cast(&reinterpret_cast(val)); // Get the formatter type through the context to allow different contexts // have different extension points, e.g. `formatter` for `format` and // `printf_formatter` for `printf`. @@ -1314,6 +1351,7 @@ template class value { parse_ctx.advance_to(f.parse(parse_ctx)); using qualified_type = conditional_t(), const T, T>; + // Calling format through a mutable reference is deprecated. ctx.advance_to(f.format(*static_cast(arg), ctx)); } }; @@ -1327,7 +1365,7 @@ using ulong_type = conditional_t; template struct format_as_result { template ::value || std::is_class::value)> - static auto map(U*) -> decltype(format_as(std::declval())); + static auto map(U*) -> remove_cvref_t()))>; static auto map(...) -> void; using type = decltype(map(static_cast(nullptr))); @@ -1444,7 +1482,8 @@ template struct arg_mapper { // Only map owning types because mapping views can be unsafe. template , FMT_ENABLE_IF(std::is_arithmetic::value)> - FMT_CONSTEXPR FMT_INLINE auto map(const T& val) -> decltype(this->map(U())) { + FMT_CONSTEXPR FMT_INLINE auto map(const T& val) + -> decltype(FMT_DECLTYPE_THIS map(U())) { return map(format_as(val)); } @@ -1468,13 +1507,14 @@ template struct arg_mapper { !is_string::value && !is_char::value && !is_named_arg::value && !std::is_arithmetic>::value)> - FMT_CONSTEXPR FMT_INLINE auto map(T& val) -> decltype(this->do_map(val)) { + FMT_CONSTEXPR FMT_INLINE auto map(T& val) + -> decltype(FMT_DECLTYPE_THIS do_map(val)) { return do_map(val); } template ::value)> FMT_CONSTEXPR FMT_INLINE auto map(const T& named_arg) - -> decltype(this->map(named_arg.value)) { + -> decltype(FMT_DECLTYPE_THIS map(named_arg.value)) { return map(named_arg.value); } @@ -1493,45 +1533,19 @@ enum { max_packed_args = 62 / packed_arg_bits }; enum : unsigned long long { is_unpacked_bit = 1ULL << 63 }; enum : unsigned long long { has_named_args_bit = 1ULL << 62 }; -template -auto copy_str(InputIt begin, InputIt end, appender out) -> appender { - get_container(out).append(begin, end); - return out; -} -template -auto copy_str(InputIt begin, InputIt end, - std::back_insert_iterator out) - -> std::back_insert_iterator { - get_container(out).append(begin, end); - return out; -} - -template -FMT_CONSTEXPR auto copy_str(R&& rng, OutputIt out) -> OutputIt { - return detail::copy_str(rng.begin(), rng.end(), out); -} - -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 500 -// A workaround for gcc 4.8 to make void_t work in a SFINAE context. -template struct void_t_impl { using type = void; }; -template using void_t = typename void_t_impl::type; -#else -template using void_t = void; -#endif - template struct is_output_iterator : std::false_type {}; +template <> struct is_output_iterator : std::true_type {}; + template struct is_output_iterator< - It, T, - void_t::iterator_category, - decltype(*std::declval() = std::declval())>> + It, T, void_t()++ = std::declval())>> : std::true_type {}; template struct is_back_insert_iterator : std::false_type {}; template -struct is_back_insert_iterator> +struct is_back_insert_iterator> : std::true_type {}; // A type-erased reference to an std::locale to avoid a heavy include. @@ -1607,8 +1621,8 @@ FMT_CONSTEXPR inline auto make_arg(T& val) -> basic_format_arg { } // namespace detail FMT_BEGIN_EXPORT -// A formatting argument. It is a trivially copyable/constructible type to -// allow storage in basic_memory_buffer. +// A formatting argument. Context is a template parameter for the compiled API +// where output can be unbuffered. template class basic_format_arg { private: detail::value value_; @@ -1618,11 +1632,6 @@ template class basic_format_arg { friend FMT_CONSTEXPR auto detail::make_arg(T& value) -> basic_format_arg; - template - friend FMT_CONSTEXPR auto visit_format_arg(Visitor&& vis, - const basic_format_arg& arg) - -> decltype(vis(0)); - friend class basic_format_args; friend class dynamic_format_arg_store; @@ -1660,55 +1669,68 @@ template class basic_format_arg { auto is_arithmetic() const -> bool { return detail::is_arithmetic_type(type_); } + + /** + \rst + Visits an argument dispatching to the appropriate visit method based on + the argument type. For example, if the argument type is ``double`` then + ``vis(value)`` will be called with the value of type ``double``. + \endrst + */ + template + FMT_CONSTEXPR auto visit(Visitor&& vis) -> decltype(vis(0)) { + switch (type_) { + case detail::type::none_type: + break; + case detail::type::int_type: + return vis(value_.int_value); + case detail::type::uint_type: + return vis(value_.uint_value); + case detail::type::long_long_type: + return vis(value_.long_long_value); + case detail::type::ulong_long_type: + return vis(value_.ulong_long_value); + case detail::type::int128_type: + return vis(detail::convert_for_visit(value_.int128_value)); + case detail::type::uint128_type: + return vis(detail::convert_for_visit(value_.uint128_value)); + case detail::type::bool_type: + return vis(value_.bool_value); + case detail::type::char_type: + return vis(value_.char_value); + case detail::type::float_type: + return vis(value_.float_value); + case detail::type::double_type: + return vis(value_.double_value); + case detail::type::long_double_type: + return vis(value_.long_double_value); + case detail::type::cstring_type: + return vis(value_.string.data); + case detail::type::string_type: + using sv = basic_string_view; + return vis(sv(value_.string.data, value_.string.size)); + case detail::type::pointer_type: + return vis(value_.pointer); + case detail::type::custom_type: + return vis(typename basic_format_arg::handle(value_.custom)); + } + return vis(monostate()); + } + + FMT_INLINE auto format_custom(const char_type* parse_begin, + typename Context::parse_context_type& parse_ctx, + Context& ctx) -> bool { + if (type_ != detail::type::custom_type) return false; + parse_ctx.advance_to(parse_begin); + value_.custom.format(value_.custom.value, parse_ctx, ctx); + return true; + } }; -/** - \rst - Visits an argument dispatching to the appropriate visit method based on - the argument type. For example, if the argument type is ``double`` then - ``vis(value)`` will be called with the value of type ``double``. - \endrst - */ -// DEPRECATED! template -FMT_CONSTEXPR FMT_INLINE auto visit_format_arg( +FMT_DEPRECATED FMT_CONSTEXPR FMT_INLINE auto visit_format_arg( Visitor&& vis, const basic_format_arg& arg) -> decltype(vis(0)) { - switch (arg.type_) { - case detail::type::none_type: - break; - case detail::type::int_type: - return vis(arg.value_.int_value); - case detail::type::uint_type: - return vis(arg.value_.uint_value); - case detail::type::long_long_type: - return vis(arg.value_.long_long_value); - case detail::type::ulong_long_type: - return vis(arg.value_.ulong_long_value); - case detail::type::int128_type: - return vis(detail::convert_for_visit(arg.value_.int128_value)); - case detail::type::uint128_type: - return vis(detail::convert_for_visit(arg.value_.uint128_value)); - case detail::type::bool_type: - return vis(arg.value_.bool_value); - case detail::type::char_type: - return vis(arg.value_.char_value); - case detail::type::float_type: - return vis(arg.value_.float_value); - case detail::type::double_type: - return vis(arg.value_.double_value); - case detail::type::long_double_type: - return vis(arg.value_.long_double_value); - case detail::type::cstring_type: - return vis(arg.value_.string.data); - case detail::type::string_type: - using sv = basic_string_view; - return vis(sv(arg.value_.string.data, arg.value_.string.size)); - case detail::type::pointer_type: - return vis(arg.value_.pointer); - case detail::type::custom_type: - return vis(typename basic_format_arg::handle(arg.value_.custom)); - } - return vis(monostate()); + return arg.visit(std::forward(vis)); } // Formatting context. @@ -1748,8 +1770,8 @@ template class basic_format_context { } auto args() const -> const format_args& { return args_; } - FMT_CONSTEXPR auto error_handler() -> detail::error_handler { return {}; } - void on_error(const char* message) { error_handler().on_error(message); } + // This function is intentionally not constexpr to give a compile-time error. + void on_error(const char* message) { throw_format_error(message); } // Returns an iterator to the beginning of the output range. FMT_CONSTEXPR auto out() -> iterator { return out_; } @@ -1831,7 +1853,7 @@ class format_arg_store // Arguments are taken by lvalue references to avoid some lifetime issues. template constexpr auto make_format_args(T&... args) - -> format_arg_store...> { + -> format_arg_store...> { return {args...}; } @@ -2107,11 +2129,8 @@ struct dynamic_format_specs : format_specs { }; // Converts a character to ASCII. Returns '\0' on conversion failure. -template ::value)> -constexpr auto to_ascii(Char c) -> char { - return c <= 0xff ? static_cast(c) : '\0'; -} -template ::value)> +template ::value || + std::is_enum::value)> constexpr auto to_ascii(Char c) -> char { return c <= 0xff ? static_cast(c) : '\0'; } @@ -2156,11 +2175,11 @@ FMT_CONSTEXPR auto parse_nonnegative_int(const Char*& begin, const Char* end, } while (p != end && '0' <= *p && *p <= '9'); auto num_digits = p - begin; begin = p; - if (num_digits <= std::numeric_limits::digits10) - return static_cast(value); + int digits10 = static_cast(sizeof(int) * CHAR_BIT * 3 / 10); + if (num_digits <= digits10) return static_cast(value); // Check for overflow. - const unsigned max = to_unsigned((std::numeric_limits::max)()); - return num_digits == std::numeric_limits::digits10 + 1 && + unsigned max = INT_MAX; + return num_digits == digits10 + 1 && prev * 10ull + unsigned(p[-1] - '0') <= max ? static_cast(value) : error_value; @@ -2188,9 +2207,8 @@ FMT_CONSTEXPR auto do_parse_arg_id(const Char* begin, const Char* end, Char c = *begin; if (c >= '0' && c <= '9') { int index = 0; - constexpr int max = (std::numeric_limits::max)(); if (c != '0') - index = parse_nonnegative_int(begin, end, max); + index = parse_nonnegative_int(begin, end, INT_MAX); else ++begin; if (begin == end || (*begin != '}' && *begin != ':')) @@ -2309,9 +2327,12 @@ FMT_CONSTEXPR FMT_INLINE auto parse_format_specs( dynamic_format_specs& specs; type arg_type; - FMT_CONSTEXPR auto operator()(pres type, int set) -> const Char* { - if (!in(arg_type, set)) throw_format_error("invalid format specifier"); - specs.type = type; + FMT_CONSTEXPR auto operator()(pres pres_type, int set) -> const Char* { + if (!in(arg_type, set)) { + if (arg_type == type::none_type) return begin; + throw_format_error("invalid format specifier"); + } + specs.type = pres_type; return begin + 1; } } parse_presentation_type{begin, specs, arg_type}; @@ -2328,6 +2349,7 @@ FMT_CONSTEXPR FMT_INLINE auto parse_format_specs( case '+': case '-': case ' ': + if (arg_type == type::none_type) return begin; enter_state(state::sign, in(arg_type, sint_set | float_set)); switch (c) { case '+': @@ -2343,14 +2365,17 @@ FMT_CONSTEXPR FMT_INLINE auto parse_format_specs( ++begin; break; case '#': + if (arg_type == type::none_type) return begin; enter_state(state::hash, is_arithmetic_type(arg_type)); specs.alt = true; ++begin; break; case '0': enter_state(state::zero); - if (!is_arithmetic_type(arg_type)) + if (!is_arithmetic_type(arg_type)) { + if (arg_type == type::none_type) return begin; throw_format_error("format specifier requires numeric argument"); + } if (specs.align == align::none) { // Ignore 0 if align is specified for compatibility with std::format. specs.align = align::numeric; @@ -2372,12 +2397,14 @@ FMT_CONSTEXPR FMT_INLINE auto parse_format_specs( begin = parse_dynamic_spec(begin, end, specs.width, specs.width_ref, ctx); break; case '.': + if (arg_type == type::none_type) return begin; enter_state(state::precision, in(arg_type, float_set | string_set | cstring_set)); begin = parse_precision(begin, end, specs.precision, specs.precision_ref, ctx); break; case 'L': + if (arg_type == type::none_type) return begin; enter_state(state::locale, is_arithmetic_type(arg_type)); specs.localized = true; ++begin; @@ -2411,6 +2438,8 @@ FMT_CONSTEXPR FMT_INLINE auto parse_format_specs( case 'G': return parse_presentation_type(pres::general_upper, float_set); case 'c': + if (arg_type == type::bool_type) + throw_format_error("invalid format specifier"); return parse_presentation_type(pres::chr, integral_set); case 's': return parse_presentation_type(pres::string, @@ -2550,9 +2579,9 @@ FMT_CONSTEXPR auto parse_format_specs(ParseContext& ctx) decltype(arg_mapper().map(std::declval())), typename strip_named_arg::type>; // LAMMPS customization. Fails to compile with (some) Intel compilers -#if defined(__cpp_if_constexpr) && 0 - if constexpr (std::is_default_constructible_v< - formatter>) { +#if defined(__cpp_if_constexpr) && 1 + if constexpr (std::is_default_constructible< + formatter>::value) { return formatter().parse(ctx); } else { type_is_unformattable_for _; @@ -2675,9 +2704,11 @@ void check_format_string(S format_str) { template struct vformat_args { using type = basic_format_args< - basic_format_context>, Char>>; + basic_format_context>, Char>>; +}; +template <> struct vformat_args { + using type = format_args; }; -template <> struct vformat_args { using type = format_args; }; // Use vformat_args and avoid type_identity to keep symbols short. template @@ -2779,7 +2810,7 @@ using format_string = basic_format_string...>; inline auto runtime(string_view s) -> runtime_format_string<> { return {{s}}; } #endif -FMT_API auto vformat(string_view fmt, format_args args) -> std::string; +FMT_API auto vformat(string_view fmt, format_args args) -> basic_string; /** \rst @@ -2794,7 +2825,7 @@ FMT_API auto vformat(string_view fmt, format_args args) -> std::string; */ template FMT_NODISCARD FMT_INLINE auto format(format_string fmt, T&&... args) - -> std::string { + -> basic_string { return vformat(fmt, fmt::make_format_args(args...)); } @@ -2816,7 +2847,7 @@ auto vformat_to(OutputIt out, string_view fmt, format_args args) -> OutputIt { **Example**:: auto out = std::vector(); - fmt::format_to(std::back_inserter(out), "{}", 42); + fmt::format_to(fmt::back_inserter(out), "{}", 42); \endrst */ template #endif -#ifdef _WIN32 +#if defined(_WIN32) && !defined(FMT_WINDOWS_NO_WCHAR) # include // _isatty #endif @@ -36,10 +36,6 @@ FMT_FUNC void assert_fail(const char* file, int line, const char* message) { std::terminate(); } -FMT_FUNC void throw_format_error(const char* message) { - FMT_THROW(format_error(message)); -} - FMT_FUNC void format_error_code(detail::buffer& out, int error_code, string_view message) noexcept { // Report error code making sure that the output fits into @@ -58,8 +54,8 @@ FMT_FUNC void format_error_code(detail::buffer& out, int error_code, error_code_size += detail::to_unsigned(detail::count_digits(abs_value)); auto it = buffer_appender(out); if (message.size() <= inline_buffer_size - error_code_size) - format_to(it, FMT_STRING("{}{}"), message, SEP); - format_to(it, FMT_STRING("{}{}"), ERROR_STR, error_code); + fmt::format_to(it, FMT_STRING("{}{}"), message, SEP); + fmt::format_to(it, FMT_STRING("{}{}"), ERROR_STR, error_code); FMT_ASSERT(out.size() <= inline_buffer_size, ""); } @@ -73,9 +69,8 @@ FMT_FUNC void report_error(format_func func, int error_code, } // A wrapper around fwrite that throws on error. -inline void fwrite_fully(const void* ptr, size_t size, size_t count, - FILE* stream) { - size_t written = std::fwrite(ptr, size, count, stream); +inline void fwrite_fully(const void* ptr, size_t count, FILE* stream) { + size_t written = std::fwrite(ptr, 1, count, stream); if (written < count) FMT_THROW(system_error(errno, FMT_STRING("cannot write to file"))); } @@ -86,7 +81,7 @@ locale_ref::locale_ref(const Locale& loc) : locale_(&loc) { static_assert(std::is_same::value, ""); } -template Locale locale_ref::get() const { +template auto locale_ref::get() const -> Locale { static_assert(std::is_same::value, ""); return locale_ ? *static_cast(locale_) : std::locale(); } @@ -98,7 +93,8 @@ FMT_FUNC auto thousands_sep_impl(locale_ref loc) -> thousands_sep_result { auto thousands_sep = grouping.empty() ? Char() : facet.thousands_sep(); return {std::move(grouping), thousands_sep}; } -template FMT_FUNC Char decimal_point_impl(locale_ref loc) { +template +FMT_FUNC auto decimal_point_impl(locale_ref loc) -> Char { return std::use_facet>(loc.get()) .decimal_point(); } @@ -127,6 +123,10 @@ FMT_FUNC auto write_loc(appender out, loc_value value, } } // namespace detail +FMT_FUNC void throw_format_error(const char* message) { + FMT_THROW(format_error(message)); +} + template typename Locale::id format_facet::id; #ifndef FMT_STATIC_THOUSANDS_SEPARATOR @@ -144,24 +144,25 @@ FMT_API FMT_FUNC auto format_facet::do_put( } #endif -FMT_FUNC std::system_error vsystem_error(int error_code, string_view fmt, - format_args args) { +FMT_FUNC auto vsystem_error(int error_code, string_view fmt, format_args args) + -> std::system_error { auto ec = std::error_code(error_code, std::generic_category()); return std::system_error(ec, vformat(fmt, args)); } namespace detail { -template inline bool operator==(basic_fp x, basic_fp y) { +template +inline auto operator==(basic_fp x, basic_fp y) -> bool { return x.f == y.f && x.e == y.e; } // Compilers should be able to optimize this into the ror instruction. -FMT_CONSTEXPR inline uint32_t rotr(uint32_t n, uint32_t r) noexcept { +FMT_CONSTEXPR inline auto rotr(uint32_t n, uint32_t r) noexcept -> uint32_t { r &= 31; return (n >> r) | (n << (32 - r)); } -FMT_CONSTEXPR inline uint64_t rotr(uint64_t n, uint32_t r) noexcept { +FMT_CONSTEXPR inline auto rotr(uint64_t n, uint32_t r) noexcept -> uint64_t { r &= 63; return (n >> r) | (n << (64 - r)); } @@ -170,14 +171,14 @@ FMT_CONSTEXPR inline uint64_t rotr(uint64_t n, uint32_t r) noexcept { namespace dragonbox { // Computes upper 64 bits of multiplication of a 32-bit unsigned integer and a // 64-bit unsigned integer. -inline uint64_t umul96_upper64(uint32_t x, uint64_t y) noexcept { +inline auto umul96_upper64(uint32_t x, uint64_t y) noexcept -> uint64_t { return umul128_upper64(static_cast(x) << 32, y); } // Computes lower 128 bits of multiplication of a 64-bit unsigned integer and a // 128-bit unsigned integer. -inline uint128_fallback umul192_lower128(uint64_t x, - uint128_fallback y) noexcept { +inline auto umul192_lower128(uint64_t x, uint128_fallback y) noexcept + -> uint128_fallback { uint64_t high = x * y.high(); uint128_fallback high_low = umul128(x, y.low()); return {high + high_low.high(), high_low.low()}; @@ -185,12 +186,12 @@ inline uint128_fallback umul192_lower128(uint64_t x, // Computes lower 64 bits of multiplication of a 32-bit unsigned integer and a // 64-bit unsigned integer. -inline uint64_t umul96_lower64(uint32_t x, uint64_t y) noexcept { +inline auto umul96_lower64(uint32_t x, uint64_t y) noexcept -> uint64_t { return x * y; } // Various fast log computations. -inline int floor_log10_pow2_minus_log10_4_over_3(int e) noexcept { +inline auto floor_log10_pow2_minus_log10_4_over_3(int e) noexcept -> int { FMT_ASSERT(e <= 2936 && e >= -2985, "too large exponent"); return (e * 631305 - 261663) >> 21; } @@ -204,7 +205,7 @@ FMT_INLINE_VARIABLE constexpr struct { // divisible by pow(10, N). // Precondition: n <= pow(10, N + 1). template -bool check_divisibility_and_divide_by_pow10(uint32_t& n) noexcept { +auto check_divisibility_and_divide_by_pow10(uint32_t& n) noexcept -> bool { // The numbers below are chosen such that: // 1. floor(n/d) = floor(nm / 2^k) where d=10 or d=100, // 2. nm mod 2^k < m if and only if n is divisible by d, @@ -229,7 +230,7 @@ bool check_divisibility_and_divide_by_pow10(uint32_t& n) noexcept { // Computes floor(n / pow(10, N)) for small n and N. // Precondition: n <= pow(10, N + 1). -template uint32_t small_division_by_pow10(uint32_t n) noexcept { +template auto small_division_by_pow10(uint32_t n) noexcept -> uint32_t { constexpr auto info = div_small_pow10_infos[N - 1]; FMT_ASSERT(n <= info.divisor * 10, "n is too large"); constexpr uint32_t magic_number = @@ -238,12 +239,12 @@ template uint32_t small_division_by_pow10(uint32_t n) noexcept { } // Computes floor(n / 10^(kappa + 1)) (float) -inline uint32_t divide_by_10_to_kappa_plus_1(uint32_t n) noexcept { +inline auto divide_by_10_to_kappa_plus_1(uint32_t n) noexcept -> uint32_t { // 1374389535 = ceil(2^37/100) return static_cast((static_cast(n) * 1374389535) >> 37); } // Computes floor(n / 10^(kappa + 1)) (double) -inline uint64_t divide_by_10_to_kappa_plus_1(uint64_t n) noexcept { +inline auto divide_by_10_to_kappa_plus_1(uint64_t n) noexcept -> uint64_t { // 2361183241434822607 = ceil(2^(64+7)/1000) return umul128_upper64(n, 2361183241434822607ull) >> 7; } @@ -255,7 +256,7 @@ template <> struct cache_accessor { using carrier_uint = float_info::carrier_uint; using cache_entry_type = uint64_t; - static uint64_t get_cached_power(int k) noexcept { + static auto get_cached_power(int k) noexcept -> uint64_t { FMT_ASSERT(k >= float_info::min_k && k <= float_info::max_k, "k is out of range"); static constexpr const uint64_t pow10_significands[] = { @@ -297,20 +298,23 @@ template <> struct cache_accessor { bool is_integer; }; - static compute_mul_result compute_mul( - carrier_uint u, const cache_entry_type& cache) noexcept { + static auto compute_mul(carrier_uint u, + const cache_entry_type& cache) noexcept + -> compute_mul_result { auto r = umul96_upper64(u, cache); return {static_cast(r >> 32), static_cast(r) == 0}; } - static uint32_t compute_delta(const cache_entry_type& cache, - int beta) noexcept { + static auto compute_delta(const cache_entry_type& cache, int beta) noexcept + -> uint32_t { return static_cast(cache >> (64 - 1 - beta)); } - static compute_mul_parity_result compute_mul_parity( - carrier_uint two_f, const cache_entry_type& cache, int beta) noexcept { + static auto compute_mul_parity(carrier_uint two_f, + const cache_entry_type& cache, + int beta) noexcept + -> compute_mul_parity_result { FMT_ASSERT(beta >= 1, ""); FMT_ASSERT(beta < 64, ""); @@ -319,22 +323,22 @@ template <> struct cache_accessor { static_cast(r >> (32 - beta)) == 0}; } - static carrier_uint compute_left_endpoint_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_left_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return static_cast( (cache - (cache >> (num_significand_bits() + 2))) >> (64 - num_significand_bits() - 1 - beta)); } - static carrier_uint compute_right_endpoint_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_right_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return static_cast( (cache + (cache >> (num_significand_bits() + 1))) >> (64 - num_significand_bits() - 1 - beta)); } - static carrier_uint compute_round_up_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_round_up_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return (static_cast( cache >> (64 - num_significand_bits() - 2 - beta)) + 1) / @@ -346,7 +350,7 @@ template <> struct cache_accessor { using carrier_uint = float_info::carrier_uint; using cache_entry_type = uint128_fallback; - static uint128_fallback get_cached_power(int k) noexcept { + static auto get_cached_power(int k) noexcept -> uint128_fallback { FMT_ASSERT(k >= float_info::min_k && k <= float_info::max_k, "k is out of range"); @@ -985,8 +989,7 @@ template <> struct cache_accessor { {0xe0accfa875af45a7, 0x93eb1b80a33b8606}, {0x8c6c01c9498d8b88, 0xbc72f130660533c4}, {0xaf87023b9bf0ee6a, 0xeb8fad7c7f8680b5}, - { 0xdb68c2ca82ed2a05, - 0xa67398db9f6820e2 } + {0xdb68c2ca82ed2a05, 0xa67398db9f6820e2}, #else {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b}, {0xce5d73ff402d98e3, 0xfb0a3d212dc81290}, @@ -1071,19 +1074,22 @@ template <> struct cache_accessor { bool is_integer; }; - static compute_mul_result compute_mul( - carrier_uint u, const cache_entry_type& cache) noexcept { + static auto compute_mul(carrier_uint u, + const cache_entry_type& cache) noexcept + -> compute_mul_result { auto r = umul192_upper128(u, cache); return {r.high(), r.low() == 0}; } - static uint32_t compute_delta(cache_entry_type const& cache, - int beta) noexcept { + static auto compute_delta(cache_entry_type const& cache, int beta) noexcept + -> uint32_t { return static_cast(cache.high() >> (64 - 1 - beta)); } - static compute_mul_parity_result compute_mul_parity( - carrier_uint two_f, const cache_entry_type& cache, int beta) noexcept { + static auto compute_mul_parity(carrier_uint two_f, + const cache_entry_type& cache, + int beta) noexcept + -> compute_mul_parity_result { FMT_ASSERT(beta >= 1, ""); FMT_ASSERT(beta < 64, ""); @@ -1092,35 +1098,35 @@ template <> struct cache_accessor { ((r.high() << beta) | (r.low() >> (64 - beta))) == 0}; } - static carrier_uint compute_left_endpoint_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_left_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return (cache.high() - (cache.high() >> (num_significand_bits() + 2))) >> (64 - num_significand_bits() - 1 - beta); } - static carrier_uint compute_right_endpoint_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_right_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return (cache.high() + (cache.high() >> (num_significand_bits() + 1))) >> (64 - num_significand_bits() - 1 - beta); } - static carrier_uint compute_round_up_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_round_up_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return ((cache.high() >> (64 - num_significand_bits() - 2 - beta)) + 1) / 2; } }; -FMT_FUNC uint128_fallback get_cached_power(int k) noexcept { +FMT_FUNC auto get_cached_power(int k) noexcept -> uint128_fallback { return cache_accessor::get_cached_power(k); } // Various integer checks template -bool is_left_endpoint_integer_shorter_interval(int exponent) noexcept { +auto is_left_endpoint_integer_shorter_interval(int exponent) noexcept -> bool { const int case_shorter_interval_left_endpoint_lower_threshold = 2; const int case_shorter_interval_left_endpoint_upper_threshold = 3; return exponent >= case_shorter_interval_left_endpoint_lower_threshold && @@ -1132,7 +1138,7 @@ FMT_INLINE int remove_trailing_zeros(uint32_t& n, int s = 0) noexcept { FMT_ASSERT(n != 0, ""); // Modular inverse of 5 (mod 2^32): (mod_inv_5 * 5) mod 2^32 = 1. constexpr uint32_t mod_inv_5 = 0xcccccccd; - constexpr uint32_t mod_inv_25 = 0xc28f5c29; // = mod_inv_5 * mod_inv_5 + constexpr uint32_t mod_inv_25 = 0xc28f5c29; // = mod_inv_5 * mod_inv_5 while (true) { auto q = rotr(n * mod_inv_25, 2); @@ -1168,7 +1174,7 @@ FMT_INLINE int remove_trailing_zeros(uint64_t& n) noexcept { // If n is not divisible by 10^8, work with n itself. constexpr uint64_t mod_inv_5 = 0xcccccccccccccccd; - constexpr uint64_t mod_inv_25 = 0x8f5c28f5c28f5c29; // = mod_inv_5 * mod_inv_5 + constexpr uint64_t mod_inv_25 = 0x8f5c28f5c28f5c29; // mod_inv_5 * mod_inv_5 int s = 0; while (true) { @@ -1234,7 +1240,7 @@ FMT_INLINE decimal_fp shorter_interval_case(int exponent) noexcept { return ret_value; } -template decimal_fp to_decimal(T x) noexcept { +template auto to_decimal(T x) noexcept -> decimal_fp { // Step 1: integer promotion & Schubfach multiplier calculation. using carrier_uint = typename float_info::carrier_uint; @@ -1373,15 +1379,15 @@ template <> struct formatter { for (auto i = n.bigits_.size(); i > 0; --i) { auto value = n.bigits_[i - 1u]; if (first) { - out = format_to(out, FMT_STRING("{:x}"), value); + out = fmt::format_to(out, FMT_STRING("{:x}"), value); first = false; continue; } - out = format_to(out, FMT_STRING("{:08x}"), value); + out = fmt::format_to(out, FMT_STRING("{:08x}"), value); } if (n.exp_ > 0) - out = format_to(out, FMT_STRING("p{}"), - n.exp_ * detail::bigint::bigit_bits); + out = fmt::format_to(out, FMT_STRING("p{}"), + n.exp_ * detail::bigint::bigit_bits); return out; } }; @@ -1417,7 +1423,7 @@ FMT_FUNC void report_system_error(int error_code, report_error(format_system_error, error_code, message); } -FMT_FUNC std::string vformat(string_view fmt, format_args args) { +FMT_FUNC auto vformat(string_view fmt, format_args args) -> std::string { // Don't optimize the "{}" case to keep the binary size small and because it // can be better optimized in fmt::format anyway. auto buffer = memory_buffer(); @@ -1426,33 +1432,38 @@ FMT_FUNC std::string vformat(string_view fmt, format_args args) { } namespace detail { -#ifndef _WIN32 -FMT_FUNC bool write_console(std::FILE*, string_view) { return false; } +#if !defined(_WIN32) || defined(FMT_WINDOWS_NO_WCHAR) +FMT_FUNC auto write_console(int, string_view) -> bool { return false; } #else using dword = conditional_t; extern "C" __declspec(dllimport) int __stdcall WriteConsoleW( // void*, const void*, dword, dword*, void*); -FMT_FUNC bool write_console(std::FILE* f, string_view text) { - auto fd = _fileno(f); - if (!_isatty(fd)) return false; +FMT_FUNC bool write_console(int fd, string_view text) { auto u16 = utf8_to_utf16(text); - auto written = dword(); return WriteConsoleW(reinterpret_cast(_get_osfhandle(fd)), u16.c_str(), - static_cast(u16.size()), &written, nullptr) != 0; + static_cast(u16.size()), nullptr, nullptr) != 0; } +#endif +#ifdef _WIN32 // Print assuming legacy (non-Unicode) encoding. FMT_FUNC void vprint_mojibake(std::FILE* f, string_view fmt, format_args args) { auto buffer = memory_buffer(); - detail::vformat_to(buffer, fmt, - basic_format_args>(args)); - fwrite_fully(buffer.data(), 1, buffer.size(), f); + detail::vformat_to(buffer, fmt, args); + fwrite_fully(buffer.data(), buffer.size(), f); } #endif FMT_FUNC void print(std::FILE* f, string_view text) { - if (!write_console(f, text)) fwrite_fully(text.data(), 1, text.size(), f); +#ifdef _WIN32 + int fd = _fileno(f); + if (_isatty(fd)) { + std::fflush(f); + if (write_console(fd, text)) return; + } +#endif + fwrite_fully(text.data(), text.size(), f); } } // namespace detail diff --git a/src/fmt/format.h b/src/fmt/format.h index 87a34b972c..8cdf95b7bd 100644 --- a/src/fmt/format.h +++ b/src/fmt/format.h @@ -37,17 +37,28 @@ #include // uint32_t #include // std::memcpy #include // std::initializer_list -#include // std::numeric_limits -#include // std::uninitialized_copy -#include // std::runtime_error -#include // std::system_error +#include +#include // std::numeric_limits +#include // std::uninitialized_copy +#include // std::runtime_error +#include // std::system_error #ifdef __cpp_lib_bit_cast -# include // std::bitcast +# include // std::bit_cast #endif #include "core.h" +// libc++ supports string_view in pre-c++17. +#if FMT_HAS_INCLUDE() && \ + (FMT_CPLUSPLUS >= 201703L || defined(_LIBCPP_VERSION)) +# include +# define FMT_USE_STRING_VIEW +#elif FMT_HAS_INCLUDE("experimental/string_view") && FMT_CPLUSPLUS >= 201402L +# include +# define FMT_USE_EXPERIMENTAL_STRING_VIEW +#endif + #if defined __cpp_inline_variables && __cpp_inline_variables >= 201606L # define FMT_INLINE_VARIABLE inline #else @@ -65,25 +76,11 @@ # define FMT_FALLTHROUGH #endif -#ifndef FMT_DEPRECATED -# if FMT_HAS_CPP14_ATTRIBUTE(deprecated) || FMT_MSC_VERSION >= 1900 -# define FMT_DEPRECATED [[deprecated]] -# else -# if (defined(__GNUC__) && !defined(__LCC__)) || defined(__clang__) -# define FMT_DEPRECATED __attribute__((deprecated)) -# elif FMT_MSC_VERSION -# define FMT_DEPRECATED __declspec(deprecated) -# else -# define FMT_DEPRECATED /* deprecated */ -# endif -# endif -#endif - #ifndef FMT_NO_UNIQUE_ADDRESS # if FMT_CPLUSPLUS >= 202002L # if FMT_HAS_CPP_ATTRIBUTE(no_unique_address) # define FMT_NO_UNIQUE_ADDRESS [[no_unique_address]] -// VS2019 v16.10 and later except clang-cl (https://reviews.llvm.org/D110485) +// VS2019 v16.10 and later except clang-cl (https://reviews.llvm.org/D110485). # elif (FMT_MSC_VERSION >= 1929) && !FMT_CLANG_VERSION # define FMT_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]] # endif @@ -93,10 +90,11 @@ # define FMT_NO_UNIQUE_ADDRESS #endif -#if FMT_GCC_VERSION || defined(__clang__) -# define FMT_VISIBILITY(value) __attribute__((visibility(value))) +// Visibility when compiled as a shared library/object. +#if defined(FMT_LIB_EXPORT) || defined(FMT_SHARED) +# define FMT_SO_VISIBILITY(value) FMT_VISIBILITY(value) #else -# define FMT_VISIBILITY(value) +# define FMT_SO_VISIBILITY(value) #endif #ifdef __has_builtin @@ -152,7 +150,10 @@ FMT_END_NAMESPACE #ifndef FMT_USE_USER_DEFINED_LITERALS // EDG based compilers (Intel, NVIDIA, Elbrus, etc), GCC and MSVC support UDLs. -# if (FMT_HAS_FEATURE(cxx_user_literals) || FMT_GCC_VERSION >= 407 || \ +// +// GCC before 4.9 requires a space in `operator"" _a` which is invalid in later +// compiler versions. +# if (FMT_HAS_FEATURE(cxx_user_literals) || FMT_GCC_VERSION >= 409 || \ FMT_MSC_VERSION >= 1900) && \ (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= /* UDL feature */ 480) # define FMT_USE_USER_DEFINED_LITERALS 1 @@ -272,20 +273,19 @@ inline auto ctzll(uint64_t x) -> int { FMT_END_NAMESPACE #endif +namespace std { +template <> struct iterator_traits { + using value_type = void; + using iterator_category = std::output_iterator_tag; +}; +template +struct iterator_traits> { + using value_type = void; + using iterator_category = std::output_iterator_tag; +}; +} // namespace std + FMT_BEGIN_NAMESPACE - -template struct disjunction : std::false_type {}; -template struct disjunction

: P {}; -template -struct disjunction - : conditional_t> {}; - -template struct conjunction : std::true_type {}; -template struct conjunction

: P {}; -template -struct conjunction - : conditional_t, P1> {}; - namespace detail { FMT_CONSTEXPR inline void abort_fuzzing_if(bool condition) { @@ -295,6 +295,15 @@ FMT_CONSTEXPR inline void abort_fuzzing_if(bool condition) { #endif } +#if defined(FMT_USE_STRING_VIEW) +template using std_string_view = std::basic_string_view; +#elif defined(FMT_USE_EXPERIMENTAL_STRING_VIEW) +template +using std_string_view = std::experimental::basic_string_view; +#else +template struct std_string_view {}; +#endif + template struct string_literal { static constexpr CharT value[sizeof...(C)] = {C...}; constexpr operator basic_string_view() const { @@ -307,37 +316,6 @@ template constexpr CharT string_literal::value[sizeof...(C)]; #endif -template class formatbuf : public Streambuf { - private: - using char_type = typename Streambuf::char_type; - using streamsize = decltype(std::declval().sputn(nullptr, 0)); - using int_type = typename Streambuf::int_type; - using traits_type = typename Streambuf::traits_type; - - buffer& buffer_; - - public: - explicit formatbuf(buffer& buf) : buffer_(buf) {} - - protected: - // The put area is always empty. This makes the implementation simpler and has - // the advantage that the streambuf and the buffer are always in sync and - // sputc never writes into uninitialized memory. A disadvantage is that each - // call to sputc always results in a (virtual) call to overflow. There is no - // disadvantage here for sputn since this always results in a call to xsputn. - - auto overflow(int_type ch) -> int_type override { - if (!traits_type::eq_int_type(ch, traits_type::eof())) - buffer_.push_back(static_cast(ch)); - return ch; - } - - auto xsputn(const char_type* s, streamsize count) -> streamsize override { - buffer_.append(s, s + count); - return count; - } -}; - // Implementation of std::bit_cast for pre-C++20. template FMT_CONSTEXPR20 auto bit_cast(const From& from) -> To { @@ -373,8 +351,8 @@ class uint128_fallback { constexpr uint128_fallback(uint64_t hi, uint64_t lo) : lo_(lo), hi_(hi) {} constexpr uint128_fallback(uint64_t value = 0) : lo_(value), hi_(0) {} - constexpr uint64_t high() const noexcept { return hi_; } - constexpr uint64_t low() const noexcept { return lo_; } + constexpr auto high() const noexcept -> uint64_t { return hi_; } + constexpr auto low() const noexcept -> uint64_t { return lo_; } template ::value)> constexpr explicit operator T() const { @@ -450,7 +428,7 @@ class uint128_fallback { hi_ &= n.hi_; } - FMT_CONSTEXPR20 uint128_fallback& operator+=(uint64_t n) noexcept { + FMT_CONSTEXPR20 auto operator+=(uint64_t n) noexcept -> uint128_fallback& { if (is_constant_evaluated()) { lo_ += n; hi_ += (lo_ < n ? 1 : 0); @@ -546,6 +524,52 @@ FMT_INLINE void assume(bool condition) { #endif } +// Extracts a reference to the container from back_insert_iterator. +template +inline auto get_container(std::back_insert_iterator it) + -> Container& { + using base = std::back_insert_iterator; + struct accessor : base { + accessor(base b) : base(b) {} + using base::container; + }; + return *accessor(it).container; +} + +template +FMT_CONSTEXPR auto copy_str(InputIt begin, InputIt end, OutputIt out) + -> OutputIt { + while (begin != end) *out++ = static_cast(*begin++); + return out; +} + +template , U>::value&& is_char::value)> +FMT_CONSTEXPR auto copy_str(T* begin, T* end, U* out) -> U* { + if (is_constant_evaluated()) return copy_str(begin, end, out); + auto size = to_unsigned(end - begin); + if (size > 0) memcpy(out, begin, size * sizeof(U)); + return out + size; +} + +template +auto copy_str(InputIt begin, InputIt end, appender out) -> appender { + get_container(out).append(begin, end); + return out; +} +template +auto copy_str(InputIt begin, InputIt end, back_insert_iterator out) + -> back_insert_iterator { + get_container(out).append(begin, end); + return out; +} + +template +FMT_CONSTEXPR auto copy_str(R&& rng, OutputIt out) -> OutputIt { + return detail::copy_str(rng.begin(), rng.end(), out); +} + // An approximation of iterator_t for pre-C++20 systems. template using iterator_t = decltype(std::begin(std::declval())); @@ -740,7 +764,7 @@ inline auto compute_width(basic_string_view s) -> size_t { } // Computes approximate display width of a UTF-8 string. -FMT_CONSTEXPR inline size_t compute_width(string_view s) { +FMT_CONSTEXPR inline auto compute_width(string_view s) -> size_t { size_t num_code_points = 0; // It is not a lambda for compatibility with C++14. struct count_code_points { @@ -787,12 +811,17 @@ inline auto code_point_index(basic_string_view s, size_t n) -> size_t { // Calculates the index of the nth code point in a UTF-8 string. inline auto code_point_index(string_view s, size_t n) -> size_t { - const char* data = s.data(); - size_t num_code_points = 0; - for (size_t i = 0, size = s.size(); i != size; ++i) { - if ((data[i] & 0xc0) != 0x80 && ++num_code_points > n) return i; - } - return s.size(); + size_t result = s.size(); + const char* begin = s.begin(); + for_each_codepoint(s, [begin, &n, &result](uint32_t, string_view sv) { + if (n != 0) { + --n; + return true; + } + result = to_unsigned(sv.begin() - begin); + return false; + }); + return result; } inline auto code_point_index(basic_string_view s, size_t n) @@ -902,7 +931,7 @@ enum { inline_buffer_size = 500 }; **Example**:: auto out = fmt::memory_buffer(); - format_to(std::back_inserter(out), "The answer is {}.", 42); + fmt::format_to(std::back_inserter(out), "The answer is {}.", 42); This will append the following output to the ``out`` object: @@ -929,27 +958,29 @@ class basic_memory_buffer final : public detail::buffer { } protected: - FMT_CONSTEXPR20 void grow(size_t size) override { + static FMT_CONSTEXPR20 void grow(detail::buffer& buf, size_t size) { detail::abort_fuzzing_if(size > 5000); - const size_t max_size = std::allocator_traits::max_size(alloc_); - size_t old_capacity = this->capacity(); + auto& self = static_cast(buf); + const size_t max_size = + std::allocator_traits::max_size(self.alloc_); + size_t old_capacity = buf.capacity(); size_t new_capacity = old_capacity + old_capacity / 2; if (size > new_capacity) new_capacity = size; else if (new_capacity > max_size) new_capacity = size > max_size ? size : max_size; - T* old_data = this->data(); + T* old_data = buf.data(); T* new_data = - std::allocator_traits::allocate(alloc_, new_capacity); + std::allocator_traits::allocate(self.alloc_, new_capacity); // Suppress a bogus -Wstringop-overflow in gcc 13.1 (#3481). - detail::assume(this->size() <= new_capacity); + detail::assume(buf.size() <= new_capacity); // The following code doesn't throw, so the raw pointer above doesn't leak. - std::uninitialized_copy_n(old_data, this->size(), new_data); - this->set(new_data, new_capacity); + std::uninitialized_copy_n(old_data, buf.size(), new_data); + self.set(new_data, new_capacity); // deallocate must not throw according to the standard, but even if it does, // the buffer already uses the new storage and will deallocate it in // destructor. - if (old_data != store_) alloc_.deallocate(old_data, old_capacity); + if (old_data != self.store_) self.alloc_.deallocate(old_data, old_capacity); } public: @@ -958,7 +989,7 @@ class basic_memory_buffer final : public detail::buffer { FMT_CONSTEXPR20 explicit basic_memory_buffer( const Allocator& alloc = Allocator()) - : alloc_(alloc) { + : detail::buffer(grow), alloc_(alloc) { this->set(store_, SIZE); if (detail::is_constant_evaluated()) detail::fill_n(store_, SIZE, T()); } @@ -990,7 +1021,8 @@ class basic_memory_buffer final : public detail::buffer { of the other object to it. \endrst */ - FMT_CONSTEXPR20 basic_memory_buffer(basic_memory_buffer&& other) noexcept { + FMT_CONSTEXPR20 basic_memory_buffer(basic_memory_buffer&& other) noexcept + : detail::buffer(grow) { move(other); } @@ -1018,7 +1050,6 @@ class basic_memory_buffer final : public detail::buffer { /** Increases the buffer capacity to *new_capacity*. */ void reserve(size_t new_capacity) { this->try_reserve(new_capacity); } - // Directly append data into the buffer using detail::buffer::append; template void append(const ContiguousRange& range) { @@ -1034,7 +1065,7 @@ struct is_contiguous> : std::true_type { FMT_END_EXPORT namespace detail { -FMT_API bool write_console(std::FILE* f, string_view text); +FMT_API auto write_console(int fd, string_view text) -> bool; FMT_API void print(std::FILE*, string_view); } // namespace detail @@ -1046,7 +1077,7 @@ FMT_BEGIN_EXPORT #endif /** An error reported from a formatting function. */ -class FMT_VISIBILITY("default") format_error : public std::runtime_error { +class FMT_SO_VISIBILITY("default") format_error : public std::runtime_error { public: using std::runtime_error::runtime_error; }; @@ -1089,7 +1120,7 @@ class loc_value { loc_value(T) {} template auto visit(Visitor&& vis) -> decltype(vis(0)) { - return visit_format_arg(vis, value_); + return value_.visit(vis); } }; @@ -1153,13 +1184,13 @@ using uint32_or_64_or_128_t = template using uint64_or_128_t = conditional_t() <= 64, uint64_t, uint128_t>; -#define FMT_POWERS_OF_10(factor) \ - factor * 10, (factor)*100, (factor)*1000, (factor)*10000, (factor)*100000, \ - (factor)*1000000, (factor)*10000000, (factor)*100000000, \ - (factor)*1000000000 +#define FMT_POWERS_OF_10(factor) \ + factor * 10, (factor) * 100, (factor) * 1000, (factor) * 10000, \ + (factor) * 100000, (factor) * 1000000, (factor) * 10000000, \ + (factor) * 100000000, (factor) * 1000000000 // Converts value in the range [0, 100) to a string. -constexpr const char* digits2(size_t value) { +constexpr auto digits2(size_t value) -> const char* { // GCC generates slightly better code when value is pointer-size. return &"0001020304050607080910111213141516171819" "2021222324252627282930313233343536373839" @@ -1169,7 +1200,7 @@ constexpr const char* digits2(size_t value) { } // Sign is a template parameter to workaround a bug in gcc 4.8. -template constexpr Char sign(Sign s) { +template constexpr auto sign(Sign s) -> Char { #if !FMT_GCC_VERSION || FMT_GCC_VERSION >= 604 static_assert(std::is_same::value, ""); #endif @@ -1394,7 +1425,7 @@ FMT_CONSTEXPR inline auto format_uint(It out, UInt value, int num_digits, return out; } // Buffer should be large enough to hold all digits (digits / BASE_BITS + 1). - char buffer[num_bits() / BASE_BITS + 1]; + char buffer[num_bits() / BASE_BITS + 1] = {}; format_uint(buffer, value, num_digits, upper); return detail::copy_str_noinline(buffer, buffer + num_digits, out); } @@ -1430,22 +1461,23 @@ template class to_utf8 { : "invalid utf32")); } operator string_view() const { return string_view(&buffer_[0], size()); } - size_t size() const { return buffer_.size() - 1; } - const char* c_str() const { return &buffer_[0]; } - std::string str() const { return std::string(&buffer_[0], size()); } + auto size() const -> size_t { return buffer_.size() - 1; } + auto c_str() const -> const char* { return &buffer_[0]; } + auto str() const -> std::string { return std::string(&buffer_[0], size()); } // Performs conversion returning a bool instead of throwing exception on // conversion error. This method may still throw in case of memory allocation // error. - bool convert(basic_string_view s, - to_utf8_error_policy policy = to_utf8_error_policy::abort) { + auto convert(basic_string_view s, + to_utf8_error_policy policy = to_utf8_error_policy::abort) + -> bool { if (!convert(buffer_, s, policy)) return false; buffer_.push_back(0); return true; } - static bool convert( - Buffer& buf, basic_string_view s, - to_utf8_error_policy policy = to_utf8_error_policy::abort) { + static auto convert(Buffer& buf, basic_string_view s, + to_utf8_error_policy policy = to_utf8_error_policy::abort) + -> bool { for (auto p = s.begin(); p != s.end(); ++p) { uint32_t c = static_cast(*p); if (sizeof(WChar) == 2 && c >= 0xd800 && c <= 0xdfff) { @@ -1481,7 +1513,7 @@ template class to_utf8 { }; // Computes 128-bit result of multiplication of two 64-bit unsigned integers. -inline uint128_fallback umul128(uint64_t x, uint64_t y) noexcept { +inline auto umul128(uint64_t x, uint64_t y) noexcept -> uint128_fallback { #if FMT_USE_INT128 auto p = static_cast(x) * static_cast(y); return {static_cast(p >> 64), static_cast(p)}; @@ -1512,19 +1544,19 @@ inline uint128_fallback umul128(uint64_t x, uint64_t y) noexcept { namespace dragonbox { // Computes floor(log10(pow(2, e))) for e in [-2620, 2620] using the method from // https://fmt.dev/papers/Dragonbox.pdf#page=28, section 6.1. -inline int floor_log10_pow2(int e) noexcept { +inline auto floor_log10_pow2(int e) noexcept -> int { FMT_ASSERT(e <= 2620 && e >= -2620, "too large exponent"); static_assert((-1 >> 1) == -1, "right shift is not arithmetic"); return (e * 315653) >> 20; } -inline int floor_log2_pow10(int e) noexcept { +inline auto floor_log2_pow10(int e) noexcept -> int { FMT_ASSERT(e <= 1233 && e >= -1233, "too large exponent"); return (e * 1741647) >> 19; } // Computes upper 64 bits of multiplication of two 64-bit unsigned integers. -inline uint64_t umul128_upper64(uint64_t x, uint64_t y) noexcept { +inline auto umul128_upper64(uint64_t x, uint64_t y) noexcept -> uint64_t { #if FMT_USE_INT128 auto p = static_cast(x) * static_cast(y); return static_cast(p >> 64); @@ -1537,14 +1569,14 @@ inline uint64_t umul128_upper64(uint64_t x, uint64_t y) noexcept { // Computes upper 128 bits of multiplication of a 64-bit unsigned integer and a // 128-bit unsigned integer. -inline uint128_fallback umul192_upper128(uint64_t x, - uint128_fallback y) noexcept { +inline auto umul192_upper128(uint64_t x, uint128_fallback y) noexcept + -> uint128_fallback { uint128_fallback r = umul128(x, y.high()); r += umul128_upper64(x, y.low()); return r; } -FMT_API uint128_fallback get_cached_power(int k) noexcept; +FMT_API auto get_cached_power(int k) noexcept -> uint128_fallback; // Type-specific information that Dragonbox uses. template struct float_info; @@ -1598,14 +1630,14 @@ template FMT_API auto to_decimal(T x) noexcept -> decimal_fp; } // namespace dragonbox // Returns true iff Float has the implicit bit which is not stored. -template constexpr bool has_implicit_bit() { +template constexpr auto has_implicit_bit() -> bool { // An 80-bit FP number has a 64-bit significand an no implicit bit. return std::numeric_limits::digits != 64; } // Returns the number of significand bits stored in Float. The implicit bit is // not counted since it is not stored. -template constexpr int num_significand_bits() { +template constexpr auto num_significand_bits() -> int { // std::numeric_limits may not support __float128. return is_float128() ? 112 : (std::numeric_limits::digits - @@ -1698,7 +1730,7 @@ using fp = basic_fp; // Normalizes the value converted from double and multiplied by (1 << SHIFT). template -FMT_CONSTEXPR basic_fp normalize(basic_fp value) { +FMT_CONSTEXPR auto normalize(basic_fp value) -> basic_fp { // Handle subnormals. const auto implicit_bit = F(1) << num_significand_bits(); const auto shifted_implicit_bit = implicit_bit << SHIFT; @@ -1715,7 +1747,7 @@ FMT_CONSTEXPR basic_fp normalize(basic_fp value) { } // Computes lhs * rhs / pow(2, 64) rounded to nearest with half-up tie breaking. -FMT_CONSTEXPR inline uint64_t multiply(uint64_t lhs, uint64_t rhs) { +FMT_CONSTEXPR inline auto multiply(uint64_t lhs, uint64_t rhs) -> uint64_t { #if FMT_USE_INT128 auto product = static_cast<__uint128_t>(lhs) * rhs; auto f = static_cast(product >> 64); @@ -1732,33 +1764,10 @@ FMT_CONSTEXPR inline uint64_t multiply(uint64_t lhs, uint64_t rhs) { #endif } -FMT_CONSTEXPR inline fp operator*(fp x, fp y) { +FMT_CONSTEXPR inline auto operator*(fp x, fp y) -> fp { return {multiply(x.f, y.f), x.e + y.e + 64}; } -template struct basic_data { - // For checking rounding thresholds. - // The kth entry is chosen to be the smallest integer such that the - // upper 32-bits of 10^(k+1) times it is strictly bigger than 5 * 10^k. - static constexpr uint32_t fractional_part_rounding_thresholds[8] = { - 2576980378U, // ceil(2^31 + 2^32/10^1) - 2190433321U, // ceil(2^31 + 2^32/10^2) - 2151778616U, // ceil(2^31 + 2^32/10^3) - 2147913145U, // ceil(2^31 + 2^32/10^4) - 2147526598U, // ceil(2^31 + 2^32/10^5) - 2147487943U, // ceil(2^31 + 2^32/10^6) - 2147484078U, // ceil(2^31 + 2^32/10^7) - 2147483691U // ceil(2^31 + 2^32/10^8) - }; -}; -// This is a struct rather than an alias to avoid shadowing warnings in gcc. -struct data : basic_data<> {}; - -#if FMT_CPLUSPLUS < 201703L -template -constexpr uint32_t basic_data::fractional_part_rounding_thresholds[]; -#endif - template () == num_bits()> using convert_float_result = conditional_t::value || doublish, double, T>; @@ -1939,15 +1948,11 @@ auto write_escaped_cp(OutputIt out, const find_escape_result& escape) *out++ = static_cast('\\'); break; default: - if (escape.cp < 0x100) { - return write_codepoint<2, Char>(out, 'x', escape.cp); - } - if (escape.cp < 0x10000) { + if (escape.cp < 0x100) return write_codepoint<2, Char>(out, 'x', escape.cp); + if (escape.cp < 0x10000) return write_codepoint<4, Char>(out, 'u', escape.cp); - } - if (escape.cp < 0x110000) { + if (escape.cp < 0x110000) return write_codepoint<8, Char>(out, 'U', escape.cp); - } for (Char escape_char : basic_string_view( escape.begin, to_unsigned(escape.end - escape.begin))) { out = write_codepoint<2, Char>(out, 'x', @@ -1977,11 +1982,13 @@ auto write_escaped_string(OutputIt out, basic_string_view str) template auto write_escaped_char(OutputIt out, Char v) -> OutputIt { + Char v_array[1] = {v}; *out++ = static_cast('\''); if ((needs_escape(static_cast(v)) && v != static_cast('"')) || v == static_cast('\'')) { - out = write_escaped_cp( - out, find_escape_result{&v, &v + 1, static_cast(v)}); + out = write_escaped_cp(out, + find_escape_result{v_array, v_array + 1, + static_cast(v)}); } else { *out++ = v; } @@ -2070,10 +2077,10 @@ template class digit_grouping { std::string::const_iterator group; int pos; }; - next_state initial_state() const { return {grouping_.begin(), 0}; } + auto initial_state() const -> next_state { return {grouping_.begin(), 0}; } // Returns the next digit group separator position. - int next(next_state& state) const { + auto next(next_state& state) const -> int { if (thousands_sep_.empty()) return max_value(); if (state.group == grouping_.end()) return state.pos += grouping_.back(); if (*state.group <= 0 || *state.group == max_value()) @@ -2092,9 +2099,9 @@ template class digit_grouping { digit_grouping(std::string grouping, std::basic_string sep) : grouping_(std::move(grouping)), thousands_sep_(std::move(sep)) {} - bool has_separator() const { return !thousands_sep_.empty(); } + auto has_separator() const -> bool { return !thousands_sep_.empty(); } - int count_separators(int num_digits) const { + auto count_separators(int num_digits) const -> int { int count = 0; auto state = initial_state(); while (num_digits > next(state)) ++count; @@ -2103,7 +2110,7 @@ template class digit_grouping { // Applies grouping to digits and write the output to out. template - Out apply(Out out, basic_string_view digits) const { + auto apply(Out out, basic_string_view digits) const -> Out { auto num_digits = static_cast(digits.size()); auto separators = basic_memory_buffer(); separators.push_back(0); @@ -2126,24 +2133,66 @@ template class digit_grouping { } }; +FMT_CONSTEXPR inline void prefix_append(unsigned& prefix, unsigned value) { + prefix |= prefix != 0 ? value << 8 : value; + prefix += (1u + (value > 0xff ? 1 : 0)) << 24; +} + // Writes a decimal integer with digit grouping. template auto write_int(OutputIt out, UInt value, unsigned prefix, const format_specs& specs, const digit_grouping& grouping) -> OutputIt { static_assert(std::is_same, UInt>::value, ""); - int num_digits = count_digits(value); - char digits[40]; - format_decimal(digits, value, num_digits); - unsigned size = to_unsigned((prefix != 0 ? 1 : 0) + num_digits + - grouping.count_separators(num_digits)); + int num_digits = 0; + auto buffer = memory_buffer(); + switch (specs.type) { + case presentation_type::none: + case presentation_type::dec: { + num_digits = count_digits(value); + format_decimal(appender(buffer), value, num_digits); + break; + } + case presentation_type::hex_lower: + case presentation_type::hex_upper: { + bool upper = specs.type == presentation_type::hex_upper; + if (specs.alt) + prefix_append(prefix, unsigned(upper ? 'X' : 'x') << 8 | '0'); + num_digits = count_digits<4>(value); + format_uint<4, char>(appender(buffer), value, num_digits, upper); + break; + } + case presentation_type::bin_lower: + case presentation_type::bin_upper: { + bool upper = specs.type == presentation_type::bin_upper; + if (specs.alt) + prefix_append(prefix, unsigned(upper ? 'B' : 'b') << 8 | '0'); + num_digits = count_digits<1>(value); + format_uint<1, char>(appender(buffer), value, num_digits); + break; + } + case presentation_type::oct: { + num_digits = count_digits<3>(value); + // Octal prefix '0' is counted as a digit, so only add it if precision + // is not greater than the number of digits. + if (specs.alt && specs.precision <= num_digits && value != 0) + prefix_append(prefix, '0'); + format_uint<3, char>(appender(buffer), value, num_digits); + break; + } + case presentation_type::chr: + return write_char(out, static_cast(value), specs); + default: + throw_format_error("invalid format specifier"); + } + + unsigned size = (prefix != 0 ? prefix >> 24 : 0) + to_unsigned(num_digits) + + to_unsigned(grouping.count_separators(num_digits)); return write_padded( out, specs, size, size, [&](reserve_iterator it) { - if (prefix != 0) { - char sign = static_cast(prefix); - *it++ = static_cast(sign); - } - return grouping.apply(it, string_view(digits, to_unsigned(num_digits))); + for (unsigned p = prefix & 0xffffff; p != 0; p >>= 8) + *it++ = static_cast(p & 0xff); + return grouping.apply(it, string_view(buffer.data(), buffer.size())); }); } @@ -2156,11 +2205,6 @@ inline auto write_loc(OutputIt, loc_value, const format_specs&, return false; } -FMT_CONSTEXPR inline void prefix_append(unsigned& prefix, unsigned value) { - prefix |= prefix != 0 ? value << 8 : value; - prefix += (1u + (value > 0xff ? 1 : 0)) << 24; -} - template struct write_int_arg { UInt abs_value; unsigned prefix; @@ -2307,25 +2351,25 @@ class counting_iterator { FMT_CONSTEXPR counting_iterator() : count_(0) {} - FMT_CONSTEXPR size_t count() const { return count_; } + FMT_CONSTEXPR auto count() const -> size_t { return count_; } - FMT_CONSTEXPR counting_iterator& operator++() { + FMT_CONSTEXPR auto operator++() -> counting_iterator& { ++count_; return *this; } - FMT_CONSTEXPR counting_iterator operator++(int) { + FMT_CONSTEXPR auto operator++(int) -> counting_iterator { auto it = *this; ++*this; return it; } - FMT_CONSTEXPR friend counting_iterator operator+(counting_iterator it, - difference_type n) { + FMT_CONSTEXPR friend auto operator+(counting_iterator it, difference_type n) + -> counting_iterator { it.count_ += static_cast(n); return it; } - FMT_CONSTEXPR value_type operator*() const { return {}; } + FMT_CONSTEXPR auto operator*() const -> value_type { return {}; } }; template @@ -2360,9 +2404,10 @@ template FMT_CONSTEXPR auto write(OutputIt out, const Char* s, const format_specs& specs, locale_ref) -> OutputIt { - return specs.type != presentation_type::pointer - ? write(out, basic_string_view(s), specs, {}) - : write_ptr(out, bit_cast(s), &specs); + if (specs.type == presentation_type::pointer) + return write_ptr(out, bit_cast(s), &specs); + if (!s) throw_format_error("string pointer is null"); + return write(out, basic_string_view(s), specs, {}); } template -FMT_CONSTEXPR auto parse_float_type_spec(const format_specs& specs, - ErrorHandler&& eh = {}) +template +FMT_CONSTEXPR auto parse_float_type_spec(const format_specs& specs) -> float_specs { auto result = float_specs(); result.showpoint = specs.alt; @@ -2486,7 +2530,7 @@ FMT_CONSTEXPR auto parse_float_type_spec(const format_specs& specs, result.format = float_format::hex; break; default: - eh.on_error("invalid format specifier"); + throw_format_error("invalid format specifier"); break; } return result; @@ -2725,12 +2769,12 @@ template class fallback_digit_grouping { public: constexpr fallback_digit_grouping(locale_ref, bool) {} - constexpr bool has_separator() const { return false; } + constexpr auto has_separator() const -> bool { return false; } - constexpr int count_separators(int) const { return 0; } + constexpr auto count_separators(int) const -> int { return 0; } template - constexpr Out apply(Out out, basic_string_view) const { + constexpr auto apply(Out out, basic_string_view) const -> Out { return out; } }; @@ -2749,7 +2793,7 @@ FMT_CONSTEXPR20 auto write_float(OutputIt out, const DecimalFP& f, } } -template constexpr bool isnan(T value) { +template constexpr auto isnan(T value) -> bool { return !(value >= value); // std::isnan doesn't support __float128. } @@ -2762,14 +2806,14 @@ struct has_isfinite> template ::value&& has_isfinite::value)> -FMT_CONSTEXPR20 bool isfinite(T value) { +FMT_CONSTEXPR20 auto isfinite(T value) -> bool { constexpr T inf = T(std::numeric_limits::infinity()); if (is_constant_evaluated()) return !detail::isnan(value) && value < inf && value > -inf; return std::isfinite(value); } template ::value)> -FMT_CONSTEXPR bool isfinite(T value) { +FMT_CONSTEXPR auto isfinite(T value) -> bool { T inf = T(std::numeric_limits::infinity()); // std::isfinite doesn't support __float128. return !detail::isnan(value) && value < inf && value > -inf; @@ -2806,10 +2850,10 @@ class bigint { basic_memory_buffer bigits_; int exp_; - FMT_CONSTEXPR20 bigit operator[](int index) const { + FMT_CONSTEXPR20 auto operator[](int index) const -> bigit { return bigits_[to_unsigned(index)]; } - FMT_CONSTEXPR20 bigit& operator[](int index) { + FMT_CONSTEXPR20 auto operator[](int index) -> bigit& { return bigits_[to_unsigned(index)]; } @@ -2905,11 +2949,11 @@ class bigint { assign(uint64_or_128_t(n)); } - FMT_CONSTEXPR20 int num_bigits() const { + FMT_CONSTEXPR20 auto num_bigits() const -> int { return static_cast(bigits_.size()) + exp_; } - FMT_NOINLINE FMT_CONSTEXPR20 bigint& operator<<=(int shift) { + FMT_NOINLINE FMT_CONSTEXPR20 auto operator<<=(int shift) -> bigint& { FMT_ASSERT(shift >= 0, ""); exp_ += shift / bigit_bits; shift %= bigit_bits; @@ -2924,13 +2968,15 @@ class bigint { return *this; } - template FMT_CONSTEXPR20 bigint& operator*=(Int value) { + template + FMT_CONSTEXPR20 auto operator*=(Int value) -> bigint& { FMT_ASSERT(value > 0, ""); multiply(uint32_or_64_or_128_t(value)); return *this; } - friend FMT_CONSTEXPR20 int compare(const bigint& lhs, const bigint& rhs) { + friend FMT_CONSTEXPR20 auto compare(const bigint& lhs, const bigint& rhs) + -> int { int num_lhs_bigits = lhs.num_bigits(), num_rhs_bigits = rhs.num_bigits(); if (num_lhs_bigits != num_rhs_bigits) return num_lhs_bigits > num_rhs_bigits ? 1 : -1; @@ -2947,8 +2993,9 @@ class bigint { } // Returns compare(lhs1 + lhs2, rhs). - friend FMT_CONSTEXPR20 int add_compare(const bigint& lhs1, const bigint& lhs2, - const bigint& rhs) { + friend FMT_CONSTEXPR20 auto add_compare(const bigint& lhs1, + const bigint& lhs2, const bigint& rhs) + -> int { auto minimum = [](int a, int b) { return a < b ? a : b; }; auto maximum = [](int a, int b) { return a > b ? a : b; }; int max_lhs_bigits = maximum(lhs1.num_bigits(), lhs2.num_bigits()); @@ -3029,13 +3076,13 @@ class bigint { bigits_.resize(to_unsigned(num_bigits + exp_difference)); for (int i = num_bigits - 1, j = i + exp_difference; i >= 0; --i, --j) bigits_[j] = bigits_[i]; - std::uninitialized_fill_n(bigits_.data(), exp_difference, 0); + std::uninitialized_fill_n(bigits_.data(), exp_difference, 0u); exp_ -= exp_difference; } // Divides this bignum by divisor, assigning the remainder to this and // returning the quotient. - FMT_CONSTEXPR20 int divmod_assign(const bigint& divisor) { + FMT_CONSTEXPR20 auto divmod_assign(const bigint& divisor) -> int { FMT_ASSERT(this != &divisor, ""); if (compare(*this, divisor) < 0) return 0; FMT_ASSERT(divisor.bigits_[divisor.bigits_.size() - 1u] != 0, ""); @@ -3178,8 +3225,10 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp value, } if (buf[0] == overflow) { buf[0] = '1'; - if ((flags & dragon::fixed) != 0) buf.push_back('0'); - else ++exp10; + if ((flags & dragon::fixed) != 0) + buf.push_back('0'); + else + ++exp10; } return; } @@ -3276,6 +3325,17 @@ FMT_CONSTEXPR20 void format_hexfloat(Float value, int precision, format_hexfloat(static_cast(value), precision, specs, buf); } +constexpr auto fractional_part_rounding_thresholds(int index) -> uint32_t { + // For checking rounding thresholds. + // The kth entry is chosen to be the smallest integer such that the + // upper 32-bits of 10^(k+1) times it is strictly bigger than 5 * 10^k. + // It is equal to ceil(2^31 + 2^32/10^(k + 1)). + // These are stored in a string literal because we cannot have static arrays + // in constexpr functions and non-static ones are poorly optimized. + return U"\x9999999a\x828f5c29\x80418938\x80068db9\x8000a7c6\x800010c7" + U"\x800001ae\x8000002b"[index]; +} + template FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs, buffer& buf) -> int { @@ -3480,12 +3540,12 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs, // fractional part is strictly larger than 1/2. if (precision < 9) { uint32_t fractional_part = static_cast(prod); - should_round_up = fractional_part >= - data::fractional_part_rounding_thresholds - [8 - number_of_digits_to_print] || - ((fractional_part >> 31) & - ((digits & 1) | (second_third_subsegments != 0) | - has_more_segments)) != 0; + should_round_up = + fractional_part >= fractional_part_rounding_thresholds( + 8 - number_of_digits_to_print) || + ((fractional_part >> 31) & + ((digits & 1) | (second_third_subsegments != 0) | + has_more_segments)) != 0; } // Rounding at the subsegment boundary. // In this case, the fractional part is at least 1/2 if and only if @@ -3520,12 +3580,12 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs, // of 19 digits, so in this case the third segment should be // consisting of a genuine digit from the input. uint32_t fractional_part = static_cast(prod); - should_round_up = fractional_part >= - data::fractional_part_rounding_thresholds - [8 - number_of_digits_to_print] || - ((fractional_part >> 31) & - ((digits & 1) | (third_subsegment != 0) | - has_more_segments)) != 0; + should_round_up = + fractional_part >= fractional_part_rounding_thresholds( + 8 - number_of_digits_to_print) || + ((fractional_part >> 31) & + ((digits & 1) | (third_subsegment != 0) | + has_more_segments)) != 0; } // Rounding at the subsegment boundary. else { @@ -3726,8 +3786,7 @@ FMT_CONSTEXPR auto write(OutputIt out, Char value) -> OutputIt { } template -FMT_CONSTEXPR_CHAR_TRAITS auto write(OutputIt out, const Char* value) - -> OutputIt { +FMT_CONSTEXPR20 auto write(OutputIt out, const Char* value) -> OutputIt { if (value) return write(out, basic_string_view(value)); throw_format_error("string pointer is null"); return out; @@ -3757,8 +3816,11 @@ template enable_if_t::value == type::custom_type, OutputIt> { + auto formatter = typename Context::template formatter_type(); + auto parse_ctx = typename Context::parse_context_type({}); + formatter.parse(parse_ctx); auto ctx = Context(out, {}, {}); - return typename Context::template formatter_type().format(value, ctx); + return formatter.format(value, ctx); } // An argument visitor that formats the argument and writes it via the output @@ -3801,62 +3863,39 @@ template struct arg_formatter { } }; -template struct custom_formatter { - basic_format_parse_context& parse_ctx; - buffer_context& ctx; - - void operator()( - typename basic_format_arg>::handle h) const { - h.format(parse_ctx, ctx); - } - template void operator()(T) const {} -}; - -template class width_checker { - public: - explicit FMT_CONSTEXPR width_checker(ErrorHandler& eh) : handler_(eh) {} - +struct width_checker { template ::value)> FMT_CONSTEXPR auto operator()(T value) -> unsigned long long { - if (is_negative(value)) handler_.on_error("negative width"); + if (is_negative(value)) throw_format_error("negative width"); return static_cast(value); } template ::value)> FMT_CONSTEXPR auto operator()(T) -> unsigned long long { - handler_.on_error("width is not integer"); + throw_format_error("width is not integer"); return 0; } - - private: - ErrorHandler& handler_; }; -template class precision_checker { - public: - explicit FMT_CONSTEXPR precision_checker(ErrorHandler& eh) : handler_(eh) {} - +struct precision_checker { template ::value)> FMT_CONSTEXPR auto operator()(T value) -> unsigned long long { - if (is_negative(value)) handler_.on_error("negative precision"); + if (is_negative(value)) throw_format_error("negative precision"); return static_cast(value); } template ::value)> FMT_CONSTEXPR auto operator()(T) -> unsigned long long { - handler_.on_error("precision is not integer"); + throw_format_error("precision is not integer"); return 0; } - - private: - ErrorHandler& handler_; }; -template