diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 7c465128d8..edc48dfe75 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -107,6 +107,8 @@ action fix_gravity_kokkos.cpp action fix_gravity_kokkos.h action fix_langevin_kokkos.cpp action fix_langevin_kokkos.h +action fix_minimize_kokkos.cpp +action fix_minimize_kokkos.h action fix_neigh_history_kokkos.cpp action fix_neigh_history_kokkos.h action fix_nh_kokkos.cpp @@ -179,6 +181,12 @@ action nbin_ssa_kokkos.cpp nbin_ssa.cpp action nbin_ssa_kokkos.h nbin_ssa.h action math_special_kokkos.cpp action math_special_kokkos.h +action min_cg_kokkos.cpp +action min_cg_kokkos.h +action min_kokkos.cpp +action min_kokkos.h +action min_linesearch_kokkos.cpp +action min_linesearch_kokkos.h action pair_buck_coul_cut_kokkos.cpp action pair_buck_coul_cut_kokkos.h action pair_buck_coul_long_kokkos.cpp pair_buck_coul_long.cpp diff --git a/src/KOKKOS/fix_minimize_kokkos.cpp b/src/KOKKOS/fix_minimize_kokkos.cpp new file mode 100644 index 0000000000..e7b10dbb8e --- /dev/null +++ b/src/KOKKOS/fix_minimize_kokkos.cpp @@ -0,0 +1,257 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "fix_minimize_kokkos.h" +#include "atom_kokkos.h" +#include "domain.h" +#include "memory_kokkos.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +/* ---------------------------------------------------------------------- */ + +FixMinimizeKokkos::FixMinimizeKokkos(LAMMPS *lmp, int narg, char **arg) : + FixMinimize(lmp, narg, arg) +{ + atomKK = (AtomKokkos *) atom; +} + +/* ---------------------------------------------------------------------- */ + +FixMinimizeKokkos::~FixMinimizeKokkos() +{ + memoryKK->destroy_kokkos(k_vectors,vectors); + vectors = NULL; +} + +/* ---------------------------------------------------------------------- + allocate/initialize memory for a new vector with 3 elements per atom +------------------------------------------------------------------------- */ + +void FixMinimizeKokkos::add_vector_kokkos() +{ + int n = 3; + + memory->grow(peratom,nvector+1,"minimize:peratom"); + peratom[nvector] = n; + + // d_vectors needs to be LayoutRight for subviews + + k_vectors.sync(); + + memoryKK->grow_kokkos(k_vectors,vectors,nvector+1,atom->nmax*n, + "minimize:vectors"); + d_vectors = k_vectors.d_view; + h_vectors = k_vectors.h_view; + + k_vectors.modify(); + + nvector++; +} + +/* ---------------------------------------------------------------------- + return a pointer to the Mth vector +------------------------------------------------------------------------- */ + +DAT::t_ffloat_1d FixMinimizeKokkos::request_vector_kokkos(int m) +{ + k_vectors.sync(); + + return Kokkos::subview(d_vectors,m,Kokkos::ALL); +} + +/* ---------------------------------------------------------------------- + reset x0 for atoms that moved across PBC via reneighboring in line search + x0 = 1st vector + must do minimum_image using original box stored at beginning of line search + swap & set_global_box() change to original box, then restore current box +------------------------------------------------------------------------- */ + +void FixMinimizeKokkos::reset_coords() +{ + box_swap(); + domain->set_global_box(); + + int nlocal = atom->nlocal; + + atomKK->sync(Device,X_MASK); + k_vectors.sync(); + + { + // local variables for lambda capture + + auto triclinic = domain->triclinic; + auto xperiodic = domain->xperiodic; + auto xprd_half = domain->xprd_half; + auto xprd = domain->xprd; + auto yperiodic = domain->yperiodic; + auto yprd_half = domain->yprd_half; + auto yprd = domain->yprd; + auto zperiodic = domain->zperiodic; + auto zprd_half = domain->zprd_half; + auto zprd = domain->zprd; + auto xy = domain->xy; + auto xz = domain->xz; + auto yz = domain->yz; + auto l_x = atomKK->k_x.d_view; + auto l_x0 = Kokkos::subview(d_vectors,0,Kokkos::ALL); + + Kokkos::parallel_for(nlocal, LAMMPS_LAMBDA(const int& i) { + const int n = i*3; + double dx0 = l_x(i,0) - l_x0[n]; + double dy0 = l_x(i,1) - l_x0[n+1]; + double dz0 = l_x(i,2) - l_x0[n+2]; + double dx = dx0; + double dy = dy0; + double dz = dz0; + // domain->minimum_image(dx,dy,dz); + { + if (triclinic == 0) { + if (xperiodic) { + if (fabs(dx) > xprd_half) { + if (dx < 0.0) dx += xprd; + else dx -= xprd; + } + } + if (yperiodic) { + if (fabs(dy) > yprd_half) { + if (dy < 0.0) dy += yprd; + else dy -= yprd; + } + } + if (zperiodic) { + if (fabs(dz) > zprd_half) { + if (dz < 0.0) dz += zprd; + else dz -= zprd; + } + } + + } else { + if (zperiodic) { + if (fabs(dz) > zprd_half) { + if (dz < 0.0) { + dz += zprd; + dy += yz; + dx += xz; + } else { + dz -= zprd; + dy -= yz; + dx -= xz; + } + } + } + if (yperiodic) { + if (fabs(dy) > yprd_half) { + if (dy < 0.0) { + dy += yprd; + dx += xy; + } else { + dy -= yprd; + dx -= xy; + } + } + } + if (xperiodic) { + if (fabs(dx) > xprd_half) { + if (dx < 0.0) dx += xprd; + else dx -= xprd; + } + } + } + } // end domain->minimum_image(dx,dy,dz); + if (dx != dx0) l_x0[n] = l_x(i,0) - dx; + if (dy != dy0) l_x0[n+1] = l_x(i,1) - dy; + if (dz != dz0) l_x0[n+2] = l_x(i,2) - dz; + }); + } + k_vectors.modify(); + + box_swap(); + domain->set_global_box(); +} + +/* ---------------------------------------------------------------------- + allocate local atom-based arrays +------------------------------------------------------------------------- */ + +void FixMinimizeKokkos::grow_arrays(int nmax) +{ + k_vectors.sync(); + memoryKK->grow_kokkos(k_vectors,vectors,nvector,3*nmax,"minimize:vector"); + d_vectors = k_vectors.d_view; + h_vectors = k_vectors.h_view; + k_vectors.modify(); +} + +/* ---------------------------------------------------------------------- + copy values within local atom-based arrays +------------------------------------------------------------------------- */ + +void FixMinimizeKokkos::copy_arrays(int i, int j, int /*delflag*/) +{ + int m,iper,nper,ni,nj; + + k_vectors.sync(); + + for (m = 0; m < nvector; m++) { + nper = 3; + ni = nper*i; + nj = nper*j; + for (iper = 0; iper < nper; iper++) h_vectors(m,nj++) = h_vectors(m,ni++); + } + + k_vectors.modify(); +} + +/* ---------------------------------------------------------------------- + pack values in local atom-based arrays for exchange with another proc +------------------------------------------------------------------------- */ + +int FixMinimizeKokkos::pack_exchange(int i, double *buf) +{ + int m,iper,nper,ni; + + k_vectors.sync(); + + int n = 0; + for (m = 0; m < nvector; m++) { + nper = peratom[m]; + ni = nper*i; + for (iper = 0; iper < nper; iper++) buf[n++] = h_vectors(m,ni++); + } + return n; +} + +/* ---------------------------------------------------------------------- + unpack values in local atom-based arrays from exchange with another proc +------------------------------------------------------------------------- */ + +int FixMinimizeKokkos::unpack_exchange(int nlocal, double *buf) +{ + int m,iper,nper,ni; + + k_vectors.sync(); + + int n = 0; + for (m = 0; m < nvector; m++) { + nper = peratom[m]; + ni = nper*nlocal; + for (iper = 0; iper < nper; iper++) h_vectors(m,ni++) = buf[n++]; + } + + k_vectors.modify(); + + return n; +} diff --git a/src/KOKKOS/fix_minimize_kokkos.h b/src/KOKKOS/fix_minimize_kokkos.h new file mode 100644 index 0000000000..921cb2fc5d --- /dev/null +++ b/src/KOKKOS/fix_minimize_kokkos.h @@ -0,0 +1,58 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(MINIMIZE/kk,FixMinimizeKokkos) +FixStyle(MINIMIZE/kk/device,FixMinimizeKokkos) +FixStyle(MINIMIZE/kk/host,FixMinimizeKokkos) + +#else + +#ifndef LMP_FIX_MINIMIZE_KOKKOS_H +#define LMP_FIX_MINIMIZE_KOKKOS_H + +#include "fix_minimize.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +class FixMinimizeKokkos : public FixMinimize { + friend class MinLineSearchKokkos; + + public: + FixMinimizeKokkos(class LAMMPS *, int, char **); + virtual ~FixMinimizeKokkos(); + void init() {} + + void grow_arrays(int); + void copy_arrays(int, int, int); + int pack_exchange(int, double *); + int unpack_exchange(int, double *); + + void add_vector_kokkos(); + DAT::t_float_1d request_vector_kokkos(int); + void reset_coords(); + + DAT::tdual_float_2d k_vectors; + DAT::t_float_2d d_vectors; + HAT::t_float_2d h_vectors; +}; + +} + +#endif +#endif +/* ERROR/WARNING messages: + +*/ diff --git a/src/KOKKOS/min_cg_kokkos.cpp b/src/KOKKOS/min_cg_kokkos.cpp new file mode 100644 index 0000000000..47a4513bc0 --- /dev/null +++ b/src/KOKKOS/min_cg_kokkos.cpp @@ -0,0 +1,178 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "min_cg_kokkos.h" +#include +#include +#include "update.h" +#include "output.h" +#include "timer.h" +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "fix_minimize_kokkos.h" + +using namespace LAMMPS_NS; + +// EPS_ENERGY = minimum normalization for energy tolerance + +#define EPS_ENERGY 1.0e-8 + +/* ---------------------------------------------------------------------- */ + +MinCGKokkos::MinCGKokkos(LAMMPS *lmp) : MinLineSearchKokkos(lmp) +{ + atomKK = (AtomKokkos *) atom; + kokkosable = 1; +} + +/* ---------------------------------------------------------------------- + minimization via conjugate gradient iterations +------------------------------------------------------------------------- */ + +int MinCGKokkos::iterate(int maxiter) +{ + int fail,ntimestep; + double beta,gg,dot[2],dotall[2]; + + fix_minimize_kk->k_vectors.sync(); + fix_minimize_kk->k_vectors.modify(); + + // nlimit = max # of CG iterations before restarting + // set to ndoftotal unless too big + + int nlimit = static_cast (MIN(MAXSMALLINT,ndoftotal)); + + // initialize working vectors + + { + // local variables for lambda capture + + auto l_h = h; + auto l_g = g; + auto l_fvec = fvec; + + Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) { + l_h[i] = l_fvec[i]; + l_g[i] = l_fvec[i]; + }); + } + + gg = fnorm_sqr(); + + for (int iter = 0; iter < maxiter; iter++) { + + if (timer->check_timeout(niter)) + return TIMEOUT; + + ntimestep = ++update->ntimestep; + niter++; + + // line minimization along direction h from current atom->x + + eprevious = ecurrent; + fail = (this->*linemin)(ecurrent,alpha_final); + if (fail) return fail; + + // function evaluation criterion + + if (neval >= update->max_eval) return MAXEVAL; + + // energy tolerance criterion + + if (fabs(ecurrent-eprevious) < + update->etol * 0.5*(fabs(ecurrent) + fabs(eprevious) + EPS_ENERGY)) + return ETOL; + + // force tolerance criterion + + s_double2 sdot; + { + // local variables for lambda capture + + auto l_g = g; + auto l_fvec = fvec; + + Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, s_double2& sdot) { + sdot.d0 += l_fvec[i]*l_fvec[i]; + sdot.d1 += l_fvec[i]*l_g[i]; + },sdot); + } + dot[0] = sdot.d0; + dot[1] = sdot.d1; + MPI_Allreduce(dot,dotall,2,MPI_DOUBLE,MPI_SUM,world); + + if (dotall[0] < update->ftol*update->ftol) return FTOL; + + // update new search direction h from new f = -Grad(x) and old g + // this is Polak-Ribieri formulation + // beta = dotall[0]/gg would be Fletcher-Reeves + // reinitialize CG every ndof iterations by setting beta = 0.0 + + beta = MAX(0.0,(dotall[0] - dotall[1])/gg); + if ((niter+1) % nlimit == 0) beta = 0.0; + gg = dotall[0]; + + { + // local variables for lambda capture + + auto l_h = h; + auto l_g = g; + auto l_fvec = fvec; + + Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) { + l_g[i] = l_fvec[i]; + l_h[i] = l_g[i] + beta*l_h[i]; + }); + } + + // reinitialize CG if new search direction h is not downhill + + double dot_0 = 0.0; + + { + // local variables for lambda capture + + auto l_h = h; + auto l_g = g; + + Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, double& dot_0) { + dot_0 += l_g[i]*l_h[i]; + },dot_0); + } + dot[0] = dot_0; + MPI_Allreduce(dot,dotall,1,MPI_DOUBLE,MPI_SUM,world); + + if (dotall[0] <= 0.0) { + // local variables for lambda capture + + auto l_h = h; + auto l_g = g; + + Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) { + l_h[i] = l_g[i]; + }); + } + + // output for thermo, dump, restart files + + if (output->next == ntimestep) { + atomKK->sync(Host,ALL_MASK); + + timer->stamp(); + output->write(ntimestep); + timer->stamp(Timer::OUTPUT); + } + } + + return MAXITER; +} diff --git a/src/KOKKOS/min_cg_kokkos.h b/src/KOKKOS/min_cg_kokkos.h new file mode 100644 index 0000000000..745bf702c7 --- /dev/null +++ b/src/KOKKOS/min_cg_kokkos.h @@ -0,0 +1,38 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef MINIMIZE_CLASS + +MinimizeStyle(cg/kk,MinCGKokkos) +MinimizeStyle(cg/kk/device,MinCGKokkos) +MinimizeStyle(cg/kk/host,MinCGKokkos) + +#else + +#ifndef LMP_MIN_CG_KOKKOS_H +#define LMP_MIN_CG_KOKKOS_H + +#include "min_linesearch_kokkos.h" + +namespace LAMMPS_NS { + +class MinCGKokkos : public MinLineSearchKokkos { + public: + MinCGKokkos(class LAMMPS *); + int iterate(int); +}; + +} + +#endif +#endif diff --git a/src/KOKKOS/min_kokkos.cpp b/src/KOKKOS/min_kokkos.cpp new file mode 100644 index 0000000000..d40d8469b0 --- /dev/null +++ b/src/KOKKOS/min_kokkos.cpp @@ -0,0 +1,657 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include "min_kokkos.h" +#include +#include +#include +#include "atom_kokkos.h" +#include "atom_vec.h" +#include "domain.h" +#include "comm.h" +#include "update.h" +#include "modify.h" +#include "fix_minimize_kokkos.h" +#include "compute.h" +#include "neighbor.h" +#include "force.h" +#include "pair.h" +#include "bond.h" +#include "angle.h" +#include "dihedral.h" +#include "improper.h" +#include "kspace.h" +#include "output.h" +#include "thermo.h" +#include "timer.h" +#include "memory.h" +#include "error.h" +#include "kokkos.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +MinKokkos::MinKokkos(LAMMPS *lmp) : Min(lmp) +{ + atomKK = (AtomKokkos *) atom; + fix_minimize_kk = NULL; +} + +/* ---------------------------------------------------------------------- */ + +MinKokkos::~MinKokkos() +{ + +} + +/* ---------------------------------------------------------------------- */ + +void MinKokkos::init() +{ + Min::init(); + + fix_minimize_kk = (FixMinimizeKokkos*) fix_minimize; +} + +/* ---------------------------------------------------------------------- + setup before run +------------------------------------------------------------------------- */ + +void MinKokkos::setup(int flag) +{ + if (comm->me == 0 && screen) { + fprintf(screen,"Setting up %s style minimization ...\n", + update->minimize_style); + if (flag) { + fprintf(screen," Unit style : %s\n", update->unit_style); + fprintf(screen," Current step : " BIGINT_FORMAT "\n", + update->ntimestep); + timer->print_timeout(screen); + } + } + update->setupflag = 1; + + // setup extra global dof due to fixes + // cannot be done in init() b/c update init() is before modify init() + + nextra_global = modify->min_dof(); + if (nextra_global) { + fextra = new double[nextra_global]; + if (comm->me == 0 && screen) + fprintf(screen,"WARNING: Energy due to %d extra global DOFs will" + " be included in minimizer energies\n",nextra_global); + } + + // compute for potential energy + + int id = modify->find_compute("thermo_pe"); + if (id < 0) error->all(FLERR,"Minimization could not find thermo_pe compute"); + pe_compute = modify->compute[id]; + + // style-specific setup does two tasks + // setup extra global dof vectors + // setup extra per-atom dof vectors due to requests from Pair classes + // cannot be done in init() b/c update init() is before modify/pair init() + + setup_style(); + + // ndoftotal = total dof for entire minimization problem + // dof for atoms, extra per-atom, extra global + + bigint ndofme = 3 * static_cast(atom->nlocal); + for (int m = 0; m < nextra_atom; m++) + ndofme += extra_peratom[m]*atom->nlocal; + MPI_Allreduce(&ndofme,&ndoftotal,1,MPI_LMP_BIGINT,MPI_SUM,world); + ndoftotal += nextra_global; + + // setup domain, communication and neighboring + // acquire ghosts + // build neighbor lists + + atom->setup(); + modify->setup_pre_exchange(); + if (triclinic) domain->x2lamda(atom->nlocal); + domain->pbc(); + domain->reset_box(); + comm->setup(); + if (neighbor->style) neighbor->setup_bins(); + comm->exchange(); + if (atom->sortfreq > 0) atom->sort(); + comm->borders(); + if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); + domain->image_check(); + domain->box_too_small_check(); + modify->setup_pre_neighbor(); + neighbor->build(1); + modify->setup_post_neighbor(); + neighbor->ncalls = 0; + + // remove these restriction eventually + + if (searchflag == 0) { + if (nextra_global) + error->all(FLERR, + "Cannot use a damped dynamics min style with fix box/relax"); + if (nextra_atom) + error->all(FLERR, + "Cannot use a damped dynamics min style with per-atom DOF"); + } + + if (strcmp(update->minimize_style,"hftn") == 0) { + if (nextra_global) + error->all(FLERR, "Cannot use hftn min style with fix box/relax"); + if (nextra_atom) + error->all(FLERR, "Cannot use hftn min style with per-atom DOF"); + } + + // atoms may have migrated in comm->exchange() + + reset_vectors(); + + // compute all forces + + force->setup(); + ev_set(update->ntimestep); + force_clear(); + modify->setup_pre_force(vflag); + + if (pair_compute_flag) { + atomKK->sync(force->pair->execution_space,force->pair->datamask_read); + force->pair->compute(eflag,vflag); + atomKK->modified(force->pair->execution_space,force->pair->datamask_modify); + timer->stamp(Timer::PAIR); + } + else if (force->pair) force->pair->compute_dummy(eflag,vflag); + + if (atomKK->molecular) { + if (force->bond) { + atomKK->sync(force->bond->execution_space,force->bond->datamask_read); + force->bond->compute(eflag,vflag); + atomKK->modified(force->bond->execution_space,force->bond->datamask_modify); + } + if (force->angle) { + atomKK->sync(force->angle->execution_space,force->angle->datamask_read); + force->angle->compute(eflag,vflag); + atomKK->modified(force->angle->execution_space,force->angle->datamask_modify); + } + if (force->dihedral) { + atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read); + force->dihedral->compute(eflag,vflag); + atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify); + } + if (force->improper) { + atomKK->sync(force->improper->execution_space,force->improper->datamask_read); + force->improper->compute(eflag,vflag); + atomKK->modified(force->improper->execution_space,force->improper->datamask_modify); + } + timer->stamp(Timer::BOND); + } + + if(force->kspace) { + force->kspace->setup(); + if (kspace_compute_flag) { + atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read); + force->kspace->compute(eflag,vflag); + atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify); + timer->stamp(Timer::KSPACE); + } else force->kspace->compute_dummy(eflag,vflag); + } + + modify->setup_pre_reverse(eflag,vflag); + if (force->newton) comm->reverse_comm(); + + // update per-atom minimization variables stored by pair styles + + if (nextra_atom) + for (int m = 0; m < nextra_atom; m++) + requestor[m]->min_xf_get(m); + + lmp->kokkos->auto_sync = 0; + modify->setup(vflag); + output->setup(flag); + lmp->kokkos->auto_sync = 1; + update->setupflag = 0; + + // stats for initial thermo output + + ecurrent = pe_compute->compute_scalar(); + if (nextra_global) ecurrent += modify->min_energy(fextra); + if (output->thermo->normflag) ecurrent /= atom->natoms; + + einitial = ecurrent; + fnorm2_init = sqrt(fnorm_sqr()); + fnorminf_init = fnorm_inf(); +} + +/* ---------------------------------------------------------------------- + setup without output or one-time post-init setup + flag = 0 = just force calculation + flag = 1 = reneighbor and force calculation +------------------------------------------------------------------------- */ + +void MinKokkos::setup_minimal(int flag) +{ + update->setupflag = 1; + + // setup domain, communication and neighboring + // acquire ghosts + // build neighbor lists + + if (flag) { + modify->setup_pre_exchange(); + if (triclinic) domain->x2lamda(atom->nlocal); + domain->pbc(); + domain->reset_box(); + comm->setup(); + if (neighbor->style) neighbor->setup_bins(); + comm->exchange(); + comm->borders(); + if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); + domain->image_check(); + domain->box_too_small_check(); + modify->setup_pre_neighbor(); + neighbor->build(1); + modify->setup_post_neighbor(); + neighbor->ncalls = 0; + } + + // atoms may have migrated in comm->exchange() + + reset_vectors(); + + // compute all forces + + ev_set(update->ntimestep); + force_clear(); + modify->setup_pre_force(vflag); + + if (pair_compute_flag) { + atomKK->sync(force->pair->execution_space,force->pair->datamask_read); + force->pair->compute(eflag,vflag); + atomKK->modified(force->pair->execution_space,force->pair->datamask_modify); + timer->stamp(Timer::PAIR); + } + else if (force->pair) force->pair->compute_dummy(eflag,vflag); + + if (atomKK->molecular) { + if (force->bond) { + atomKK->sync(force->bond->execution_space,force->bond->datamask_read); + force->bond->compute(eflag,vflag); + atomKK->modified(force->bond->execution_space,force->bond->datamask_modify); + } + if (force->angle) { + atomKK->sync(force->angle->execution_space,force->angle->datamask_read); + force->angle->compute(eflag,vflag); + atomKK->modified(force->angle->execution_space,force->angle->datamask_modify); + } + if (force->dihedral) { + atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read); + force->dihedral->compute(eflag,vflag); + atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify); + } + if (force->improper) { + atomKK->sync(force->improper->execution_space,force->improper->datamask_read); + force->improper->compute(eflag,vflag); + atomKK->modified(force->improper->execution_space,force->improper->datamask_modify); + } + timer->stamp(Timer::BOND); + } + + if(force->kspace) { + force->kspace->setup(); + if (kspace_compute_flag) { + atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read); + force->kspace->compute(eflag,vflag); + atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify); + timer->stamp(Timer::KSPACE); + } else force->kspace->compute_dummy(eflag,vflag); + } + + modify->setup_pre_reverse(eflag,vflag); + if (force->newton) comm->reverse_comm(); + + // update per-atom minimization variables stored by pair styles + + if (nextra_atom) + for (int m = 0; m < nextra_atom; m++) + requestor[m]->min_xf_get(m); + + lmp->kokkos->auto_sync = 0; + modify->setup(vflag); + lmp->kokkos->auto_sync = 1; + update->setupflag = 0; + + // stats for Finish to print + + ecurrent = pe_compute->compute_scalar(); + if (nextra_global) ecurrent += modify->min_energy(fextra); + if (output->thermo->normflag) ecurrent /= atom->natoms; + + einitial = ecurrent; + fnorm2_init = sqrt(fnorm_sqr()); + fnorminf_init = fnorm_inf(); +} + +/* ---------------------------------------------------------------------- + perform minimization, calling iterate() for N steps +------------------------------------------------------------------------- */ + +void MinKokkos::run(int n) +{ + if (nextra_global) + error->all(FLERR,"Cannot yet use extra global DOFs (e.g. fix box/relax) " + "with Kokkos minimize"); + + if (nextra_global || nextra_atom) + error->all(FLERR,"Cannot yet use extra atom DOFs (e.g. USER-AWPMD and USER-EFF packages) " + "with Kokkos minimize"); + + // minimizer iterations + + lmp->kokkos->auto_sync = 0; + atomKK->sync(Device,ALL_MASK); + + stop_condition = iterate(n); + stopstr = stopstrings(stop_condition); + + // if early exit from iterate loop: + // set update->nsteps to niter for Finish stats to print + // set output->next values to this timestep + // call energy_force() to insure vflag is set when forces computed + // output->write does final output for thermo, dump, restart files + // add ntimestep to all computes that store invocation times + // since are hardwiring call to thermo/dumps and computes may not be ready + + if (stop_condition != MAXITER) { + update->nsteps = niter; + + if (update->restrict_output == 0) { + for (int idump = 0; idump < output->ndump; idump++) + output->next_dump[idump] = update->ntimestep; + output->next_dump_any = update->ntimestep; + if (output->restart_flag) { + output->next_restart = update->ntimestep; + if (output->restart_every_single) + output->next_restart_single = update->ntimestep; + if (output->restart_every_double) + output->next_restart_double = update->ntimestep; + } + } + output->next_thermo = update->ntimestep; + + modify->addstep_compute_all(update->ntimestep); + ecurrent = energy_force(0); + + atomKK->sync(Host,ALL_MASK); + output->write(update->ntimestep); + } + + atomKK->sync(Host,ALL_MASK); + lmp->kokkos->auto_sync = 1; +} + +/* ---------------------------------------------------------------------- */ + +void MinKokkos::cleanup() +{ + modify->post_run(); + + // stats for Finish to print + + efinal = ecurrent; + fnorm2_final = sqrt(fnorm_sqr()); + fnorminf_final = fnorm_inf(); + + // reset reneighboring criteria + + neighbor->every = neigh_every; + neighbor->delay = neigh_delay; + neighbor->dist_check = neigh_dist_check; + + // delete fix at end of run, so its atom arrays won't persist + + modify->delete_fix("MINIMIZE"); + atomKK->sync(Host,ALL_MASK); + domain->box_too_small_check(); /// need KK version + atomKK->modified(Host,ALL_MASK); +} + +/* ---------------------------------------------------------------------- + evaluate potential energy and forces + may migrate atoms due to reneighboring + return new energy, which should include nextra_global dof + return negative gradient stored in atom->f + return negative gradient for nextra_global dof in fextra +------------------------------------------------------------------------- */ + +double MinKokkos::energy_force(int resetflag) +{ + // check for reneighboring + // always communicate since minimizer moved atoms + + int nflag = neighbor->decide(); + + if (nflag == 0) { + timer->stamp(); + comm->forward_comm(); + timer->stamp(Timer::COMM); + } else { + if (modify->n_min_pre_exchange) { + timer->stamp(); + modify->min_pre_exchange(); + timer->stamp(Timer::MODIFY); + } + if (triclinic) domain->x2lamda(atom->nlocal); + domain->pbc(); + if (domain->box_change) { + domain->reset_box(); + comm->setup(); + if (neighbor->style) neighbor->setup_bins(); + } + timer->stamp(); + comm->exchange(); + if (atom->sortfreq > 0 && + update->ntimestep >= atom->nextsort) atom->sort(); + comm->borders(); + if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); + timer->stamp(Timer::COMM); + if (modify->n_min_pre_neighbor) { + modify->min_pre_neighbor(); + timer->stamp(Timer::MODIFY); + } + neighbor->build(1); + timer->stamp(Timer::NEIGH); + if (modify->n_min_post_neighbor) { + modify->min_post_neighbor(); + timer->stamp(Timer::MODIFY); + } + } + + ev_set(update->ntimestep); + force_clear(); + + timer->stamp(); + + if (modify->n_min_pre_force) { + modify->min_pre_force(vflag); + timer->stamp(Timer::MODIFY); + } + + if (pair_compute_flag) { + atomKK->sync(force->pair->execution_space,force->pair->datamask_read); + force->pair->compute(eflag,vflag); + atomKK->modified(force->pair->execution_space,force->pair->datamask_modify); + timer->stamp(Timer::PAIR); + } + + if (atom->molecular) { + if (force->bond) { + atomKK->sync(force->bond->execution_space,force->bond->datamask_read); + force->bond->compute(eflag,vflag); + atomKK->modified(force->bond->execution_space,force->bond->datamask_modify); + } + if (force->angle) { + atomKK->sync(force->angle->execution_space,force->angle->datamask_read); + force->angle->compute(eflag,vflag); + atomKK->modified(force->angle->execution_space,force->angle->datamask_modify); + } + if (force->dihedral) { + atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read); + force->dihedral->compute(eflag,vflag); + atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify); + } + if (force->improper) { + atomKK->sync(force->improper->execution_space,force->improper->datamask_read); + force->improper->compute(eflag,vflag); + atomKK->modified(force->improper->execution_space,force->improper->datamask_modify); + } + timer->stamp(Timer::BOND); + } + + if (kspace_compute_flag) { + atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read); + force->kspace->compute(eflag,vflag); + atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify); + timer->stamp(Timer::KSPACE); + } + + if (modify->n_min_pre_reverse) { + modify->min_pre_reverse(eflag,vflag); + timer->stamp(Timer::MODIFY); + } + + if (force->newton) { + comm->reverse_comm(); + timer->stamp(Timer::COMM); + } + + // fixes that affect minimization + + if (modify->n_min_post_force) { + timer->stamp(); + modify->min_post_force(vflag); + timer->stamp(Timer::MODIFY); + } + + // compute potential energy of system + // normalize if thermo PE does + + atomKK->sync(pe_compute->execution_space,pe_compute->datamask_read); + double energy = pe_compute->compute_scalar(); + atomKK->modified(pe_compute->execution_space,pe_compute->datamask_modify); + if (nextra_global) energy += modify->min_energy(fextra); + if (output->thermo->normflag) energy /= atom->natoms; + + // if reneighbored, atoms migrated + // if resetflag = 1, update x0 of atoms crossing PBC + // reset vectors used by lo-level minimizer + + if (nflag) { + if (resetflag) fix_minimize_kk->reset_coords(); + reset_vectors(); + } + return energy; +} + +/* ---------------------------------------------------------------------- + clear force on own & ghost atoms + clear other arrays as needed +------------------------------------------------------------------------- */ + +void MinKokkos::force_clear() +{ + if (external_force_clear) return; + + // clear global force array + // if either newton flag is set, also include ghosts + + atomKK->k_f.clear_sync_state(); // ignore host forces/torques since device views + atomKK->k_torque.clear_sync_state(); // will be cleared below + + int nzero = atom->nlocal; + if (force->newton) nzero += atom->nghost; + + if (nzero) { + // local variables for lambda capture + + auto l_f = atomKK->k_f.d_view; + auto l_torque = atomKK->k_torque.d_view; + auto l_torqueflag = torqueflag; + + Kokkos::parallel_for(nzero, LAMMPS_LAMBDA(int i) { + l_f(i,0) = 0.0; + l_f(i,1) = 0.0; + l_f(i,2) = 0.0; + if (l_torqueflag) { + l_torque(i,0) = 0.0; + l_torque(i,1) = 0.0; + l_torque(i,2) = 0.0; + } + }); + } + atomKK->modified(Device,F_MASK); +} + +/* ---------------------------------------------------------------------- + compute and return ||force||_2^2 +------------------------------------------------------------------------- */ + +double MinKokkos::fnorm_sqr() +{ + + double local_norm2_sqr = 0.0; + { + // local variables for lambda capture + + auto l_fvec = fvec; + + Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(int i, double& local_norm2_sqr) { + local_norm2_sqr += l_fvec[i]*l_fvec[i]; + },local_norm2_sqr); + } + + double norm2_sqr = 0.0; + MPI_Allreduce(&local_norm2_sqr,&norm2_sqr,1,MPI_DOUBLE,MPI_SUM,world); + + return norm2_sqr; +} + +/* ---------------------------------------------------------------------- + compute and return ||force||_inf +------------------------------------------------------------------------- */ + +double MinKokkos::fnorm_inf() +{ + + double local_norm_inf = 0.0; + { + // local variables for lambda capture + + auto l_fvec = fvec; + + Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(int i, double& local_norm_inf) { + local_norm_inf = MAX(fabs(l_fvec[i]),local_norm_inf); + },Kokkos::Max(local_norm_inf)); + } + + double norm_inf = 0.0; + MPI_Allreduce(&local_norm_inf,&norm_inf,1,MPI_DOUBLE,MPI_MAX,world); + + return norm_inf; +} diff --git a/src/KOKKOS/min_kokkos.h b/src/KOKKOS/min_kokkos.h new file mode 100644 index 0000000000..04e00fa75e --- /dev/null +++ b/src/KOKKOS/min_kokkos.h @@ -0,0 +1,59 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifndef LMP_MIN_KOKKOS_H +#define LMP_MIN_KOKKOS_H + +#include "min.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +class MinKokkos : public Min { + public: + MinKokkos(class LAMMPS *); + virtual ~MinKokkos(); + void init(); + void setup(int flag=1); + void setup_minimal(int); + void run(int); + void cleanup(); + double fnorm_sqr(); + double fnorm_inf(); + + virtual void init_style() {} + virtual void setup_style() = 0; + virtual void reset_vectors() = 0; + virtual int iterate(int) = 0; + + // possible return values of iterate() method + enum{MAXITER,MAXEVAL,ETOL,FTOL,DOWNHILL,ZEROALPHA,ZEROFORCE, + ZEROQUAD,TRSMALL,INTERROR,TIMEOUT}; + + //protected: // won't compile with CUDA + class FixMinimizeKokkos *fix_minimize_kk; // fix that stores auxiliary data + + DAT::t_ffloat_1d xvec; // variables for atomic dof, as 1d vector + DAT::t_ffloat_1d fvec; // force vector for atomic dof, as 1d vector + + double energy_force(int); + void force_clear(); +}; + +} + +#endif + +/* ERROR/WARNING messages: + +*/ diff --git a/src/KOKKOS/min_linesearch_kokkos.cpp b/src/KOKKOS/min_linesearch_kokkos.cpp new file mode 100644 index 0000000000..cb07c7db86 --- /dev/null +++ b/src/KOKKOS/min_linesearch_kokkos.cpp @@ -0,0 +1,401 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include "min_linesearch_kokkos.h" +#include +#include +#include "atom_kokkos.h" +#include "modify.h" +#include "fix_minimize_kokkos.h" +#include "pair.h" +#include "output.h" +#include "thermo.h" +#include "error.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; + +// ALPHA_MAX = max alpha allowed to avoid long backtracks +// ALPHA_REDUCE = reduction ratio, should be in range [0.5,1) +// BACKTRACK_SLOPE, should be in range (0,0.5] +// QUADRATIC_TOL = tolerance on alpha0, should be in range [0.1,1) +// EMACH = machine accuracy limit of energy changes (1.0e-8) +// EPS_QUAD = tolerance for quadratic projection + +#define ALPHA_MAX 1.0 +#define ALPHA_REDUCE 0.5 +#define BACKTRACK_SLOPE 0.4 +#define QUADRATIC_TOL 0.1 +//#define EMACH 1.0e-8 +#define EMACH 1.0e-8 +#define EPS_QUAD 1.0e-28 + +/* ---------------------------------------------------------------------- */ + +MinLineSearchKokkos::MinLineSearchKokkos(LAMMPS *lmp) : MinKokkos(lmp) +{ + searchflag = 1; + atomKK = (AtomKokkos *) atom; +} + +/* ---------------------------------------------------------------------- */ + +MinLineSearchKokkos::~MinLineSearchKokkos() +{ + +} + +/* ---------------------------------------------------------------------- */ + +void MinLineSearchKokkos::init() +{ + MinKokkos::init(); + + if (linestyle == 1) linemin = &MinLineSearchKokkos::linemin_quadratic; + else error->all(FLERR,"Kokkos minimize only supports the 'min_modify line " + "quadratic' option"); +} + +/* ---------------------------------------------------------------------- */ + +void MinLineSearchKokkos::setup_style() +{ + // memory for x0,g,h for atomic dof + + fix_minimize_kk->add_vector_kokkos(); + fix_minimize_kk->add_vector_kokkos(); + fix_minimize_kk->add_vector_kokkos(); +} + +/* ---------------------------------------------------------------------- + set current vector lengths and pointers + called after atoms have migrated +------------------------------------------------------------------------- */ + +void MinLineSearchKokkos::reset_vectors() +{ + // atomic dof + + nvec = 3 * atom->nlocal; + atomKK->sync(Device,F_MASK|X_MASK); + auto d_x = atomKK->k_x.d_view; + auto d_f = atomKK->k_f.d_view; + + if (nvec) xvec = DAT::t_ffloat_1d(d_x.data(),d_x.size()); + if (nvec) fvec = DAT::t_ffloat_1d(d_f.data(),d_f.size()); + x0 = fix_minimize_kk->request_vector_kokkos(0); + g = fix_minimize_kk->request_vector_kokkos(1); + h = fix_minimize_kk->request_vector_kokkos(2); + + auto h_fvec = Kokkos::create_mirror_view(fvec); + Kokkos::deep_copy(h_fvec,fvec); +} + +/* ---------------------------------------------------------------------- + line minimization methods + find minimum-energy starting at x along h direction + input args: eoriginal = energy at initial x + input extra: n,x,x0,f,h for atomic, extra global, extra per-atom dof + output args: return 0 if successful move, non-zero alpha + return non-zero if failed + alpha = distance moved along h for x at min eng config + update neval counter of eng/force function evaluations + output extra: if fail, energy_force() of original x + if succeed, energy_force() at x + alpha*h + atom->x = coords at new configuration + atom->f = force at new configuration + ecurrent = energy of new configuration +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + // linemin: quadratic line search (adapted from Dennis and Schnabel) + // The objective function is approximated by a quadratic + // function in alpha, for sufficiently small alpha. + // This idea is the same as that used in the well-known secant + // method. However, since the change in the objective function + // (difference of two finite numbers) is not known as accurately + // as the gradient (which is close to zero), all the expressions + // are written in terms of gradients. In this way, we can converge + // the LAMMPS forces much closer to zero. + // + // We know E,Eprev,fh,fhprev. The Taylor series about alpha_prev + // truncated at the quadratic term is: + // + // E = Eprev - del_alpha*fhprev + (1/2)del_alpha^2*Hprev + // + // and + // + // fh = fhprev - del_alpha*Hprev + // + // where del_alpha = alpha-alpha_prev + // + // We solve these two equations for Hprev and E=Esolve, giving: + // + // Esolve = Eprev - del_alpha*(f+fprev)/2 + // + // We define relerr to be: + // + // relerr = |(Esolve-E)/Eprev| + // = |1.0 - (0.5*del_alpha*(f+fprev)+E)/Eprev| + // + // If this is accurate to within a reasonable tolerance, then + // we go ahead and use a secant step to fh = 0: + // + // alpha0 = alpha - (alpha-alphaprev)*fh/delfh; + // +------------------------------------------------------------------------- */ + +int MinLineSearchKokkos::linemin_quadratic(double eoriginal, double &alpha) +{ + double fdothall,fdothme,hme,hmaxall; + double de_ideal,de; + double delfh,engprev,relerr,alphaprev,fhprev,ff,fh,alpha0; + double dot[2],dotall[2]; + double alphamax; + + fix_minimize_kk->k_vectors.sync(); + fix_minimize_kk->k_vectors.modify(); + + // fdothall = projection of search dir along downhill gradient + // if search direction is not downhill, exit with error + + fdothme = 0.0; + { + // local variables for lambda capture + + auto l_fvec = fvec; + auto l_h = h; + + Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, double& fdothme) { + fdothme += l_fvec[i]*l_h[i]; + },fdothme); + } + MPI_Allreduce(&fdothme,&fdothall,1,MPI_DOUBLE,MPI_SUM,world); + if (output->thermo->normflag) fdothall /= atom->natoms; + + if (fdothall <= 0.0) return DOWNHILL; + + // set alphamax so no dof is changed by more than max allowed amount + // for atom coords, max amount = dmax + // for extra per-atom dof, max amount = extra_max[] + // for extra global dof, max amount is set by fix + // also insure alphamax <= ALPHA_MAX + // else will have to backtrack from huge value when forces are tiny + // if all search dir components are already 0.0, exit with error + + + hme = 0.0; + { + // local variables for lambda capture + + auto l_h = h; + + Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, double& hme) { + hme = MAX(hme,fabs(l_h[i])); + },Kokkos::Max(hme)); + } + MPI_Allreduce(&hme,&hmaxall,1,MPI_DOUBLE,MPI_MAX,world); + alphamax = MIN(ALPHA_MAX,dmax/hmaxall); + + if (hmaxall == 0.0) return ZEROFORCE; + + // store box and values of all dof at start of linesearch + + { + // local variables for lambda capture + + auto l_xvec = xvec; + auto l_x0 = x0; + + fix_minimize_kk->store_box(); + Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) { + l_x0[i] = l_xvec[i]; + }); + } + + // backtrack with alpha until energy decrease is sufficient + // or until get to small energy change, then perform quadratic projection + + alpha = alphamax; + fhprev = fdothall; + engprev = eoriginal; + alphaprev = 0.0; + + // // important diagnostic: test the gradient against energy + // double etmp; + // double alphatmp = alphamax*1.0e-4; + // etmp = alpha_step(alphatmp,1); + // printf("alpha = %g dele = %g dele_force = %g err = %g\n", + // alphatmp,etmp-eoriginal,-alphatmp*fdothall, + // etmp-eoriginal+alphatmp*fdothall); + // alpha_step(0.0,1); + + while (1) { + ecurrent = alpha_step(alpha,1); + + // compute new fh, alpha, delfh + + s_double2 sdot; + { + // local variables for lambda capture + + auto l_fvec = fvec; + auto l_h = h; + + Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, s_double2& sdot) { + sdot.d0 += l_fvec[i]*l_fvec[i]; + sdot.d1 += l_fvec[i]*l_h[i]; + },sdot); + } + dot[0] = sdot.d0; + dot[1] = sdot.d1; + + MPI_Allreduce(dot,dotall,2,MPI_DOUBLE,MPI_SUM,world); + ff = dotall[0]; + fh = dotall[1]; + if (output->thermo->normflag) { + ff /= atom->natoms; + fh /= atom->natoms; + } + + delfh = fh - fhprev; + + // if fh or delfh is epsilon, reset to starting point, exit with error + + if (fabs(fh) < EPS_QUAD || fabs(delfh) < EPS_QUAD) { + ecurrent = alpha_step(0.0,0); + return ZEROQUAD; + } + + // Check if ready for quadratic projection, equivalent to secant method + // alpha0 = projected alpha + + relerr = fabs(1.0-(0.5*(alpha-alphaprev)*(fh+fhprev)+ecurrent)/engprev); + alpha0 = alpha - (alpha-alphaprev)*fh/delfh; + + if (relerr <= QUADRATIC_TOL && alpha0 > 0.0 && alpha0 < alphamax) { + ecurrent = alpha_step(alpha0,1); + if (ecurrent - eoriginal < EMACH) { + return 0; + } + } + + // if backtracking energy change is better than ideal, exit with success + + de_ideal = -BACKTRACK_SLOPE*alpha*fdothall; + de = ecurrent - eoriginal; + + if (de <= de_ideal) + return 0; + + // save previous state + + fhprev = fh; + engprev = ecurrent; + alphaprev = alpha; + + // reduce alpha + + alpha *= ALPHA_REDUCE; + + // backtracked all the way to 0.0 + // reset to starting point, exit with error + + if (alpha <= 0.0 || de_ideal >= -EMACH) { + ecurrent = alpha_step(0.0,0); + return ZEROALPHA; + } + } +} + +/* ---------------------------------------------------------------------- */ + +double MinLineSearchKokkos::alpha_step(double alpha, int resetflag) +{ + // reset to starting point + + atomKK->k_x.clear_sync_state(); // ignore if host positions since device + // positions will be reset below + { + // local variables for lambda capture + + auto l_xvec = xvec; + auto l_x0 = x0; + + Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) { + l_xvec[i] = l_x0[i]; + }); + } + + // step forward along h + + if (alpha > 0.0) { + // local variables for lambda capture + + auto l_xvec = xvec; + auto l_h = h; + + Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) { + l_xvec[i] += alpha*l_h[i]; + }); + } + + atomKK->modified(Device,X_MASK); + + // compute and return new energy + + neval++; + return energy_force(resetflag); +} + +/* ---------------------------------------------------------------------- */ + +// compute projection of force on: itself and the search direction + +double MinLineSearchKokkos::compute_dir_deriv(double &ff) +{ + double dot[2],dotall[2]; + double fh; + + // compute new fh, alpha, delfh + + s_double2 sdot; + { + // local variables for lambda capture + + auto l_fvec = fvec; + auto l_h = h; + + Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, s_double2& sdot) { + sdot.d0 += l_fvec[i]*l_fvec[i]; + sdot.d1 += l_fvec[i]*l_h[i]; + },sdot); + } + dot[0] = sdot.d0; + dot[1] = sdot.d1; + + MPI_Allreduce(dot,dotall,2,MPI_DOUBLE,MPI_SUM,world); + + ff = dotall[0]; + fh = dotall[1]; + if (output->thermo->normflag) { + ff /= atom->natoms; + fh /= atom->natoms; + } + + return fh; +} diff --git a/src/KOKKOS/min_linesearch_kokkos.h b/src/KOKKOS/min_linesearch_kokkos.h new file mode 100644 index 0000000000..70df13d1e7 --- /dev/null +++ b/src/KOKKOS/min_linesearch_kokkos.h @@ -0,0 +1,69 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifndef LMP_MIN_LSRCH_KOKKOS_H +#define LMP_MIN_LSRCH_KOKKOS_H + +#include "min_kokkos.h" + +namespace LAMMPS_NS { + + struct s_double2 { + double d0, d1; + KOKKOS_INLINE_FUNCTION + s_double2() { + d0 = d1 = 0.0; + } + KOKKOS_INLINE_FUNCTION + s_double2& operator+=(const s_double2 &rhs){ + d0 += rhs.d0; + d1 += rhs.d1; + return *this; + } + + KOKKOS_INLINE_FUNCTION + void operator+=(const volatile s_double2 &rhs) volatile { + d0 += rhs.d0; + d1 += rhs.d1; + } + }; + //typedef s_double2 double2; + +class MinLineSearchKokkos : public MinKokkos { + public: + MinLineSearchKokkos(class LAMMPS *); + ~MinLineSearchKokkos(); + void init(); + void setup_style(); + void reset_vectors(); + + //protected: // won't compile with CUDA + // vectors needed by linesearch minimizers + // allocated and stored by fix_minimize + // x,f are stored by parent or Atom class or Pair class + + DAT::t_ffloat_1d x0; // coords at start of linesearch + DAT::t_ffloat_1d g; // old gradient vector + DAT::t_ffloat_1d h; // search direction vector + + typedef int (MinLineSearchKokkos::*FnPtr)(double, double &); + FnPtr linemin; + int linemin_quadratic(double, double &); + + double alpha_step(double, int); + double compute_dir_deriv(double &); +}; + +} + +#endif diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp index 2d2f0a9815..23952b71d8 100644 --- a/src/KOKKOS/verlet_kokkos.cpp +++ b/src/KOKKOS/verlet_kokkos.cpp @@ -143,7 +143,6 @@ void VerletKokkos::setup(int flag) } else if (force->pair) force->pair->compute_dummy(eflag,vflag); - if (atomKK->molecular) { if (force->bond) { atomKK->sync(force->bond->execution_space,force->bond->datamask_read); @@ -248,7 +247,6 @@ void VerletKokkos::setup_minimal(int flag) } else if (force->pair) force->pair->compute_dummy(eflag,vflag); - if (atomKK->molecular) { if (force->bond) { atomKK->sync(force->bond->execution_space,force->bond->datamask_read); @@ -285,7 +283,9 @@ void VerletKokkos::setup_minimal(int flag) if (force->newton) comm->reverse_comm(); + lmp->kokkos->auto_sync = 0; modify->setup(vflag); + lmp->kokkos->auto_sync = 1; update->setupflag = 0; } diff --git a/src/REPLICA/tad.cpp b/src/REPLICA/tad.cpp index 8a51f6d00e..b2fbb74071 100644 --- a/src/REPLICA/tad.cpp +++ b/src/REPLICA/tad.cpp @@ -197,7 +197,7 @@ void TAD::command(int narg, char **arg) args = new char*[narg2]; args[0] = min_style; - update->create_minimize(narg2,args); + update->create_minimize(narg2,args,1); delete [] args; @@ -711,7 +711,7 @@ void TAD::perform_neb(int ievent) args = new char*[narg2]; args[0] = min_style_neb; - update->create_minimize(narg2,args); + update->create_minimize(narg2,args,1); delete [] args; @@ -777,7 +777,7 @@ void TAD::perform_neb(int ievent) args = new char*[narg2]; args[0] = min_style; - update->create_minimize(narg2,args); + update->create_minimize(narg2,args,1); update->etol = etol; update->ftol = ftol; diff --git a/src/fix_minimize.cpp b/src/fix_minimize.cpp index df2dfb02a7..93b13fac49 100644 --- a/src/fix_minimize.cpp +++ b/src/fix_minimize.cpp @@ -42,8 +42,10 @@ FixMinimize::~FixMinimize() // delete locally stored data memory->destroy(peratom); - for (int m = 0; m < nvector; m++) memory->destroy(vectors[m]); - memory->sfree(vectors); + if (vectors) { + for (int m = 0; m < nvector; m++) memory->destroy(vectors[m]); + memory->sfree(vectors); + } } /* ---------------------------------------------------------------------- */ diff --git a/src/fix_minimize.h b/src/fix_minimize.h index 7b9f571628..615eb6a095 100644 --- a/src/fix_minimize.h +++ b/src/fix_minimize.h @@ -29,22 +29,22 @@ class FixMinimize : public Fix { public: FixMinimize(class LAMMPS *, int, char **); - ~FixMinimize(); + virtual ~FixMinimize(); int setmask(); - void init() {} + virtual void init() {} double memory_usage(); - void grow_arrays(int); - void copy_arrays(int, int, int); - int pack_exchange(int, double *); - int unpack_exchange(int, double *); + virtual void grow_arrays(int); + virtual void copy_arrays(int, int, int); + virtual int pack_exchange(int, double *); + virtual int unpack_exchange(int, double *); - void add_vector(int); + virtual void add_vector(int); double *request_vector(int); void store_box(); void reset_coords(); - private: + protected: int nvector; int *peratom; double **vectors; diff --git a/src/input.cpp b/src/input.cpp index 6871ee23f5..6adf37e847 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -1672,7 +1672,7 @@ void Input::min_style() { if (domain->box_exist == 0) error->all(FLERR,"Min_style command before simulation box is defined"); - update->create_minimize(narg,arg); + update->create_minimize(narg,arg,1); } /* ---------------------------------------------------------------------- */ diff --git a/src/min.cpp b/src/min.cpp index 408954edb8..b6bed01c75 100644 --- a/src/min.cpp +++ b/src/min.cpp @@ -68,6 +68,8 @@ Min::Min(LAMMPS *lmp) : Pointers(lmp) requestor = NULL; external_force_clear = 0; + + kokkosable = 0; } /* ---------------------------------------------------------------------- */ @@ -93,6 +95,10 @@ Min::~Min() void Min::init() { + if (lmp->kokkos && !kokkosable) + error->all(FLERR,"Must use a Kokkos-enabled min style (e.g. min_style cg/kk) " + "with Kokkos minimize"); + // create fix needed for storing atom-based quantities // will delete it at end of run diff --git a/src/min.h b/src/min.h index a63254231c..9578ba0de3 100644 --- a/src/min.h +++ b/src/min.h @@ -31,16 +31,16 @@ class Min : protected Pointers { Min(class LAMMPS *); virtual ~Min(); virtual void init(); - void setup(int flag=1); - void setup_minimal(int); - void run(int); - void cleanup(); + virtual void setup(int flag=1); + virtual void setup_minimal(int); + virtual void run(int); + virtual void cleanup(); int request(class Pair *, int, double); virtual bigint memory_usage() {return 0;} void modify_params(int, char **); virtual int modify_param(int, char **) {return 0;} - double fnorm_sqr(); - double fnorm_inf(); + virtual double fnorm_sqr(); + virtual double fnorm_inf(); virtual void init_style() {} virtual void setup_style() = 0; @@ -97,10 +97,12 @@ class Min : protected Pointers { double *extra_max; // max allowed change per iter for atom's var class Pair **requestor; // Pair that stores/manipulates the variable + int kokkosable; // 1 if this min style supports Kokkos + int neigh_every,neigh_delay,neigh_dist_check; // neighboring params - double energy_force(int); - void force_clear(); + virtual double energy_force(int); + virtual void force_clear(); double compute_force_norm_sqr(); double compute_force_norm_inf(); diff --git a/src/minimize.cpp b/src/minimize.cpp index e1af924019..94fdcd8681 100644 --- a/src/minimize.cpp +++ b/src/minimize.cpp @@ -52,9 +52,6 @@ void Minimize::command(int narg, char **arg) if (update->laststep < 0) error->all(FLERR,"Too many iterations"); - if (lmp->kokkos) - error->all(FLERR,"Cannot yet use minimize with Kokkos"); - lmp->init(); timer->init_timeout(); update->minimize->setup(); diff --git a/src/update.cpp b/src/update.cpp index 8a3e6f0d1d..68a6a1a057 100644 --- a/src/update.cpp +++ b/src/update.cpp @@ -78,7 +78,7 @@ Update::Update(LAMMPS *lmp) : Pointers(lmp) create_integrate(1,&str,1); str = (char *) "cg"; - create_minimize(1,&str); + create_minimize(1,&str,1); } /* ---------------------------------------------------------------------- */ @@ -376,22 +376,69 @@ Integrate *Update::integrate_creator(LAMMPS *lmp, int narg, char ** arg) /* ---------------------------------------------------------------------- */ -void Update::create_minimize(int narg, char **arg) +void Update::create_minimize(int narg, char **arg, int trysuffix) { - if (narg != 1) error->all(FLERR,"Illegal min_style command"); + if (narg < 1) error->all(FLERR,"Illegal run_style command"); delete [] minimize_style; delete minimize; - if (minimize_map->find(arg[0]) != minimize_map->end()) { - MinimizeCreator minimize_creator = (*minimize_map)[arg[0]]; - minimize = minimize_creator(lmp); - } - else error->all(FLERR,"Illegal min_style command"); + int sflag; + new_minimize(arg[0],narg-1,&arg[1],trysuffix,sflag); - int n = strlen(arg[0]) + 1; - minimize_style = new char[n]; - strcpy(minimize_style,arg[0]); + if (sflag) { + char estyle[256]; + if (sflag == 1) snprintf(estyle,256,"%s/%s",arg[0],lmp->suffix); + else snprintf(estyle,256,"%s/%s",arg[0],lmp->suffix2); + int n = strlen(estyle) + 1; + minimize_style = new char[n]; + strcpy(minimize_style,estyle); + } else { + int n = strlen(arg[0]) + 1; + minimize_style = new char[n]; + strcpy(minimize_style,arg[0]); + } +} + +/* ---------------------------------------------------------------------- + create the Minimize style, first with suffix appended +------------------------------------------------------------------------- */ + +void Update::new_minimize(char *style, int narg, char **arg, + int trysuffix, int &sflag) +{ + if (trysuffix && lmp->suffix_enable) { + if (lmp->suffix) { + sflag = 1; + char estyle[256]; + snprintf(estyle,256,"%s/%s",style,lmp->suffix); + if (minimize_map->find(estyle) != minimize_map->end()) { + MinimizeCreator minimize_creator = (*minimize_map)[estyle]; + minimize = minimize_creator(lmp); + return; + } + } + + if (lmp->suffix2) { + sflag = 2; + char estyle[256]; + snprintf(estyle,256,"%s/%s",style,lmp->suffix2); + if (minimize_map->find(estyle) != minimize_map->end()) { + MinimizeCreator minimize_creator = (*minimize_map)[estyle]; + minimize = minimize_creator(lmp); + return; + } + } + } + + sflag = 0; + if (minimize_map->find(style) != minimize_map->end()) { + MinimizeCreator minimize_creator = (*minimize_map)[style]; + minimize = minimize_creator(lmp); + return; + } + + error->all(FLERR,"Illegal minimize style"); } /* ---------------------------------------------------------------------- diff --git a/src/update.h b/src/update.h index d3602ef21e..e70325a498 100644 --- a/src/update.h +++ b/src/update.h @@ -63,7 +63,7 @@ class Update : protected Pointers { void init(); void set_units(const char *); void create_integrate(int, char **, int); - void create_minimize(int, char **); + void create_minimize(int, char **, int); void reset_timestep(int, char **); void reset_timestep(bigint); void update_time(); @@ -71,6 +71,7 @@ class Update : protected Pointers { private: void new_integrate(char *, int, char **, int, int &); + void new_minimize(char *, int, char **, int, int &); template static Integrate *integrate_creator(LAMMPS *, int, char **); template static Min *minimize_creator(LAMMPS *);