Merge pull request #3264 from stanmoore1/kk_pace_release

Add Kokkos version of ML-PACE
This commit is contained in:
Axel Kohlmeyer
2022-05-13 23:36:05 -04:00
committed by GitHub
13 changed files with 2174 additions and 19 deletions

View File

@ -231,7 +231,7 @@ OPT.
* :doc:`oxrna2/stk <pair_oxrna2>`
* :doc:`oxrna2/xstk <pair_oxrna2>`
* :doc:`oxrna2/coaxstk <pair_oxrna2>`
* :doc:`pace <pair_pace>`
* :doc:`pace (k) <pair_pace>`
* :doc:`peri/eps <pair_peri>`
* :doc:`peri/lps (o) <pair_peri>`
* :doc:`peri/pmb (o) <pair_peri>`

View File

@ -1,7 +1,10 @@
.. index:: pair_style pace
.. index:: pair_style pace/kk
pair_style pace command
========================
=======================
Accelerator Variants: *pace/kk*
Syntax
""""""
@ -10,13 +13,14 @@ Syntax
pair_style pace ... keyword values ...
* an optional keyword may be appended
* keyword = *product* or *recursive*
* one or more keyword/value pairs may be appended
.. parsed-literal::
keyword = *product* or *recursive* or *chunksize*
*product* = use product algorithm for basis functions
*recursive* = use recursive algorithm for basis functions
*chunksize* value = number of atoms in each pass
Examples
""""""""
@ -24,7 +28,7 @@ Examples
.. code-block:: LAMMPS
pair_style pace
pair_style pace product
pair_style pace product chunksize 2048
pair_coeff * * Cu-PBE-core-rep.ace Cu
Description
@ -59,11 +63,19 @@ Note that unlike for other potentials, cutoffs are
not set in the pair_style or pair_coeff command; they are specified in
the ACE file.
The pair_style *pace* command may be followed by an optional keyword
The pair_style *pace* command may be followed by the optional keyword
*product* or *recursive*, which determines which of two algorithms
is used for the calculation of basis functions and derivatives.
The default is *recursive*.
The keyword *chunksize* is only applicable when
using the pair style *pace* with the KOKKOS package on GPUs and is
ignored otherwise. This keyword controls the number of atoms
in each pass used to compute the atomic cluster expansion and is used to
avoid running out of memory. For example, if there are 8192 atoms in the
simulation and the *chunksize* is set to 4096, the ACE
calculation will be broken up into two passes (running on a single GPU).
See the :doc:`pair_coeff <pair_coeff>` page for alternate ways
to specify the path for the ACE coefficient file.
@ -88,6 +100,10 @@ This pair style can only be used via the *pair* keyword of the
----------
.. include:: accel_styles.rst
----------
Restrictions
""""""""""""
@ -103,7 +119,7 @@ Related commands
Default
"""""""
recursive
recursive, chunksize = 4096
.. _Drautz20191:

View File

@ -14,8 +14,6 @@ create_atoms 1 box
mass 1 26.98
group Al type 1
pair_style pace product
pair_coeff * * Cu-PBE-core-rep.ace Cu

View File

@ -14,8 +14,6 @@ create_atoms 1 box
mass 1 26.98
group Al type 1
pair_style pace recursive
pair_coeff * * Cu-PBE-core-rep.ace Cu

View File

@ -154,6 +154,10 @@ if (test $1 = "RIGID") then
depend DPD-SMOOTH
fi
if (test $1 = "ML-PACE") then
depend KOKKOS
fi
if (test $1 = "ML-SNAP") then
depend KOKKOS
depend ML-IAP

View File

@ -291,6 +291,8 @@ action pair_morse_kokkos.cpp
action pair_morse_kokkos.h
action pair_multi_lucy_rx_kokkos.cpp pair_multi_lucy_rx.cpp
action pair_multi_lucy_rx_kokkos.h pair_multi_lucy_rx.h
action pair_pace_kokkos.cpp pair_pace.cpp
action pair_pace_kokkos.h pair_pace.h
action pair_reaxff_kokkos.cpp pair_reaxff.cpp
action pair_reaxff_kokkos.h pair_reaxff.h
action pair_snap_kokkos.cpp pair_snap.cpp

View File

@ -1284,8 +1284,13 @@ struct alignas(2*sizeof(real_type_)) SNAComplex
static constexpr complex one() { return complex(static_cast<real_type>(1.), static_cast<real_type>(0.)); }
KOKKOS_INLINE_FUNCTION
const complex conj() { return complex(re, -im); }
const complex conj() const { return complex(re, -im); }
KOKKOS_INLINE_FUNCTION
const real_type real_part_product(const complex &cm2) { return re * cm2.re - im * cm2.im; }
KOKKOS_INLINE_FUNCTION
const real_type real_part_product(const real_type &r) const { return re * r; }
};
template <typename real_type>
@ -1293,6 +1298,16 @@ KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real_type> operator*(const real_type& r,
return SNAComplex<real_type>(r*self.re, r*self.im);
}
// Scale a complex number by a real factor given on the right-hand side:
// both components are multiplied by r.
template <typename real_type>
KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real_type> operator*(const SNAComplex<real_type>& self, const real_type& r) {
const auto scaled_re = r*self.re;
const auto scaled_im = r*self.im;
return SNAComplex<real_type>(scaled_re, scaled_im);
}
// Complex product self * cm2:
//   real part      = self.re*cm2.re - self.im*cm2.im
//   imaginary part = self.re*cm2.im + self.im*cm2.re
template <typename real_type>
KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real_type> operator*(const SNAComplex<real_type>& self, const SNAComplex<real_type>& cm2) {
const auto prod_re = self.re*cm2.re - self.im*cm2.im;
const auto prod_im = self.re*cm2.im + self.im*cm2.re;
return SNAComplex<real_type>(prod_re, prod_im);
}
typedef SNAComplex<SNAreal> SNAcomplex;
#if defined(KOKKOS_ENABLE_CXX11)

View File

@ -273,6 +273,63 @@ void destroy_kokkos(TYPE data, typename TYPE::value_type** &array)
array = nullptr;
}
/* ----------------------------------------------------------------------
reallocate Kokkos views without initialization
deallocate first to reduce memory use
data = view to reallocate, its previous contents are discarded
name = label given to the new allocation
dims = extents of the new view, one argument per dimension; each value
is converted to int before it is handed to the view constructor
a single variadic template serves every rank, so no per-rank overload
has to be maintained
------------------------------------------------------------------------- */
template <typename TYPE, typename... DIMS>
void realloc_kokkos(TYPE &data, const char *name, DIMS... dims)
{
// release the old allocation before requesting the new one so that
// both buffers never exist at the same time
data = TYPE();
data = TYPE(Kokkos::NoInit(std::string(name)),static_cast<int>(dims)...);
}
/* ----------------------------------------------------------------------
memory footprint of a Kokkos view in bytes
computed as the view's span (number of elements spanned by the
allocation) times the size of one element
------------------------------------------------------------------------- */
template <typename TYPE>
double memory_usage(TYPE &data)
{
using element_type = typename TYPE::value_type;
const double nbytes = data.span() * sizeof(element_type);
return nbytes;
}
};
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,334 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
// clang-format off
PairStyle(pace/kk,PairPACEKokkos<LMPDeviceType>);
PairStyle(pace/kk/device,PairPACEKokkos<LMPDeviceType>);
PairStyle(pace/kk/host,PairPACEKokkos<LMPHostType>);
// clang-format on
#else
// clang-format off
#ifndef LMP_PAIR_PACE_KOKKOS_H
#define LMP_PAIR_PACE_KOKKOS_H
#include "pair_pace.h"
#include "ace_radial.h"
#include "kokkos_type.h"
#include "pair_kokkos.h"
namespace LAMMPS_NS {
// Kokkos variant of the PACE pair style. Derives from PairPACE and is
// templated on the Kokkos device type it is instantiated for. The work is
// split into several kernels, each implemented as an operator() overload
// selected by one of the tag types declared below.
template<class DeviceType>
class PairPACEKokkos : public PairPACE {
public:
// Empty tag types: the first argument of each operator() overload below,
// used only to pick the overload.
struct TagPairPACEComputeNeigh{};
struct TagPairPACEComputeRadial{};
struct TagPairPACEComputeYlm{};
struct TagPairPACEComputeAi{};
struct TagPairPACEConjugateAi{};
struct TagPairPACEComputeRho{};
struct TagPairPACEComputeFS{};
struct TagPairPACEComputeWeights{};
struct TagPairPACEComputeDerivative{};
// Force tag, additionally parameterized on NEIGHFLAG and EVFLAG.
template<int NEIGHFLAG, int EVFLAG>
struct TagPairPACEComputeForce{};
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
typedef EV_FLOAT value_type;
// Complex numbers in this class are double-precision SNAComplex values.
using complex = SNAComplex<double>;
PairPACEKokkos(class LAMMPS *);
~PairPACEKokkos() override;
void compute(int, int) override;
void coeff(int, char **) override;
void init_style() override;
double init_one(int, int) override;
// Kernels taking a TeamPolicy member (one call per team).
KOKKOS_INLINE_FUNCTION
void operator() (TagPairPACEComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeNeigh>::member_type& team) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairPACEComputeRadial,const typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeRadial>::member_type& team) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairPACEComputeYlm,const typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeYlm>::member_type& team) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairPACEComputeAi,const typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeAi>::member_type& team) const;
// Kernels taking a flat integer index.
KOKKOS_INLINE_FUNCTION
void operator() (TagPairPACEConjugateAi,const int& ii) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairPACEComputeRho,const int& iter) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairPACEComputeFS,const int& ii) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairPACEComputeWeights,const int& iter) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairPACEComputeDerivative,const typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeDerivative>::member_type& team) const;
// Force kernel: one overload without and one with an EV_FLOAT argument
// (NOTE(review): presumably the reduction accumulator - confirm in the .cpp).
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void operator() (TagPairPACEComputeForce<NEIGHFLAG,EVFLAG>,const int& ii) const;
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void operator() (TagPairPACEComputeForce<NEIGHFLAG,EVFLAG>,const int& ii, EV_FLOAT&) const;
protected:
int inum, maxneigh, chunk_size, chunk_offset, idx_rho_max;
int host_flag;
int eflag, vflag;
int neighflag, max_ndensity;
int nelements, lmax, nradmax, nradbase;
// neighbor list views
typename AT::t_neighbors_2d d_neighbors;
typename AT::t_int_1d_randomread d_ilist;
typename AT::t_int_1d_randomread d_numneigh;
// per-atom energy and virial, as dual views and as device views
DAT::tdual_efloat_1d k_eatom;
DAT::tdual_virial_array k_vatom;
typename AT::t_efloat_1d d_eatom;
typename AT::t_virial_array d_vatom;
// atom positions, forces and types
typename AT::t_x_array_randomread x;
typename AT::t_f_array f;
typename AT::t_int_1d_randomread type;
typedef Kokkos::DualView<F_FLOAT**, DeviceType> tdual_fparams;
tdual_fparams k_cutsq, k_scale;
typedef Kokkos::View<F_FLOAT**, DeviceType> t_fparams;
t_fparams d_cutsq, d_scale;
typename AT::t_int_1d d_map;
// Scatter views for forces and per-atom virials, in duplicated and
// non-duplicated flavors (NOTE(review): presumably need_dup selects
// between them - confirm in the .cpp).
int need_dup;
using KKDeviceType = typename KKDevice<DeviceType>::value;
template<typename DataType, typename Layout>
using DupScatterView = KKScatterView<DataType, Layout, KKDeviceType, KKScatterSum, KKScatterDuplicated>;
template<typename DataType, typename Layout>
using NonDupScatterView = KKScatterView<DataType, Layout, KKDeviceType, KKScatterSum, KKScatterNonDuplicated>;
DupScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout> dup_f;
DupScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout> dup_vatom;
NonDupScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout> ndup_f;
NonDupScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout> ndup_vatom;
friend void pair_virial_fdotr_compute<PairPACEKokkos>(PairPACEKokkos*);
void grow(int, int);
void copy_pertype();
void copy_splines();
void copy_tilde();
void allocate() override;
// NOTE(review): pre_compute_harmonics(int) is also declared further down;
// confirm whether both are defined and used.
void precompute_harmonics();
// NOTE(review): not marked override, unlike allocate() above - presumably
// this overrides a base-class virtual; confirm and add override if so.
double memory_usage();
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j,
const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz,
const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const;
KOKKOS_INLINE_FUNCTION
void compute_barplm(int, int, double, int) const;
KOKKOS_INLINE_FUNCTION
void compute_ylm(int, int, double, double, double, int) const;
KOKKOS_INLINE_FUNCTION
void cutoff_func_poly(const double, const double, const double, double &, double &) const;
KOKKOS_INLINE_FUNCTION
void Fexp(const double, const double, double &, double &) const;
KOKKOS_INLINE_FUNCTION
void FexpShiftedScaled(const double, const double, double &, double &) const;
KOKKOS_INLINE_FUNCTION
void inner_cutoff(const double, const double, const double, double &, double &) const;
KOKKOS_INLINE_FUNCTION
void FS_values_and_derivatives(const int, double&, const int) const;
KOKKOS_INLINE_FUNCTION
void evaluate_splines(const int, const int, double, int, int, int, int) const;
template<class TagStyle>
void check_team_size_for(int, int&, int);
template<class TagStyle>
void check_team_size_reduce(int, int&, int);
// Utility routine which wraps computing per-team scratch size requirements.
// NOTE(review): presumably used for the team-policy kernels declared above
// (e.g. ComputeNeigh) - confirm against the .cpp.
template <typename scratch_type>
int scratch_size_helper(int values_per_team);
// Shorthand view types: the digit is the rank, the suffix the element
// type (i = int, d = double, c = complex); a trailing 3 or 4 marks a
// compile-time extent of that size in the last dimension.
typedef Kokkos::View<int*, DeviceType> t_ace_1i;
typedef Kokkos::View<int**, DeviceType> t_ace_2i;
typedef Kokkos::View<int***, DeviceType> t_ace_3i;
typedef Kokkos::View<int****, DeviceType> t_ace_4i;
typedef Kokkos::View<double*, DeviceType> t_ace_1d;
typedef Kokkos::View<double**, DeviceType> t_ace_2d;
typedef Kokkos::View<double*[3], DeviceType> t_ace_2d3;
typedef Kokkos::View<double***, DeviceType> t_ace_3d;
typedef Kokkos::View<double**[3], DeviceType> t_ace_3d3;
typedef Kokkos::View<double**[4], DeviceType> t_ace_3d4;
typedef Kokkos::View<double****, DeviceType> t_ace_4d;
typedef Kokkos::View<complex*, DeviceType> t_ace_1c;
typedef Kokkos::View<complex**, DeviceType> t_ace_2c;
typedef Kokkos::View<complex***, DeviceType> t_ace_3c;
typedef Kokkos::View<complex**[3], DeviceType> t_ace_3c3;
typedef Kokkos::View<complex****, DeviceType> t_ace_4c;
typedef Kokkos::View<complex***[3], DeviceType> t_ace_4c3;
t_ace_3d A_rank1;
t_ace_4c A;
t_ace_3c A_list;
t_ace_3c A_forward_prod;
t_ace_3d weights_rank1;
t_ace_4c weights;
t_ace_1d e_atom;
t_ace_2d rhos;
t_ace_2d dF_drho;
// hard-core repulsion
t_ace_1d rho_core;
t_ace_3c dB_flatten;
t_ace_2d cr;
t_ace_2d dcr;
t_ace_1d dF_drho_core;
// radial functions
t_ace_4d fr;
t_ace_4d dfr;
t_ace_3d gr;
t_ace_3d dgr;
t_ace_3d d_values;
t_ace_3d d_derivatives;
// Spherical Harmonics
// NOTE(review): compute_barplm and compute_ylm are also declared above with
// different parameter lists and const qualification, and
// precompute_harmonics() is declared above as well; confirm which of these
// declarations are actually defined and used.
void pre_compute_harmonics(int);
KOKKOS_INLINE_FUNCTION
void compute_barplm(double rz, int lmaxi);
KOKKOS_INLINE_FUNCTION
void compute_ylm(double rx, double ry, double rz, int lmaxi);
t_ace_1d alm;
t_ace_1d blm;
t_ace_1d cl;
t_ace_1d dl;
t_ace_3d plm;
t_ace_3d dplm;
t_ace_3c ylm;
t_ace_4c3 dylm;
// short neigh list
t_ace_1i d_ncount;
t_ace_2d d_mu;
t_ace_2d d_rnorms;
t_ace_3d3 d_rhats;
t_ace_2i d_nearest;
// per-type
t_ace_1i d_ndensity;
t_ace_1i d_npoti;
t_ace_1d d_rho_core_cutoff;
t_ace_1d d_drho_core_cutoff;
t_ace_1d d_E0vals;
t_ace_2d d_wpre;
t_ace_2d d_mexp;
// tilde
t_ace_1i d_idx_rho_count;
t_ace_2i d_rank;
t_ace_2i d_num_ms_combs;
t_ace_2i d_offsets;
t_ace_3i d_mus;
t_ace_3i d_ns;
t_ace_3i d_ls;
t_ace_3i d_ms_combs;
t_ace_3d d_ctildes;
t_ace_3d3 f_ij;
public:
// Counterpart of SplineInterpolator that keeps its lookup table in a
// Kokkos view of type t_ace_3d4.
struct SplineInterpolatorKokkos {
int ntot, nlut, num_of_functions;
double cutoff, deltaSplineBins, invrscalelookup, rscalelookup;
// extents: (ntot+1, num_of_functions, 4), as allocated in operator=
t_ace_3d4 lookupTable;
// Copy the scalar parameters from a SplineInterpolator, allocate the
// lookup table view and fill it with the source table's entries.
void operator=(const SplineInterpolator &spline) {
cutoff = spline.cutoff;
deltaSplineBins = spline.deltaSplineBins;
ntot = spline.ntot;
nlut = spline.nlut;
invrscalelookup = spline.invrscalelookup;
rscalelookup = spline.rscalelookup;
num_of_functions = spline.num_of_functions;
lookupTable = t_ace_3d4("lookupTable", ntot+1, num_of_functions);
// fill a mirror view element by element, then copy it into lookupTable
auto h_lookupTable = Kokkos::create_mirror_view(lookupTable);
for (int i = 0; i < ntot+1; i++)
for (int j = 0; j < num_of_functions; j++)
for (int k = 0; k < 4; k++)
h_lookupTable(i, j, k) = spline.lookupTable(i, j, k);
Kokkos::deep_copy(lookupTable, h_lookupTable);
}
// Reset the lookup table to a default-constructed (empty) view.
void deallocate() {
lookupTable = t_ace_3d4();
}
// Size of the lookup table in bytes: span times element size.
double memory_usage() {
return lookupTable.span() * sizeof(typename decltype(lookupTable)::value_type);
}
KOKKOS_INLINE_FUNCTION
void calcSplines(const int ii, const int jj, const double r, const t_ace_3d &d_values, const t_ace_3d &d_derivatives) const;
};
// 2d dual views of spline interpolators
Kokkos::DualView<SplineInterpolatorKokkos**, DeviceType> k_splines_gk;
Kokkos::DualView<SplineInterpolatorKokkos**, DeviceType> k_splines_rnl;
Kokkos::DualView<SplineInterpolatorKokkos**, DeviceType> k_splines_hc;
};
} // namespace LAMMPS_NS
#endif
#endif

View File

@ -91,9 +91,8 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::init_style()
{
if (host_flag) {
if (lmp->kokkos->nthreads > 1)
if (comm->me == 0)
utils::logmesg(lmp,"Pair style snap/kk currently only runs on a single "
"CPU thread, even if more threads are requested\n");
error->all(FLERR,"Pair style snap/kk can currently only run on a single "
"CPU thread");
PairSNAP::init_style();
return;

View File

@ -91,6 +91,8 @@ PairPACE::PairPACE(LAMMPS *lmp) : Pair(lmp)
recursive = false;
scale = nullptr;
chunksize = 4096;
}
/* ----------------------------------------------------------------------
@ -250,18 +252,25 @@ void PairPACE::allocate()
void PairPACE::settings(int narg, char **arg)
{
if (narg > 1) error->all(FLERR, "Illegal pair_style command.");
if (narg > 3) error->all(FLERR, "Illegal pair_style command.");
// ACE potentials are parameterized in metal units
if (strcmp("metal", update->unit_style) != 0)
error->all(FLERR, "ACE potentials require 'metal' units");
recursive = true; // default evaluator style: RECURSIVE
if (narg > 0) {
if (strcmp(arg[0], "recursive") == 0)
int iarg = 0;
while (iarg < narg) {
if (strcmp(arg[iarg], "recursive") == 0) {
recursive = true;
else if (strcmp(arg[0], "product") == 0) {
iarg += 1;
} else if (strcmp(arg[iarg], "product") == 0) {
recursive = false;
iarg += 1;
} else if (strcmp(arg[iarg], "chunksize") == 0) {
chunksize = utils::inumeric(FLERR,arg[iarg+1],false,lmp);
iarg += 2;
} else
error->all(FLERR, "Illegal pair_style command");
}

View File

@ -56,6 +56,8 @@ class PairPACE : public Pair {
double **scale;
bool recursive; // "recursive" option for ACERecursiveEvaluator
int chunksize;
};
} // namespace LAMMPS_NS