Merge pull request #3264 from stanmoore1/kk_pace_release

Add Kokkos version of ML-PACE
2022-05-13 23:36:05 -04:00
parent 04537c9f22 545cec1785
commit b8a4ddc42a
13 changed files with 2174 additions and 19 deletions
--- a/doc/src/Commands_pair.rst
+++ b/doc/src/Commands_pair.rst
@ -231,7 +231,7 @@ OPT.
   * :doc:`oxrna2/stk <pair_oxrna2>`
   * :doc:`oxrna2/xstk <pair_oxrna2>`
   * :doc:`oxrna2/coaxstk <pair_oxrna2>`
-   * :doc:`pace <pair_pace>`
+   * :doc:`pace (k) <pair_pace>`
   * :doc:`peri/eps <pair_peri>`
   * :doc:`peri/lps (o) <pair_peri>`
   * :doc:`peri/pmb (o) <pair_peri>`
--- a/doc/src/pair_pace.rst
+++ b/doc/src/pair_pace.rst
@ -1,7 +1,10 @@
 .. index:: pair_style pace
+.. index:: pair_style pace/kk

 pair_style pace command
-========================
+=======================
+
+Accelerator Variants: *pace/kk*

 Syntax
 """"""
@ -10,13 +13,14 @@ Syntax

   pair_style pace ... keyword values ...

-* an optional keyword may be appended
-* keyword = *product* or *recursive*
+* zero or more keywords may be appended; the *chunksize* keyword takes a value

  .. parsed-literal::

+     keyword = *product* or *recursive* or *chunksize*
       *product* = use product algorithm for basis functions
       *recursive* = use recursive algorithm for basis functions
+       *chunksize* value = number of atoms in each pass

 Examples
 """"""""
@ -24,7 +28,7 @@ Examples
 .. code-block:: LAMMPS

   pair_style pace
-   pair_style pace product
+   pair_style pace product chunksize 2048
   pair_coeff * * Cu-PBE-core-rep.ace Cu

 Description
@ -59,11 +63,19 @@ Note that unlike for other potentials, cutoffs are
 not set in the pair_style or pair_coeff command; they are specified in
 the ACE file.

-The pair_style *pace* command may be followed by an optional keyword
+The pair_style *pace* command may be followed by the optional keyword
 *product* or *recursive*, which determines which of two algorithms
 is used for the calculation of basis functions and derivatives.
 The default is *recursive*.

+The keyword *chunksize* is only applicable when
+using the pair style *pace* with the KOKKOS package on GPUs and is
+ignored otherwise.  This keyword controls the number of atoms
+in each pass used to compute the atomic cluster expansion and is used to
+avoid running out of memory.  For example, if there are 8192 atoms in the
+simulation and the *chunksize* is set to 4096, the ACE
+calculation will be broken up into two passes (running on a single GPU).
+
 See the :doc:`pair_coeff <pair_coeff>` page for alternate ways
 to specify the path for the ACE coefficient file.

@ -88,6 +100,10 @@ This pair style can only be used via the *pair* keyword of the

 ----------

+.. include:: accel_styles.rst
+
+----------
+
 Restrictions
 """"""""""""

@ -103,7 +119,7 @@ Related commands
 Default
 """""""

-recursive
+recursive, chunksize = 4096

 .. _Drautz20191:

--- a/examples/PACKAGES/pace/in.pace.product
+++ b/examples/PACKAGES/pace/in.pace.product
@ -14,8 +14,6 @@ create_atoms	1 box

 mass		1 26.98

-group		Al type 1
-
 pair_style 	pace product
 pair_coeff  * * Cu-PBE-core-rep.ace Cu

--- a/examples/PACKAGES/pace/in.pace.recursive
+++ b/examples/PACKAGES/pace/in.pace.recursive
@ -14,8 +14,6 @@ create_atoms	1 box

 mass		1 26.98

-group		Al type 1
-
 pair_style 	pace recursive
 pair_coeff  * * Cu-PBE-core-rep.ace Cu

--- a/src/Depend.sh
+++ b/src/Depend.sh
@ -154,6 +154,10 @@ if (test $1 = "RIGID") then
  depend DPD-SMOOTH
 fi

+# KOKKOS installs pair_pace_kokkos.* on top of pair_pace.* (see KOKKOS/Install.sh),
+# so KOKKOS is listed as depending on ML-PACE.
+# NOTE(review): presumably `depend` re-syncs the named package - confirm against its definition.
+if (test $1 = "ML-PACE") then
+  depend KOKKOS
+fi
+
 if (test $1 = "ML-SNAP") then
  depend KOKKOS
  depend ML-IAP
--- a/src/KOKKOS/Install.sh
+++ b/src/KOKKOS/Install.sh
@ -291,6 +291,8 @@ action pair_morse_kokkos.cpp
 action pair_morse_kokkos.h
 action pair_multi_lucy_rx_kokkos.cpp pair_multi_lucy_rx.cpp
 action pair_multi_lucy_rx_kokkos.h pair_multi_lucy_rx.h
+action pair_pace_kokkos.cpp pair_pace.cpp
+action pair_pace_kokkos.h pair_pace.h
 action pair_reaxff_kokkos.cpp pair_reaxff.cpp
 action pair_reaxff_kokkos.h pair_reaxff.h
 action pair_snap_kokkos.cpp pair_snap.cpp
--- a/src/KOKKOS/kokkos_type.h
+++ b/src/KOKKOS/kokkos_type.h
@ -1284,8 +1284,13 @@ struct alignas(2*sizeof(real_type_)) SNAComplex
  static constexpr complex one() { return complex(static_cast<real_type>(1.), static_cast<real_type>(0.)); }

  KOKKOS_INLINE_FUNCTION
-  const complex conj() { return complex(re, -im); }
+  const complex conj() const { return complex(re, -im); }

+  // Real part of the complex product (*this) * cm2, i.e. re*cm2.re - im*cm2.im.
+  // Declared const (it only reads re and im) so that it can be called on
+  // const objects and through const references.
+  KOKKOS_INLINE_FUNCTION
+  const real_type real_part_product(const complex &cm2) const { return re * cm2.re - im * cm2.im; }
+
+  // Real part of the product of this complex number with a purely real value r.
+  KOKKOS_INLINE_FUNCTION
+  const real_type real_part_product(const real_type &r) const { return re * r; }
 };

 template <typename real_type>
@ -1293,6 +1298,16 @@ KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real_type> operator*(const real_type& r,
  return SNAComplex<real_type>(r*self.re, r*self.im);
 }

+// Scale a complex number by a real factor written on the right-hand side (complex * real).
+template <typename real_type>
+KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real_type> operator*(const SNAComplex<real_type>& self, const real_type& r) {
+  const real_type scaled_re = r*self.re;
+  const real_type scaled_im = r*self.im;
+  return SNAComplex<real_type>(scaled_re, scaled_im);
+}
+
+// Complex-by-complex product: (a + ib)(c + id) = (ac - bd) + i(ad + bc).
+template <typename real_type>
+KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real_type> operator*(const SNAComplex<real_type>& self, const SNAComplex<real_type>& cm2) {
+  const real_type prod_re = self.re*cm2.re - self.im*cm2.im;
+  const real_type prod_im = self.re*cm2.im + self.im*cm2.re;
+  return SNAComplex<real_type>(prod_re, prod_im);
+}
+
 typedef SNAComplex<SNAreal> SNAcomplex;

 #if defined(KOKKOS_ENABLE_CXX11)
--- a/src/KOKKOS/memory_kokkos.h
+++ b/src/KOKKOS/memory_kokkos.h
@ -273,6 +273,63 @@ void destroy_kokkos(TYPE data, typename TYPE::value_type** &array)
  array = nullptr;
 }

+/* ----------------------------------------------------------------------
+   reallocate Kokkos views without initialization
+   deallocate first to reduce memory use
+------------------------------------------------------------------------- */
+
+// 1d overload: replace data with a new view labeled name and of extent n1.
+template <typename TYPE>
+void realloc_kokkos(TYPE &data, const char *name, int n1)
+{
+  data = TYPE();    // assign an empty view first so the old one is let go before the new one is built
+  data = TYPE(Kokkos::NoInit(std::string(name)),n1);    // NoInit: new contents are left uninitialized
+}
+
+// 2d overload: replace data with a new view labeled name and of extents n1 x n2.
+template <typename TYPE>
+void realloc_kokkos(TYPE &data, const char *name, int n1, int n2)
+{
+  data = TYPE();    // assign an empty view first so the old one is let go before the new one is built
+  data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2);    // NoInit: new contents are left uninitialized
+}
+
+// 3d overload: replace data with a new view labeled name and of extents n1 x n2 x n3.
+template <typename TYPE>
+void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3)
+{
+  data = TYPE();    // assign an empty view first so the old one is let go before the new one is built
+  data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3);    // NoInit: new contents are left uninitialized
+}
+
+// 4d overload: replace data with a new view labeled name and of extents n1 x n2 x n3 x n4.
+template <typename TYPE>
+void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4)
+{
+  data = TYPE();    // assign an empty view first so the old one is let go before the new one is built
+  data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4);    // NoInit: new contents are left uninitialized
+}
+
+// 5d overload: replace data with a new view labeled name and of extents n1 x n2 x n3 x n4 x n5.
+template <typename TYPE>
+void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4, int n5)
+{
+  data = TYPE();    // assign an empty view first so the old one is let go before the new one is built
+  data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4,n5);    // NoInit: new contents are left uninitialized
+}
+
+// 6d overload: replace data with a new view labeled name and of extents n1 x n2 x n3 x n4 x n5 x n6.
+template <typename TYPE>
+void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4, int n5, int n6)
+{
+  data = TYPE();    // assign an empty view first so the old one is let go before the new one is built
+  data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4,n5,n6);    // NoInit: new contents are left uninitialized
+}
+
+/* ----------------------------------------------------------------------
+   get memory usage of Kokkos view in bytes
+------------------------------------------------------------------------- */
+
+template <typename TYPE>
+double memory_usage(TYPE &data)
+{
+  // bytes = number of elements spanned by the view times the size of one element
+  using element_type = typename TYPE::value_type;
+  const auto nbytes = data.span() * sizeof(element_type);
+  return nbytes;
+}
+
 };

 }
--- a/src/KOKKOS/pair_pace_kokkos.cpp
+++ b/src/KOKKOS/pair_pace_kokkos.cpp
--- a/src/KOKKOS/pair_pace_kokkos.h
+++ b/src/KOKKOS/pair_pace_kokkos.h
@ -0,0 +1,334 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(pace/kk,PairPACEKokkos<LMPDeviceType>);
+PairStyle(pace/kk/device,PairPACEKokkos<LMPDeviceType>);
+PairStyle(pace/kk/host,PairPACEKokkos<LMPHostType>);
+// clang-format on
+#else
+
+// clang-format off
+#ifndef LMP_PAIR_PACE_KOKKOS_H
+#define LMP_PAIR_PACE_KOKKOS_H
+
+#include "pair_pace.h"
+#include "ace_radial.h"
+#include "kokkos_type.h"
+#include "pair_kokkos.h"
+
+namespace LAMMPS_NS {
+
+template<class DeviceType>
+class PairPACEKokkos : public PairPACE {
+ public:
+  struct TagPairPACEComputeNeigh{};
+  struct TagPairPACEComputeRadial{};
+  struct TagPairPACEComputeYlm{};
+  struct TagPairPACEComputeAi{};
+  struct TagPairPACEConjugateAi{};
+  struct TagPairPACEComputeRho{};
+  struct TagPairPACEComputeFS{};
+  struct TagPairPACEComputeWeights{};
+  struct TagPairPACEComputeDerivative{};
+
+  template<int NEIGHFLAG, int EVFLAG>
+  struct TagPairPACEComputeForce{};
+
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  typedef EV_FLOAT value_type;
+  using complex = SNAComplex<double>;
+
+  PairPACEKokkos(class LAMMPS *);
+  ~PairPACEKokkos() override;
+
+  void compute(int, int) override;
+  void coeff(int, char **) override;
+  void init_style() override;
+  double init_one(int, int) override;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (TagPairPACEComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeNeigh>::member_type& team) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (TagPairPACEComputeRadial,const typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeRadial>::member_type& team) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (TagPairPACEComputeYlm,const typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeYlm>::member_type& team) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (TagPairPACEComputeAi,const typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeAi>::member_type& team) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (TagPairPACEConjugateAi,const int& ii) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (TagPairPACEComputeRho,const int& iter) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (TagPairPACEComputeFS,const int& ii) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (TagPairPACEComputeWeights,const int& iter) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (TagPairPACEComputeDerivative,const typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeDerivative>::member_type& team) const;
+
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator() (TagPairPACEComputeForce<NEIGHFLAG,EVFLAG>,const int& ii) const;
+
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator() (TagPairPACEComputeForce<NEIGHFLAG,EVFLAG>,const int& ii, EV_FLOAT&) const;
+
+ protected:
+  int inum, maxneigh, chunk_size, chunk_offset, idx_rho_max;
+  int host_flag;
+
+  int eflag, vflag;
+
+  int neighflag, max_ndensity;
+  int nelements, lmax, nradmax, nradbase;
+
+  typename AT::t_neighbors_2d d_neighbors;
+  typename AT::t_int_1d_randomread d_ilist;
+  typename AT::t_int_1d_randomread d_numneigh;
+
+  DAT::tdual_efloat_1d k_eatom;
+  DAT::tdual_virial_array k_vatom;
+  typename AT::t_efloat_1d d_eatom;
+  typename AT::t_virial_array d_vatom;
+
+  typename AT::t_x_array_randomread x;
+  typename AT::t_f_array f;
+  typename AT::t_int_1d_randomread type;
+
+  typedef Kokkos::DualView<F_FLOAT**, DeviceType> tdual_fparams;
+  tdual_fparams k_cutsq, k_scale;
+  typedef Kokkos::View<F_FLOAT**, DeviceType> t_fparams;
+  t_fparams d_cutsq, d_scale;
+
+  typename AT::t_int_1d d_map;
+
+  int need_dup;
+
+  using KKDeviceType = typename KKDevice<DeviceType>::value;
+
+  template<typename DataType, typename Layout>
+  using DupScatterView = KKScatterView<DataType, Layout, KKDeviceType, KKScatterSum, KKScatterDuplicated>;
+
+  template<typename DataType, typename Layout>
+  using NonDupScatterView = KKScatterView<DataType, Layout, KKDeviceType, KKScatterSum, KKScatterNonDuplicated>;
+
+  DupScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout> dup_f;
+  DupScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout> dup_vatom;
+
+  NonDupScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout> ndup_f;
+  NonDupScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout> ndup_vatom;
+
+  friend void pair_virial_fdotr_compute<PairPACEKokkos>(PairPACEKokkos*);
+
+  void grow(int, int);
+  void copy_pertype();
+  void copy_splines();
+  void copy_tilde();
+  void allocate() override;
+  void precompute_harmonics();
+  double memory_usage();
+
+  template<int NEIGHFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j,
+      const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz,
+      const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void compute_barplm(int, int, double, int) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void compute_ylm(int, int, double, double, double, int) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void cutoff_func_poly(const double, const double, const double, double &, double &) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void Fexp(const double, const double, double &, double &) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void FexpShiftedScaled(const double, const double, double &, double &) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void inner_cutoff(const double, const double, const double, double &, double &) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void FS_values_and_derivatives(const int, double&, const int) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void evaluate_splines(const int, const int, double, int, int, int, int) const;
+
+  template<class TagStyle>
+  void check_team_size_for(int, int&, int);
+
+  template<class TagStyle>
+  void check_team_size_reduce(int, int&, int);
+
+  // Utility routine which wraps computing per-team scratch size requirements
+  // for the team-parallel kernels of this pair style
+  template <typename scratch_type>
+  int scratch_size_helper(int values_per_team);
+
+  typedef Kokkos::View<int*, DeviceType> t_ace_1i;
+  typedef Kokkos::View<int**, DeviceType> t_ace_2i;
+  typedef Kokkos::View<int***, DeviceType> t_ace_3i;
+  typedef Kokkos::View<int****, DeviceType> t_ace_4i;
+  typedef Kokkos::View<double*, DeviceType> t_ace_1d;
+  typedef Kokkos::View<double**, DeviceType> t_ace_2d;
+  typedef Kokkos::View<double*[3], DeviceType> t_ace_2d3;
+  typedef Kokkos::View<double***, DeviceType> t_ace_3d;
+  typedef Kokkos::View<double**[3], DeviceType> t_ace_3d3;
+  typedef Kokkos::View<double**[4], DeviceType> t_ace_3d4;
+  typedef Kokkos::View<double****, DeviceType> t_ace_4d;
+  typedef Kokkos::View<complex*, DeviceType> t_ace_1c;
+  typedef Kokkos::View<complex**, DeviceType> t_ace_2c;
+  typedef Kokkos::View<complex***, DeviceType> t_ace_3c;
+  typedef Kokkos::View<complex**[3], DeviceType> t_ace_3c3;
+  typedef Kokkos::View<complex****, DeviceType> t_ace_4c;
+  typedef Kokkos::View<complex***[3], DeviceType> t_ace_4c3;
+
+  t_ace_3d A_rank1;
+  t_ace_4c A;
+
+  t_ace_3c A_list;
+  t_ace_3c A_forward_prod;
+
+  t_ace_3d weights_rank1;
+  t_ace_4c weights;
+
+  t_ace_1d e_atom;
+  t_ace_2d rhos;
+  t_ace_2d dF_drho;
+
+  // hard-core repulsion
+  t_ace_1d rho_core;
+  t_ace_3c dB_flatten;
+  t_ace_2d cr;
+  t_ace_2d dcr;
+  t_ace_1d dF_drho_core;
+
+  // radial functions
+  t_ace_4d fr;
+  t_ace_4d dfr;
+  t_ace_3d gr;
+  t_ace_3d dgr;
+  t_ace_3d d_values;
+  t_ace_3d d_derivatives;
+
+  // Spherical Harmonics
+
+  void pre_compute_harmonics(int);
+
+  KOKKOS_INLINE_FUNCTION
+  void compute_barplm(double rz, int lmaxi);
+
+  KOKKOS_INLINE_FUNCTION
+  void compute_ylm(double rx, double ry, double rz, int lmaxi);
+
+  t_ace_1d alm;
+  t_ace_1d blm;
+  t_ace_1d cl;
+  t_ace_1d dl;
+
+  t_ace_3d plm;
+  t_ace_3d dplm;
+
+  t_ace_3c ylm;
+  t_ace_4c3 dylm;
+
+  // short neigh list
+  t_ace_1i d_ncount;
+  t_ace_2d d_mu;
+  t_ace_2d d_rnorms;
+  t_ace_3d3 d_rhats;
+  t_ace_2i d_nearest;
+
+  // per-type
+  t_ace_1i d_ndensity;
+  t_ace_1i d_npoti;
+  t_ace_1d d_rho_core_cutoff;
+  t_ace_1d d_drho_core_cutoff;
+  t_ace_1d d_E0vals;
+  t_ace_2d d_wpre;
+  t_ace_2d d_mexp;
+
+  // tilde
+  t_ace_1i d_idx_rho_count;
+  t_ace_2i d_rank;
+  t_ace_2i d_num_ms_combs;
+  t_ace_2i d_offsets;
+  t_ace_3i d_mus;
+  t_ace_3i d_ns;
+  t_ace_3i d_ls;
+  t_ace_3i d_ms_combs;
+  t_ace_3d d_ctildes;
+
+  t_ace_3d3 f_ij;
+
+ public:
+  // Kokkos counterpart of SplineInterpolator: holds the same scalar settings
+  // plus a Kokkos view with a copy of the spline lookup table.
+  struct SplineInterpolatorKokkos {
+    int ntot, nlut, num_of_functions;
+    double cutoff, deltaSplineBins, invrscalelookup, rscalelookup;
+
+    t_ace_3d4 lookupTable;
+
+    // Copy the scalar parameters of spline and transfer its lookup table,
+    // (ntot+1) x num_of_functions x 4 entries, into lookupTable via a host mirror.
+    void operator=(const SplineInterpolator &spline) {
+      cutoff = spline.cutoff;
+      deltaSplineBins = spline.deltaSplineBins;
+      ntot = spline.ntot;
+      nlut = spline.nlut;
+      invrscalelookup = spline.invrscalelookup;
+      rscalelookup = spline.rscalelookup;
+      num_of_functions = spline.num_of_functions;
+
+      lookupTable = t_ace_3d4("lookupTable", ntot+1, num_of_functions);
+      auto h_lookupTable = Kokkos::create_mirror_view(lookupTable);
+      for (int i = 0; i < ntot+1; i++)
+        for (int j = 0; j < num_of_functions; j++)
+          for (int k = 0; k < 4; k++)
+            h_lookupTable(i, j, k) = spline.lookupTable(i, j, k);
+      Kokkos::deep_copy(lookupTable, h_lookupTable);
+    }
+
+    // Let go of the lookup table by assigning an empty view.
+    void deallocate() {
+      lookupTable = t_ace_3d4();
+    }
+
+    // Size of the lookup table in bytes. Const because it only reads
+    // lookupTable, so it can be queried through const references.
+    double memory_usage() const {
+      return lookupTable.span() * sizeof(typename decltype(lookupTable)::value_type);
+    }
+
+    // NOTE(review): defined outside this header; presumably evaluates the splines
+    // at distance r into d_values / d_derivatives at (ii, jj) - confirm in the .cpp.
+    KOKKOS_INLINE_FUNCTION
+    void calcSplines(const int ii, const int jj, const double r, const t_ace_3d &d_values, const t_ace_3d &d_derivatives) const;
+  };
+
+  Kokkos::DualView<SplineInterpolatorKokkos**, DeviceType> k_splines_gk;
+  Kokkos::DualView<SplineInterpolatorKokkos**, DeviceType> k_splines_rnl;
+  Kokkos::DualView<SplineInterpolatorKokkos**, DeviceType> k_splines_hc;
+
+};
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
--- a/src/KOKKOS/pair_snap_kokkos_impl.h
+++ b/src/KOKKOS/pair_snap_kokkos_impl.h
@ -91,9 +91,8 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::init_style()
 {
  if (host_flag) {
    if (lmp->kokkos->nthreads > 1)
-      if (comm->me == 0)
-        utils::logmesg(lmp,"Pair style snap/kk currently only runs on a single "
-                           "CPU thread, even if more threads are requested\n");
+      error->all(FLERR,"Pair style snap/kk can currently only run on a single "
+                         "CPU thread");

    PairSNAP::init_style();
    return;
--- a/src/ML-PACE/pair_pace.cpp
+++ b/src/ML-PACE/pair_pace.cpp
@ -91,6 +91,8 @@ PairPACE::PairPACE(LAMMPS *lmp) : Pair(lmp)
  recursive = false;

  scale = nullptr;
+
+  chunksize = 4096;
 }

 /* ----------------------------------------------------------------------
@ -250,18 +252,25 @@ void PairPACE::allocate()

 void PairPACE::settings(int narg, char **arg)
 {
-  if (narg > 1) error->all(FLERR, "Illegal pair_style command.");
+  if (narg > 3) error->all(FLERR, "Illegal pair_style command.");

  // ACE potentials are parameterized in metal units
  if (strcmp("metal", update->unit_style) != 0)
    error->all(FLERR, "ACE potentials require 'metal' units");

  recursive = true;    // default evaluator style: RECURSIVE
-  if (narg > 0) {
-    if (strcmp(arg[0], "recursive") == 0)
+
+  // scan the keywords left to right; iarg always indexes the next unparsed argument
+  int iarg = 0;
+  while (iarg < narg) {
+    if (strcmp(arg[iarg], "recursive") == 0) {
      recursive = true;
-    else if (strcmp(arg[0], "product") == 0) {
+      iarg += 1;
+    } else if (strcmp(arg[iarg], "product") == 0) {
      recursive = false;
+      iarg += 1;
+    } else if (strcmp(arg[iarg], "chunksize") == 0) {
+      // chunksize requires a value: never read arg[iarg+1] past the end of arg
+      if (iarg + 2 > narg) error->all(FLERR, "Illegal pair_style command");
+      chunksize = utils::inumeric(FLERR,arg[iarg+1],false,lmp);
+      iarg += 2;
    } else
      error->all(FLERR, "Illegal pair_style command");
  }
--- a/src/ML-PACE/pair_pace.h
+++ b/src/ML-PACE/pair_pace.h
@ -56,6 +56,8 @@ class PairPACE : public Pair {

  double **scale;
  bool recursive;    // "recursive" option for ACERecursiveEvaluator
+
+  int chunksize;
 };
 }    // namespace LAMMPS_NS