Removed debug comments, old printf calls, and commented-out dead code.

2025-01-21 17:09:04 +01:00
parent c3ca6a8e56
commit 536aa7cadf
11 changed files with 0 additions and 631 deletions
--- a/src/KOKKOS/compute_gaussian_grid_local_kokkos.cpp
+++ b/src/KOKKOS/compute_gaussian_grid_local_kokkos.cpp
@ -54,17 +54,12 @@ ComputeGaussianGridLocalKokkos<DeviceType>::ComputeGaussianGridLocalKokkos(LAMMP

  host_flag = (execution_space == Host);

-  // TODO: Extract cutsq in double loop below, no need for cutsq_tmp
-
-  //cutsq_tmp = cutsq[1][1];
-
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = 1; j <= atom->ntypes; j++){
      k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutsq[i][j]; //cutsq_tmp;
      k_cutsq.template modify<LMPHostType>();
    }
  }
-  //printf(">>> 1\n");
  // Set up element lists
  int n = atom->ntypes;
  MemKK::realloc_kokkos(d_radelem,"ComputeSNAGridKokkos::radelem",n);
@ -72,13 +67,11 @@ ComputeGaussianGridLocalKokkos<DeviceType>::ComputeGaussianGridLocalKokkos(LAMMP
  MemKK::realloc_kokkos(d_prefacelem,"ComputeSNAGridKokkos::prefacelem",n+1);
  MemKK::realloc_kokkos(d_argfacelem,"ComputeSNAGridKokkos::argfacelem",n+1);
  MemKK::realloc_kokkos(d_map,"ComputeSNAGridKokkos::map",n+1);
-  //printf(">>> 2\n");
  auto h_radelem = Kokkos::create_mirror_view(d_radelem);
  auto h_sigmaelem = Kokkos::create_mirror_view(d_sigmaelem);
  auto h_prefacelem = Kokkos::create_mirror_view(d_prefacelem);
  auto h_argfacelem = Kokkos::create_mirror_view(d_argfacelem);
  auto h_map = Kokkos::create_mirror_view(d_map);
-  //printf(">>> 3\n");
  // start from index 1 because of how compute sna/grid is
  for (int i = 1; i <= atom->ntypes; i++) {
    h_radelem(i-1) = radelem[i];
@ -86,21 +79,11 @@ ComputeGaussianGridLocalKokkos<DeviceType>::ComputeGaussianGridLocalKokkos(LAMMP
    h_prefacelem(i-1) = prefacelem[i];
    h_argfacelem(i-1) = argfacelem[i];
  }
-  //printf(">>> 4\n");
-  // In pair snap some things like `map` get allocated regardless of chem flag.
-  // In this compute, however, map does not get allocated in parent classes.
-  /*
-  for (int i = 1; i <= atom->ntypes; i++) {
-    h_map(i) = map[i];
-  }
-  */
-  //printf(">>> 5\n");
  Kokkos::deep_copy(d_radelem,h_radelem);
  Kokkos::deep_copy(d_sigmaelem,h_sigmaelem);
  Kokkos::deep_copy(d_prefacelem, h_prefacelem);
  Kokkos::deep_copy(d_argfacelem, h_argfacelem);
  Kokkos::deep_copy(d_map,h_map);
-  //printf(">>> 6\n");

 }

@ -109,14 +92,12 @@ ComputeGaussianGridLocalKokkos<DeviceType>::ComputeGaussianGridLocalKokkos(LAMMP
 template<class DeviceType>
 ComputeGaussianGridLocalKokkos<DeviceType>::~ComputeGaussianGridLocalKokkos()
 {
-  //printf(">>> ComputeGaussianGridLocalKokkos destruct begin, copymode %d\n", copymode);
  if (copymode) return;

  memoryKK->destroy_kokkos(k_cutsq,cutsq);
  memoryKK->destroy_kokkos(k_alocal,alocal);
  //gridlocal_allocated = 0;

-  //printf(">>> ComputeGaussianGridLocalKokkos end\n");
 }

 /* ---------------------------------------------------------------------- */
@ -125,25 +106,12 @@ template<class DeviceType>
 void ComputeGaussianGridLocalKokkos<DeviceType>::setup()
 {

-  // Do not call ComputeGrid::setup(), we don't wanna allocate the grid array there.
-  // Instead, call ComputeGrid::set_grid_global and set_grid_local to set the n indices.
-
-  //ComputeGrid::set_grid_global();
-  //ComputeGrid::set_grid_local();
  ComputeGridLocal::setup();

  // allocate arrays
-  //printf(">>> rows cols kokkos init: %d %d\n", size_local_rows, size_local_cols);
  memoryKK->create_kokkos(k_alocal, alocal, size_local_rows, size_local_cols, "grid:alocal");
-
-  //gridlocal_allocated = 1;
-  //array = gridall;
-
  array_local = alocal;
-
  d_alocal = k_alocal.template view<DeviceType>();
-  //d_grid = k_grid.template view<DeviceType>();
-  //d_gridall = k_gridall.template view<DeviceType>();

 }

@ -160,8 +128,6 @@ void ComputeGaussianGridLocalKokkos<DeviceType>::init()
 template<class DeviceType>
 void ComputeGaussianGridLocalKokkos<DeviceType>::compute_local()
 {
-  //printf(">>> compute_local Kokkos begin\n");
-
  if (host_flag) {
    return;
  }
@ -202,11 +168,6 @@ void ComputeGaussianGridLocalKokkos<DeviceType>::compute_local()
    team_size_default = 1; // cost will increase with increasing team size //32;//max_neighs;

  if (triclinic){
-    /*
-    xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
-    xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
-    xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
-    */
    h0 = domain->h[0];
    h1 = domain->h[1];
    h2 = domain->h[2];
@ -228,9 +189,7 @@ void ComputeGaussianGridLocalKokkos<DeviceType>::compute_local()
      int vector_length = vector_length_default;
      int team_size = team_size_default;
      check_team_size_for<TagComputeGaussianGridLocalNeigh>(chunk_size,team_size,vector_length);
-      //printf(">>> Check 1 %d %d %d\n", chunk_size, team_size, vector_length);
      typename Kokkos::TeamPolicy<DeviceType, TagComputeGaussianGridLocalNeigh> policy_neigh(chunk_size,team_size,vector_length);
-      //printf(">>> Check 2\n");
      Kokkos::parallel_for("ComputeGaussianGridLocalNeigh",policy_neigh,*this);
    }

@ -243,8 +202,6 @@ void ComputeGaussianGridLocalKokkos<DeviceType>::compute_local()
  k_alocal.template modify<DeviceType>();
  k_alocal.template sync<LMPHostType>();

-  //printf(">>> k_alocal: %f\n", k_alocal.h_view(0,6));
-
 }

 /* ---------------------------------------------------------------------- */
@ -254,7 +211,6 @@ KOKKOS_INLINE_FUNCTION
 void ComputeGaussianGridLocalKokkos<DeviceType>::operator() (TagComputeGaussianGridLocalNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagComputeGaussianGridLocalNeigh>::member_type& team) const
 {
  const int ii = team.league_rank();
-  //printf("%d\n", ii);

  if (ii >= chunk_size) return;

@ -284,7 +240,6 @@ void ComputeGaussianGridLocalKokkos<DeviceType>::operator() (TagComputeGaussianG

  // index ii already captures the proper grid point
  //int igrid = iz * (nx * ny) + iy * nx + ix;
-  //printf("%d %d\n", ii, igrid);

  // grid2x converts igrid to ix,iy,iz like we've done before
  // multiply grid integers by grid spacing delx, dely, delz
@ -302,11 +257,6 @@ void ComputeGaussianGridLocalKokkos<DeviceType>::operator() (TagComputeGaussianG
    // Because calling a __host__ function("lamda2x") from a __host__ __device__ function("operator()") is not allowed

    // Using domainKK-> gives segfault, use domain-> instead since we're just accessing floats.
-    /*
-    xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
-    xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
-    xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
-    */
    xgrid[0] = h0*xgrid[0] + h5*xgrid[1] + h4*xgrid[2] + lo0;
    xgrid[1] = h1*xgrid[1] + h3*xgrid[2] + lo1;
    xgrid[2] = h2*xgrid[2] + lo2;
@ -348,13 +298,10 @@ void ComputeGaussianGridLocalKokkos<DeviceType>::operator() (TagComputeGaussianG
    const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;

    if (rsq < rnd_cutsq(jtype, jtype) ) {
-      //printf("%f %f\n", d_prefacelem(jtype-1), d_argfacelem(jtype-1));
      int icol = size_local_cols_base + jtype - 1;
      d_alocal(igrid, icol) += d_prefacelem(jtype-1) * exp(-rsq * d_argfacelem(jtype-1));
    }
  }
-
-  //printf("%f\n", d_alocal(igrid, 6));
 }

 /* ----------------------------------------------------------------------
--- a/src/KOKKOS/compute_gaussian_grid_local_kokkos.h
+++ b/src/KOKKOS/compute_gaussian_grid_local_kokkos.h
@ -58,8 +58,6 @@ template <class DeviceType> class ComputeGaussianGridLocalKokkos : public Comput
  void operator() (TagComputeGaussianGridLocalNeigh, const typename Kokkos::TeamPolicy<DeviceType, TagComputeGaussianGridLocalNeigh>::member_type& team) const;

 private:
-  //double adof, mvv2e, mv2d, boltz;
-
  Kokkos::View<double*, DeviceType> d_radelem;              // element radii
  Kokkos::View<double*, DeviceType> d_sigmaelem;
  Kokkos::View<double*, DeviceType> d_prefacelem;
@ -73,21 +71,6 @@ template <class DeviceType> class ComputeGaussianGridLocalKokkos : public Comput
      Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_fparams_rnd;
  t_fparams_rnd rnd_cutsq;

-  /*
-  typename AT::t_x_array x;
-  typename AT::t_v_array v;
-  typename ArrayTypes<DeviceType>::t_float_1d rmass;
-  typename ArrayTypes<DeviceType>::t_float_1d mass;
-  typename ArrayTypes<DeviceType>::t_int_1d type;
-  typename ArrayTypes<DeviceType>::t_int_1d mask;
-  */
-
-  //typename AT::t_neighbors_2d d_neighbors;
-  //typename AT::t_int_1d d_ilist;
-  //typename AT::t_int_1d d_numneigh;
-
-  //DAT::tdual_float_2d k_result;
-  //typename AT::t_float_2d d_result;

  int max_neighs, inum, chunk_size, chunk_offset;
  int host_flag;
@ -103,11 +86,6 @@ template <class DeviceType> class ComputeGaussianGridLocalKokkos : public Comput
  typename AT::t_float_2d d_alocal;

  // triclinic vars
-  /*
-  xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
-  xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
-  xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
-  */
  double h0, h1, h2, h3, h4, h5;
  double lo0, lo1, lo2;
 };
--- a/src/KOKKOS/compute_sna_grid_kokkos.cpp
+++ b/src/KOKKOS/compute_sna_grid_kokkos.cpp
@ -23,59 +23,3 @@ template class ComputeSNAGridKokkosHost<LMPHostType>;
 #endif

 }
-
-
-
-
-// The following chunk will compile but we're gonna try a wrapper approach like pair snap.
-/*
-#include "compute_sna_grid_kokkos.h"
-
-#include "atom_kokkos.h"
-#include "atom_masks.h"
-#include "comm.h"
-#include "error.h"
-#include "memory_kokkos.h"
-#include "modify.h"
-#include "neigh_list.h"
-#include "neigh_request.h"
-#include "neighbor_kokkos.h"
-#include "sna_kokkos.h"
-#include "update.h"
-
-using namespace LAMMPS_NS;
-
-// ----------------------------------------------------------------------
-
-template<class DeviceType>
-ComputeSNAGridKokkos<DeviceType>::ComputeSNAGridKokkos(LAMMPS *lmp, int narg, char **arg) :
-  ComputeSNAGrid(lmp, narg, arg)
-{
-
-  printf("^^^ inside ComputeSNAGridKokkos constructor\n");
-  kokkosable = 1;
-  atomKK = (AtomKokkos *) atom;
-  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
-  datamask_read = EMPTY_MASK;
-  datamask_modify = EMPTY_MASK;
-
-}
-
-// ----------------------------------------------------------------------
-
-template<class DeviceType>
-ComputeSNAGridKokkos<DeviceType>::~ComputeSNAGridKokkos()
-{
-  if (copymode) return;
-
-
-}
-
-namespace LAMMPS_NS {
-template class ComputeSNAGridKokkos<LMPDeviceType>;
-#ifdef LMP_KOKKOS_GPU
-template class ComputeSNAGridKokkos<LMPHostType>;
-#endif
-}
-*/
-
--- a/src/KOKKOS/compute_sna_grid_kokkos.h
+++ b/src/KOKKOS/compute_sna_grid_kokkos.h
@ -29,38 +29,13 @@ ComputeStyle(sna/grid/kk/host,ComputeSNAGridKokkosDevice<LMPHostType>);

 #include "compute_sna_grid.h"
 #include "kokkos_type.h"
-//#include "pair_snap.h"
-//#include "kokkos_type.h"
-//#include "neigh_list_kokkos.h"
 #include "sna_kokkos.h"
-//#include "pair_kokkos.h"

 namespace LAMMPS_NS {

 // Routines for both the CPU and GPU backend
-//template<int NEIGHFLAG, int EVFLAG>
-//struct TagPairSNAPComputeForce{};
-

 // GPU backend only
-/*
-struct TagPairSNAPComputeNeigh{};
-struct TagPairSNAPComputeCayleyKlein{};
-struct TagPairSNAPPreUi{};
-struct TagPairSNAPComputeUiSmall{}; // more parallelism, more divergence
-struct TagPairSNAPComputeUiLarge{}; // less parallelism, no divergence
-struct TagPairSNAPTransformUi{}; // re-order ulisttot from SoA to AoSoA, zero ylist
-struct TagPairSNAPComputeZi{};
-struct TagPairSNAPBeta{};
-struct TagPairSNAPComputeBi{};
-struct TagPairSNAPComputeYi{};
-struct TagPairSNAPComputeYiWithZlist{};
-template<int dir>
-struct TagPairSNAPComputeFusedDeidrjSmall{}; // more parallelism, more divergence
-template<int dir>
-struct TagPairSNAPComputeFusedDeidrjLarge{}; // less parallelism, no divergence
-*/
-//struct TagPairSNAPPreUi{};
 struct TagCSNAGridComputeNeigh{};
 struct TagCSNAGridComputeCayleyKlein{};
 struct TagCSNAGridPreUi{};
@ -70,26 +45,11 @@ struct TagCSNAGridTransformUi{}; // re-order ulisttot from SoA to AoSoA, zero yl
 template <bool chemsnap> struct TagCSNAGridComputeZi{};
 template <bool chemsnap> struct TagCSNAGridComputeBi{};
 struct TagCSNAGridLocalFill{}; // fill the gridlocal array
-//struct TagCSNAGridLocalFill2{}; // fill the gridlocal array using same kinda loop as ComputeForce

 struct TagComputeSNAGridLoop{};
 struct TagComputeSNAGrid3D{};
-//struct TagCSNAGridTeam{};

 // CPU backend only
-/*
-struct TagPairSNAPComputeNeighCPU{};
-struct TagPairSNAPPreUiCPU{};
-struct TagPairSNAPComputeUiCPU{};
-struct TagPairSNAPTransformUiCPU{};
-struct TagPairSNAPComputeZiCPU{};
-struct TagPairSNAPBetaCPU{};
-struct TagPairSNAPComputeBiCPU{};
-struct TagPairSNAPZeroYiCPU{};
-struct TagPairSNAPComputeYiCPU{};
-struct TagPairSNAPComputeDuidrjCPU{};
-struct TagPairSNAPComputeDeidrjCPU{};
-*/
 struct TagComputeSNAGridLoopCPU{};

 //template<class DeviceType>
@ -180,7 +140,6 @@ class ComputeSNAGridKokkos : public ComputeSNAGrid {

  // operator function for example team policy
  //KOKKOS_INLINE_FUNCTION
-  //void operator() (TagCSNAGridTeam, const typename Kokkos::TeamPolicy<DeviceType, TagCSNAGridTeam>::member_type& team) const;

  KOKKOS_INLINE_FUNCTION
  void operator() (TagComputeSNAGridLoop, const int& ) const;
@ -191,9 +150,6 @@ class ComputeSNAGridKokkos : public ComputeSNAGrid {
  KOKKOS_INLINE_FUNCTION
  void operator() (TagCSNAGridComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagCSNAGridComputeNeigh>::member_type& team) const;

-  // PrintNeigh
-  //void operator() (TagPrintNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagPrintNeigh>::member_type& team) const;
-
  // 3D case - used by parallel_for
  KOKKOS_INLINE_FUNCTION
  void operator()(TagComputeSNAGrid3D, const int& iz, const int& iy, const int& ix) const;
@ -294,11 +250,6 @@ class ComputeSNAGridKokkos : public ComputeSNAGrid {
  class DomainKokkos *domainKK;

  // triclinic vars
-  /*
-  xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
-  xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
-  xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
-  */
  double h0, h1, h2, h3, h4, h5;
  double lo0, lo1, lo2;

@ -344,45 +295,3 @@ class ComputeSNAGridKokkosHost : public ComputeSNAGridKokkos<DeviceType, SNAP_KO

 #endif
 #endif
-
-// The following will compile with the chunk in cpp file but we're gonna try wrapper like pair snap.
-/*
-#ifdef COMPUTE_CLASS
-// clang-format off
-ComputeStyle(sna/grid/kk,ComputeSNAGridKokkos<LMPDeviceType>);
-ComputeStyle(sna/grid/kk/device,ComputeSNAGridKokkos<LMPDeviceType>);
-ComputeStyle(sna/grid/kk/host,ComputeSNAGridKokkos<LMPHostType>);
-// clang-format on
-#else
-
-// clang-format off
-#ifndef LMP_COMPUTE_SNA_GRID_KOKKOS_H
-#define LMP_COMPUTE_SNA_GRID_KOKKOS_H
-
-#include "compute_sna_grid.h"
-#include "kokkos_type.h"
-
-namespace LAMMPS_NS {
-
-//template<int CSTYLE, int NCOL>
-//struct TagComputeCoordAtom{};
-
-template<class DeviceType>
-class ComputeSNAGridKokkos : public ComputeSNAGrid {
- public:
-  typedef DeviceType device_type;
-  typedef ArrayTypes<DeviceType> AT;
-
-  ComputeSNAGridKokkos(class LAMMPS *, int, char **);
-  ~ComputeSNAGridKokkos() override;
-
- private:
-
-};
-
-}
-
-#endif
-#endif
-*/
-
--- a/src/KOKKOS/compute_sna_grid_kokkos_impl.h
+++ b/src/KOKKOS/compute_sna_grid_kokkos_impl.h
@ -27,7 +27,6 @@
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "neighbor_kokkos.h"
-//#include "sna_kokkos.h"
 #include "domain.h"
 #include "domain_kokkos.h"
 #include "sna.h"
@ -131,14 +130,10 @@ ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::ComputeSNAGridKokkos
 template<class DeviceType, typename real_type, int vector_length>
 ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::~ComputeSNAGridKokkos()
 {
-  //printf(">>> ComputeSNAGridKokkos destruct begin copymode %d\n", copymode);
  if (copymode) return;
-  //printf(">>> After copymode\n");

  memoryKK->destroy_kokkos(k_cutsq,cutsq);
-  //memoryKK->destroy_kokkos(k_grid,grid);
  memoryKK->destroy_kokkos(k_gridall, gridall);
-  //memoryKK->destroy_kokkos(k_gridlocal, gridlocal);
 }

 // Setup
@ -161,7 +156,6 @@ void ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::setup()
  array = gridall;

  d_gridlocal = k_gridlocal.template view<DeviceType>();
-  //d_grid = k_grid.template view<DeviceType>();
  d_gridall = k_gridall.template view<DeviceType>();
 }

@ -199,23 +193,14 @@ void ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::compute_array()

  // "chunksize" variable is default 32768 in compute_sna_grid.cpp, and set by user
  // `total_range` is the number of grid points which may be larger than chunk size.
-  //printf(">>> total_range: %d\n", total_range);
  chunk_size = MIN(chunksize, total_range);
  chunk_offset = 0;
-  //snaKK.grow_rij(chunk_size, ntotal);
  snaKK.grow_rij(chunk_size, max_neighs);

-  //chunk_size = total_range;
-
  // Pre-compute ceil(chunk_size / vector_length) for code cleanliness
  const int chunk_size_div = (chunk_size + vector_length - 1) / vector_length;

  if (triclinic) {
-    /*
-    xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
-    xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
-    xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
-    */
    h0 = domain->h[0];
    h1 = domain->h[1];
    h2 = domain->h[2];
@ -232,7 +217,6 @@ void ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::compute_array()
    if (chunk_size > total_range - chunk_offset)
      chunk_size = total_range - chunk_offset;

-    //printf(">>> chunk_offset: %d\n", chunk_offset);

    //ComputeNeigh
    {
@ -333,9 +317,6 @@ void ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::compute_array()
  k_gridlocal.template modify<DeviceType>();
  k_gridlocal.template sync<LMPHostType>();

-  //k_grid.template modify<DeviceType>();
-  //k_grid.template sync<LMPHostType>();
-
  k_gridall.template modify<DeviceType>();
  k_gridall.template sync<LMPHostType>();
 }
@ -396,7 +377,6 @@ void ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::operator() (Tag

  // index ii already captures the proper grid point
  //int igrid = iz * (nx * ny) + iy * nx + ix;
-  //printf("%d %d\n", ii, igrid);

  // grid2x converts igrid to ix,iy,iz like we've done before
  // multiply grid integers by grid spacing delx, dely, delz
@ -414,11 +394,6 @@ void ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::operator() (Tag
    // Because calling a __host__ function("lamda2x") from a __host__ __device__ function("operator()") is not allowed

    // Using domainKK-> gives segfault, use domain-> instead since we're just accessing floats.
-    /*
-    xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
-    xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
-    xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
-    */
    xgrid[0] = h0*xgrid[0] + h5*xgrid[1] + h4*xgrid[2] + lo0;
    xgrid[1] = h1*xgrid[1] + h3*xgrid[2] + lo1;
    xgrid[2] = h2*xgrid[2] + lo2;
@ -436,14 +411,6 @@ void ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::operator() (Tag
  if (chemflag) ielem = d_map[itype];
  //const double radi = d_radelem[ielem];

-  // We need a DomainKokkos::lamda2x parallel for loop here, but let's ignore for now.
-  // The purpose here is to transform for triclinic boxes.
-  /*
-  if (triclinic){
-    printf("We are triclinic %f %f %f\n", xtmp, ytmp, ztmp);
-  }
-  */
-
  // Compute the number of neighbors, store rsq
  int ninside = 0;

@ -464,29 +431,6 @@ void ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::operator() (Tag
      ninside++;
  }

-  /*
-  Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,ntotal),
-    [&] (const int j, int& count) {
-    const F_FLOAT dx = x(j,0) - xtmp;
-    const F_FLOAT dy = x(j,1) - ytmp;
-    const F_FLOAT dz = x(j,2) - ztmp;
-
-    int jtype = type(j);
-    const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
-
-    // don't include atoms that share location with grid point
-    if (rsq >= rnd_cutsq(itype,jtype) || rsq < 1e-20) {
-      jtype = -1; // use -1 to signal it's outside the radius
-    }
-
-    type_cache[j] = jtype;
-
-    if (jtype >= 0)
-     count++;
-
-  }, ninside);
-  */
-
  d_ninside(ii) = ninside;

  // TODO: Adjust for multi-element, currently we set jelem = 0 regardless of type.
@ -521,75 +465,6 @@ void ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::operator() (Tag
      offset++;
    }
  }
-
-  /*
-  int offset = 0;
-  for (int j = 0; j < ntotal; j++){
-    const int jtype = type_cache[j];
-    if (jtype >= 0) {
-      printf(">>> offset: %d\n", offset);
-      const F_FLOAT dx = x(j,0) - xtmp;
-      const F_FLOAT dy = x(j,1) - ytmp;
-      const F_FLOAT dz = x(j,2) - ztmp;
-      int jtype = type(j);
-      int jelem = 0;
-      if (chemflag) jelem = d_map[jtype];
-      snaKK.rij(ii,offset,0) = static_cast<real_type>(dx);
-      snaKK.rij(ii,offset,1) = static_cast<real_type>(dy);
-      snaKK.rij(ii,offset,2) = static_cast<real_type>(dz);
-      // pair snap uses jelem here, but we use jtype, see compute_sna_grid.cpp
-      // actually since the views here have values starting at 0, let's use jelem
-      snaKK.wj(ii,offset) = static_cast<real_type>(d_wjelem[jelem]);
-      snaKK.rcutij(ii,offset) = static_cast<real_type>((2.0 * d_radelem[jelem])*rcutfac);
-      snaKK.inside(ii,offset) = j;
-      if (switchinnerflag) {
-        snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]);
-        snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]);
-      }
-      if (chemflag)
-        snaKK.element(ii,offset) = jelem;
-      else
-        snaKK.element(ii,offset) = 0;
-      offset++;
-    }
-  }
-  */
-
-  /*
-  Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,ntotal),
-    [&] (const int j, int& offset, bool final) {
-
-    const int jtype = type_cache[j];
-
-    if (jtype >= 0) {
-      if (final) {
-        const F_FLOAT dx = x(j,0) - xtmp;
-        const F_FLOAT dy = x(j,1) - ytmp;
-        const F_FLOAT dz = x(j,2) - ztmp;
-        int jtype = type(j);
-        int jelem = 0;
-        if (chemflag) jelem = d_map[jtype];
-        snaKK.rij(ii,offset,0) = static_cast<real_type>(dx);
-        snaKK.rij(ii,offset,1) = static_cast<real_type>(dy);
-        snaKK.rij(ii,offset,2) = static_cast<real_type>(dz);
-        // pair snap uses jelem here, but we use jtype, see compute_sna_grid.cpp
-        // actually since the views here have values starting at 0, let's use jelem
-        snaKK.wj(ii,offset) = static_cast<real_type>(d_wjelem[jelem]);
-        snaKK.rcutij(ii,offset) = static_cast<real_type>((2.0 * d_radelem[jelem])*rcutfac);
-        snaKK.inside(ii,offset) = j;
-        if (switchinnerflag) {
-          snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]);
-          snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]);
-        }
-        if (chemflag)
-          snaKK.element(ii,offset) = jelem;
-        else
-          snaKK.element(ii,offset) = 0;
-      }
-      offset++;
-    }
-  });
-  */
 }

 /* ----------------------------------------------------------------------
@ -821,11 +696,6 @@ void ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::operator() (Tag
    // Because calling a __host__ function("lamda2x") from a __host__ __device__ function("operator()") is not allowed

    // Using domainKK-> gives segfault, use domain-> instead since we're just accessing floats.
-    /*
-    xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
-    xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
-    xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
-    */
    xgrid[0] = h0*xgrid[0] + h5*xgrid[1] + h4*xgrid[2] + lo0;
    xgrid[1] = h1*xgrid[1] + h3*xgrid[2] + lo1;
    xgrid[2] = h2*xgrid[2] + lo2;
--- a/src/KOKKOS/compute_sna_grid_local_kokkos.cpp
+++ b/src/KOKKOS/compute_sna_grid_local_kokkos.cpp
@ -23,59 +23,3 @@ template class ComputeSNAGridLocalKokkosHost<LMPHostType>;
 #endif

 }
-
-
-
-
-// The following chunk will compile but we're gonna try a wrapper approach like pair snap.
-/*
-#include "compute_sna_grid_kokkos.h"
-
-#include "atom_kokkos.h"
-#include "atom_masks.h"
-#include "comm.h"
-#include "error.h"
-#include "memory_kokkos.h"
-#include "modify.h"
-#include "neigh_list.h"
-#include "neigh_request.h"
-#include "neighbor_kokkos.h"
-#include "sna_kokkos.h"
-#include "update.h"
-
-using namespace LAMMPS_NS;
-
-// ----------------------------------------------------------------------
-
-template<class DeviceType>
-ComputeSNAGridKokkos<DeviceType>::ComputeSNAGridKokkos(LAMMPS *lmp, int narg, char **arg) :
-  ComputeSNAGrid(lmp, narg, arg)
-{
-
-  printf("^^^ inside ComputeSNAGridKokkos constructor\n");
-  kokkosable = 1;
-  atomKK = (AtomKokkos *) atom;
-  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
-  datamask_read = EMPTY_MASK;
-  datamask_modify = EMPTY_MASK;
-
-}
-
-// ----------------------------------------------------------------------
-
-template<class DeviceType>
-ComputeSNAGridKokkos<DeviceType>::~ComputeSNAGridKokkos()
-{
-  if (copymode) return;
-
-
-}
-
-namespace LAMMPS_NS {
-template class ComputeSNAGridKokkos<LMPDeviceType>;
-#ifdef LMP_KOKKOS_GPU
-template class ComputeSNAGridKokkos<LMPHostType>;
-#endif
-}
-*/
-
--- a/src/KOKKOS/compute_sna_grid_local_kokkos.h
+++ b/src/KOKKOS/compute_sna_grid_local_kokkos.h
@ -29,38 +29,13 @@ ComputeStyle(sna/grid/local/kk/host,ComputeSNAGridLocalKokkosDevice<LMPHostType>

 #include "compute_sna_grid_local.h"
 #include "kokkos_type.h"
-//#include "pair_snap.h"
-//#include "kokkos_type.h"
-//#include "neigh_list_kokkos.h"
 #include "sna_kokkos.h"
-//#include "pair_kokkos.h"

 namespace LAMMPS_NS {

 // Routines for both the CPU and GPU backend
-//template<int NEIGHFLAG, int EVFLAG>
-//struct TagPairSNAPComputeForce{};
-

 // GPU backend only
-/*
-struct TagPairSNAPComputeNeigh{};
-struct TagPairSNAPComputeCayleyKlein{};
-struct TagPairSNAPPreUi{};
-struct TagPairSNAPComputeUiSmall{}; // more parallelism, more divergence
-struct TagPairSNAPComputeUiLarge{}; // less parallelism, no divergence
-struct TagPairSNAPTransformUi{}; // re-order ulisttot from SoA to AoSoA, zero ylist
-struct TagPairSNAPComputeZi{};
-struct TagPairSNAPBeta{};
-struct TagPairSNAPComputeBi{};
-struct TagPairSNAPComputeYi{};
-struct TagPairSNAPComputeYiWithZlist{};
-template<int dir>
-struct TagPairSNAPComputeFusedDeidrjSmall{}; // more parallelism, more divergence
-template<int dir>
-struct TagPairSNAPComputeFusedDeidrjLarge{}; // less parallelism, no divergence
-*/
-//struct TagPairSNAPPreUi{};
 struct TagCSNAGridLocalComputeNeigh{};
 struct TagCSNAGridLocalComputeCayleyKlein{};
 struct TagCSNAGridLocalPreUi{};
@ -70,25 +45,11 @@ struct TagCSNAGridLocalTransformUi{}; // re-order ulisttot from SoA to AoSoA, ze
 template <bool chemsnap> struct TagCSNAGridLocalComputeZi{};
 template <bool chemsnap> struct TagCSNAGridLocalComputeBi{};
 struct TagCSNAGridLocal2Fill{}; // fill the gridlocal array
-//struct TagCSNAGridLocalFill2{}; // fill the gridlocal array using same kinda loop as ComputeForce

 struct TagComputeSNAGridLocalLoop{};
 struct TagComputeSNAGridLocal3D{};

 // CPU backend only
-/*
-struct TagPairSNAPComputeNeighCPU{};
-struct TagPairSNAPPreUiCPU{};
-struct TagPairSNAPComputeUiCPU{};
-struct TagPairSNAPTransformUiCPU{};
-struct TagPairSNAPComputeZiCPU{};
-struct TagPairSNAPBetaCPU{};
-struct TagPairSNAPComputeBiCPU{};
-struct TagPairSNAPZeroYiCPU{};
-struct TagPairSNAPComputeYiCPU{};
-struct TagPairSNAPComputeDuidrjCPU{};
-struct TagPairSNAPComputeDeidrjCPU{};
-*/
 struct TagComputeSNAGridLocalLoopCPU{};

 //template<class DeviceType>
@ -184,9 +145,6 @@ class ComputeSNAGridLocalKokkos : public ComputeSNAGridLocal {
  KOKKOS_INLINE_FUNCTION
  void operator() (TagCSNAGridLocalComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagCSNAGridLocalComputeNeigh>::member_type& team) const;

-  // PrintNeigh
-  //void operator() (TagPrintNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagPrintNeigh>::member_type& team) const;
-
  // 3D case - used by parallel_for
  KOKKOS_INLINE_FUNCTION
  void operator()(TagComputeSNAGridLocal3D, const int& iz, const int& iy, const int& ix) const;
@ -274,16 +232,6 @@ class ComputeSNAGridLocalKokkos : public ComputeSNAGridLocal {
  DAT::tdual_float_2d k_alocal;
  typename AT::t_float_2d d_alocal;

-  /*
-  DAT::tdual_float_2d k_grid;
-  DAT::tdual_float_2d k_gridall;
-  typename AT::t_float_2d d_grid;
-  typename AT::t_float_2d d_gridall;
-
-  DAT::tdual_float_4d k_gridlocal;
-  typename AT::t_float_4d d_gridlocal;
-  */
-

  // Utility routine which wraps computing per-team scratch size requirements for
  // ComputeNeigh, ComputeUi, and ComputeFusedDeidrj
@ -293,11 +241,6 @@ class ComputeSNAGridLocalKokkos : public ComputeSNAGridLocal {
  class DomainKokkos *domainKK;

  // triclinic vars
-  /*
-  xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
-  xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
-  xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
-  */
  double h0, h1, h2, h3, h4, h5;
  double lo0, lo1, lo2;

@ -320,7 +263,6 @@ class ComputeSNAGridLocalKokkosDevice : public ComputeSNAGridLocalKokkos<DeviceT
  ComputeSNAGridLocalKokkosDevice(class LAMMPS *, int, char **);

  void compute_local() override;
-  //void setup() override;

 };

--- a/src/KOKKOS/compute_sna_grid_local_kokkos_impl.h
+++ b/src/KOKKOS/compute_sna_grid_local_kokkos_impl.h
@ -27,7 +27,6 @@
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "neighbor_kokkos.h"
-//#include "sna_kokkos.h"
 #include "domain.h"
 #include "domain_kokkos.h"
 #include "sna.h"
@ -131,15 +130,10 @@ ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::ComputeSNAGridL
 template<class DeviceType, typename real_type, int vector_length>
 ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::~ComputeSNAGridLocalKokkos()
 {
-  //printf(">>> ComputeSNAGridLocalKokkos destruct begin copymode %d\n", copymode);
  if (copymode) return;
-  //printf(">>> After copymode\n");

  memoryKK->destroy_kokkos(k_cutsq,cutsq);
  memoryKK->destroy_kokkos(k_alocal,alocal);
-  //memoryKK->destroy_kokkos(k_grid,grid);
-  //memoryKK->destroy_kokkos(k_gridall, gridall);
-  //memoryKK->destroy_kokkos(k_gridlocal, gridlocal);
 }

 // Setup
@ -148,28 +142,11 @@ template<class DeviceType, typename real_type, int vector_length>
 void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::setup()
 {

-  // Do not call ComputeGrid::setup(), we don't wanna allocate the grid array there.
-  // Instead, call ComputeGrid::set_grid_global and set_grid_local to set the n indices.
-
-  //ComputeGrid::set_grid_global();
-  //ComputeGrid::set_grid_local();
-  //ComputeSNAGridLocal::setup();
  ComputeGridLocal::setup();

  // allocate arrays
-  //memoryKK->create_kokkos(k_gridall, gridall, size_array_rows, size_array_cols, "grid:gridall");
  memoryKK->create_kokkos(k_alocal, alocal, size_local_rows, size_local_cols, "grid:alocal");
-
-  // do not use or allocate gridlocal for now
-
-  //gridlocal_allocated = 0;
-  //array = gridall;
-
  array_local = alocal;
-
-  //d_gridlocal = k_gridlocal.template view<DeviceType>();
-  //d_grid = k_grid.template view<DeviceType>();
-  //d_gridall = k_gridall.template view<DeviceType>();
  d_alocal = k_alocal.template view<DeviceType>();
 }

@ -183,8 +160,6 @@ void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::compute_lo
    return;
  }

-  //printf(">>> ComputeSNAGridLocalKokkos::compute_local begin\n");
-
  copymode = 1;

  zlen = nzhi-nzlo+1;
@ -205,12 +180,10 @@ void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::compute_lo

  ntotal = atomKK->nlocal + atomKK->nghost;
  // Allocate view for number of neighbors per grid point
-  //printf(">>> total_range: %d\n", total_range);
  MemKK::realloc_kokkos(d_ninside,"ComputeSNAGridLocalKokkos:ninside",total_range);

  // "chunksize" variable is default 32768 in compute_sna_grid.cpp, and set by user
  // `total_range` is the number of grid points which may be larger than chunk size.
-  //printf(">>> total_range: %d\n", total_range);
  chunk_size = MIN(chunksize, total_range);
  chunk_offset = 0;
  //snaKK.grow_rij(chunk_size, ntotal);
@ -222,11 +195,6 @@ void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::compute_lo
  const int chunk_size_div = (chunk_size + vector_length - 1) / vector_length;

  if (triclinic) {
-    /*
-    xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
-    xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
-    xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
-    */
    h0 = domain->h[0];
    h1 = domain->h[1];
    h2 = domain->h[2];
@ -243,7 +211,6 @@ void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::compute_lo
    if (chunk_size > total_range - chunk_offset)
      chunk_size = total_range - chunk_offset;

-    //printf(">>> chunk_offset: %d\n", chunk_offset);

    //ComputeNeigh
    {
@ -401,7 +368,6 @@ void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::operator()

  // index ii already captures the proper grid point
  //int igrid = iz * (nx * ny) + iy * nx + ix;
-  //printf("%d %d\n", ii, igrid);

  // grid2x converts igrid to ix,iy,iz like we've done before
  // multiply grid integers by grid spacing delx, dely, delz
@ -419,11 +385,6 @@ void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::operator()
    // Because calling a __host__ function("lamda2x") from a __host__ __device__ function("operator()") is not allowed

    // Using domainKK-> gives segfault, use domain-> instead since we're just accessing floats.
-    /*
-    xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
-    xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
-    xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
-    */
    xgrid[0] = h0*xgrid[0] + h5*xgrid[1] + h4*xgrid[2] + lo0;
    xgrid[1] = h1*xgrid[1] + h3*xgrid[2] + lo1;
    xgrid[2] = h2*xgrid[2] + lo2;
@ -454,14 +415,6 @@ void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::operator()
  if (chemflag) ielem = d_map[itype];
  //const double radi = d_radelem[ielem];

-  // We need a DomainKokkos::lamda2x parallel for loop here, but let's ignore for now.
-  // The purpose here is to transform for triclinic boxes.
-  /*
-  if (triclinic){
-    printf("We are triclinic %f %f %f\n", xtmp, ytmp, ztmp);
-  }
-  */
-
  // Compute the number of neighbors, store rsq
  int ninside = 0;

@ -482,29 +435,6 @@ void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::operator()
      ninside++;
  }

-  /*
-  Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,ntotal),
-    [&] (const int j, int& count) {
-    const F_FLOAT dx = x(j,0) - xtmp;
-    const F_FLOAT dy = x(j,1) - ytmp;
-    const F_FLOAT dz = x(j,2) - ztmp;
-
-    int jtype = type(j);
-    const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
-
-    // don't include atoms that share location with grid point
-    if (rsq >= rnd_cutsq(itype,jtype) || rsq < 1e-20) {
-      jtype = -1; // use -1 to signal it's outside the radius
-    }
-
-    type_cache[j] = jtype;
-
-    if (jtype >= 0)
-     count++;
-
-  }, ninside);
-  */
-
  d_ninside(ii) = ninside;

  // TODO: Adjust for multi-element, currently we set jelem = 0 regardless of type.
@ -539,75 +469,6 @@ void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::operator()
      offset++;
    }
  }
-
-  /*
-  int offset = 0;
-  for (int j = 0; j < ntotal; j++){
-    const int jtype = type_cache[j];
-    if (jtype >= 0) {
-      printf(">>> offset: %d\n", offset);
-      const F_FLOAT dx = x(j,0) - xtmp;
-      const F_FLOAT dy = x(j,1) - ytmp;
-      const F_FLOAT dz = x(j,2) - ztmp;
-      int jtype = type(j);
-      int jelem = 0;
-      if (chemflag) jelem = d_map[jtype];
-      snaKK.rij(ii,offset,0) = static_cast<real_type>(dx);
-      snaKK.rij(ii,offset,1) = static_cast<real_type>(dy);
-      snaKK.rij(ii,offset,2) = static_cast<real_type>(dz);
-      // pair snap uses jelem here, but we use jtype, see compute_sna_grid.cpp
-      // actually since the views here have values starting at 0, let's use jelem
-      snaKK.wj(ii,offset) = static_cast<real_type>(d_wjelem[jelem]);
-      snaKK.rcutij(ii,offset) = static_cast<real_type>((2.0 * d_radelem[jelem])*rcutfac);
-      snaKK.inside(ii,offset) = j;
-      if (switchinnerflag) {
-        snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]);
-        snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]);
-      }
-      if (chemflag)
-        snaKK.element(ii,offset) = jelem;
-      else
-        snaKK.element(ii,offset) = 0;
-      offset++;
-    }
-  }
-  */
-
-  /*
-  Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,ntotal),
-    [&] (const int j, int& offset, bool final) {
-
-    const int jtype = type_cache[j];
-
-    if (jtype >= 0) {
-      if (final) {
-        const F_FLOAT dx = x(j,0) - xtmp;
-        const F_FLOAT dy = x(j,1) - ytmp;
-        const F_FLOAT dz = x(j,2) - ztmp;
-        int jtype = type(j);
-        int jelem = 0;
-        if (chemflag) jelem = d_map[jtype];
-        snaKK.rij(ii,offset,0) = static_cast<real_type>(dx);
-        snaKK.rij(ii,offset,1) = static_cast<real_type>(dy);
-        snaKK.rij(ii,offset,2) = static_cast<real_type>(dz);
-        // pair snap uses jelem here, but we use jtype, see compute_sna_grid.cpp
-        // actually since the views here have values starting at 0, let's use jelem
-        snaKK.wj(ii,offset) = static_cast<real_type>(d_wjelem[jelem]);
-        snaKK.rcutij(ii,offset) = static_cast<real_type>((2.0 * d_radelem[jelem])*rcutfac);
-        snaKK.inside(ii,offset) = j;
-        if (switchinnerflag) {
-          snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]);
-          snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]);
-        }
-        if (chemflag)
-          snaKK.element(ii,offset) = jelem;
-        else
-          snaKK.element(ii,offset) = 0;
-      }
-      offset++;
-    }
-  });
-  */
 }

 /* ----------------------------------------------------------------------
@ -839,22 +700,11 @@ void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::operator()
    // Because calling a __host__ function("lamda2x") from a __host__ __device__ function("operator()") is not allowed

    // Using domainKK-> gives segfault, use domain-> instead since we're just accessing floats.
-    /*
-    xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
-    xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
-    xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
-    */
    xgrid[0] = h0*xgrid[0] + h5*xgrid[1] + h4*xgrid[2] + lo0;
    xgrid[1] = h1*xgrid[1] + h3*xgrid[2] + lo1;
    xgrid[2] = h2*xgrid[2] + lo2;
  }

-  //const F_FLOAT xtmp = xgrid[0];
-  //const F_FLOAT ytmp = xgrid[1];
-  //const F_FLOAT ztmp = xgrid[2];
-  //d_gridall(igrid,0) = xtmp;
-  //d_gridall(igrid,1) = ytmp;
-  //d_gridall(igrid,2) = ztmp;

  const auto idxb_max = snaKK.idxb_max;

--- a/src/ML-SNAP/compute_gaussian_grid_local.cpp
+++ b/src/ML-SNAP/compute_gaussian_grid_local.cpp
@ -89,14 +89,12 @@ ComputeGaussianGridLocal::ComputeGaussianGridLocal(LAMMPS *lmp, int narg, char *

 ComputeGaussianGridLocal::~ComputeGaussianGridLocal()
 {
-  //printf(">>> ComputeGaussianGridLocal begin destruct copymode %d\n", copymode);
  if (copymode) return;
  memory->destroy(radelem);
  memory->destroy(sigmaelem);
  memory->destroy(prefacelem);
  memory->destroy(argfacelem);
  memory->destroy(cutsq);
-  //printf(">>> ComputeGaussianGridLocal end destruct\n");
 }

 /* ---------------------------------------------------------------------- */
@ -111,8 +109,6 @@ void ComputeGaussianGridLocal::init()

 void ComputeGaussianGridLocal::compute_local()
 {
-  //printf(">>> compute_local CPU\n");
-  //printf(">>> size_local_cols_base, size_local_cols: %d %d\n", size_local_cols_base, size_local_cols);
  invoked_local = update->ntimestep;

  // compute gaussian for each gridpoint
--- a/src/ML-SNAP/compute_grid.cpp
+++ b/src/ML-SNAP/compute_grid.cpp
@ -88,7 +88,6 @@ void ComputeGrid::grid2x(int igrid, double *x)
  x[2] = iz * delz;

  if (triclinic) domain->lamda2x(x, x);
-  //printf(">>>>> ComputeGrid::grid2x\n");
 }

 /* ----------------------------------------------------------------------
@ -104,7 +103,6 @@ void ComputeGrid::assign_coords_all()
    gridall[igrid][1] = x[1];
    gridall[igrid][2] = x[2];
  }
-  //printf(">>>>> ComputeGrid::assign_coords_all\n");
 }

 /* ----------------------------------------------------------------------
@ -113,7 +111,6 @@ void ComputeGrid::assign_coords_all()

 void ComputeGrid::allocate()
 {
-  //printf(">>> ComputeGrid::allocate\n");
  // allocate arrays
  memory->create(grid, size_array_rows, size_array_cols, "grid:grid");
  memory->create(gridall, size_array_rows, size_array_cols, "grid:gridall");
--- a/src/ML-SNAP/compute_grid_local.cpp
+++ b/src/ML-SNAP/compute_grid_local.cpp
@ -61,9 +61,7 @@ ComputeGridLocal::ComputeGridLocal(LAMMPS *lmp, int narg, char **arg) :

 ComputeGridLocal::~ComputeGridLocal()
 {
-  //printf(">>> ComputeGridLocal begin destruct\n");
  deallocate();
-  //printf(">>> ComputeGridLocal end destruct\n");
 }

 /* ---------------------------------------------------------------------- */
@ -75,7 +73,6 @@ void ComputeGridLocal::setup()
  set_grid_local();
  allocate();
  assign_coords();
-  //printf(">>> ComputeGridLocal setup nx ny nz %d %d %d %d %d %d\n", nxlo, nxhi, nylo, nyhi, nzlo, nzhi);
 }

 /* ----------------------------------------------------------------------
@ -109,7 +106,6 @@ void ComputeGridLocal::grid2lamda(int ix, int iy, int iz, double *x)

 void ComputeGridLocal::allocate()
 {
-  //printf(">>> ComputeGridLocal::allocate %d %d\n", size_local_rows, size_local_cols);
  if (nxlo <= nxhi && nylo <= nyhi && nzlo <= nzhi) {
    gridlocal_allocated = 1;
    memory->create(alocal, size_local_rows, size_local_cols, "compute/grid/local:alocal");
@ -123,14 +119,12 @@ void ComputeGridLocal::allocate()

 void ComputeGridLocal::deallocate()
 {
-  //printf(">>> ComputeGridLocal::deallocate begin gridlocal_allocated %d copymode %d\n", gridlocal_allocated, copymode);
  if (copymode) return;

  if (gridlocal_allocated) {
    gridlocal_allocated = 0;
    memory->destroy(alocal);
  }
-  //printf(">>> ComputeGridLocal:: deallocate end\n");
  array_local = nullptr;
 }

@ -186,8 +180,6 @@ void ComputeGridLocal::set_grid_local()
  //   the 2 equality if tests ensure a consistent decision
  //   as to which proc owns it

-  //printf(">>> ComputeGridLocal set_grid_local\n");
-
  double xfraclo, xfrachi, yfraclo, yfrachi, zfraclo, zfrachi;

  if (comm->layout != Comm::LAYOUT_TILED) {