Removed debug comments/old printfs etc.
This commit is contained in:
@ -54,17 +54,12 @@ ComputeGaussianGridLocalKokkos<DeviceType>::ComputeGaussianGridLocalKokkos(LAMMP
|
||||
|
||||
host_flag = (execution_space == Host);
|
||||
|
||||
// TODO: Extract cutsq in double loop below, no need for cutsq_tmp
|
||||
|
||||
//cutsq_tmp = cutsq[1][1];
|
||||
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = 1; j <= atom->ntypes; j++){
|
||||
k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutsq[i][j]; //cutsq_tmp;
|
||||
k_cutsq.template modify<LMPHostType>();
|
||||
}
|
||||
}
|
||||
//printf(">>> 1\n");
|
||||
// Set up element lists
|
||||
int n = atom->ntypes;
|
||||
MemKK::realloc_kokkos(d_radelem,"ComputeSNAGridKokkos::radelem",n);
|
||||
@ -72,13 +67,11 @@ ComputeGaussianGridLocalKokkos<DeviceType>::ComputeGaussianGridLocalKokkos(LAMMP
|
||||
MemKK::realloc_kokkos(d_prefacelem,"ComputeSNAGridKokkos::prefacelem",n+1);
|
||||
MemKK::realloc_kokkos(d_argfacelem,"ComputeSNAGridKokkos::argfacelem",n+1);
|
||||
MemKK::realloc_kokkos(d_map,"ComputeSNAGridKokkos::map",n+1);
|
||||
//printf(">>> 2\n");
|
||||
auto h_radelem = Kokkos::create_mirror_view(d_radelem);
|
||||
auto h_sigmaelem = Kokkos::create_mirror_view(d_sigmaelem);
|
||||
auto h_prefacelem = Kokkos::create_mirror_view(d_prefacelem);
|
||||
auto h_argfacelem = Kokkos::create_mirror_view(d_argfacelem);
|
||||
auto h_map = Kokkos::create_mirror_view(d_map);
|
||||
//printf(">>> 3\n");
|
||||
// start from index 1 because of how compute sna/grid is
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
h_radelem(i-1) = radelem[i];
|
||||
@ -86,21 +79,11 @@ ComputeGaussianGridLocalKokkos<DeviceType>::ComputeGaussianGridLocalKokkos(LAMMP
|
||||
h_prefacelem(i-1) = prefacelem[i];
|
||||
h_argfacelem(i-1) = argfacelem[i];
|
||||
}
|
||||
//printf(">>> 4\n");
|
||||
// In pair snap some things like `map` get allocated regardless of chem flag.
|
||||
// In this compute, however, map does not get allocated in parent classes.
|
||||
/*
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
h_map(i) = map[i];
|
||||
}
|
||||
*/
|
||||
//printf(">>> 5\n");
|
||||
Kokkos::deep_copy(d_radelem,h_radelem);
|
||||
Kokkos::deep_copy(d_sigmaelem,h_sigmaelem);
|
||||
Kokkos::deep_copy(d_prefacelem, h_prefacelem);
|
||||
Kokkos::deep_copy(d_argfacelem, h_argfacelem);
|
||||
Kokkos::deep_copy(d_map,h_map);
|
||||
//printf(">>> 6\n");
|
||||
|
||||
}
|
||||
|
||||
@ -109,14 +92,12 @@ ComputeGaussianGridLocalKokkos<DeviceType>::ComputeGaussianGridLocalKokkos(LAMMP
|
||||
template<class DeviceType>
|
||||
ComputeGaussianGridLocalKokkos<DeviceType>::~ComputeGaussianGridLocalKokkos()
|
||||
{
|
||||
//printf(">>> ComputeGaussianGridLocalKokkos destruct begin, copymode %d\n", copymode);
|
||||
if (copymode) return;
|
||||
|
||||
memoryKK->destroy_kokkos(k_cutsq,cutsq);
|
||||
memoryKK->destroy_kokkos(k_alocal,alocal);
|
||||
//gridlocal_allocated = 0;
|
||||
|
||||
//printf(">>> ComputeGaussianGridLocalKokkos end\n");
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -125,25 +106,12 @@ template<class DeviceType>
|
||||
void ComputeGaussianGridLocalKokkos<DeviceType>::setup()
{
  // Purpose: run the ComputeGridLocal setup, then create the alocal storage
  // through memoryKK and publish it via array_local and d_alocal.
  ComputeGridLocal::setup();

  // Allocate k_alocal/alocal with size_local_rows x size_local_cols entries.
  memoryKK->create_kokkos(k_alocal, alocal,
                          size_local_rows, size_local_cols,
                          "grid:alocal");

  // Device-side view of the freshly created dual view.
  d_alocal = k_alocal.template view<DeviceType>();

  // Point array_local at alocal.
  array_local = alocal;
}
|
||||
|
||||
@ -160,8 +128,6 @@ void ComputeGaussianGridLocalKokkos<DeviceType>::init()
|
||||
template<class DeviceType>
|
||||
void ComputeGaussianGridLocalKokkos<DeviceType>::compute_local()
|
||||
{
|
||||
//printf(">>> compute_local Kokkos begin\n");
|
||||
|
||||
if (host_flag) {
|
||||
return;
|
||||
}
|
||||
@ -202,11 +168,6 @@ void ComputeGaussianGridLocalKokkos<DeviceType>::compute_local()
|
||||
team_size_default = 1; // cost will increase with increasing team size //32;//max_neighs;
|
||||
|
||||
if (triclinic){
|
||||
/*
|
||||
xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
|
||||
xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
|
||||
xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
|
||||
*/
|
||||
h0 = domain->h[0];
|
||||
h1 = domain->h[1];
|
||||
h2 = domain->h[2];
|
||||
@ -228,9 +189,7 @@ void ComputeGaussianGridLocalKokkos<DeviceType>::compute_local()
|
||||
int vector_length = vector_length_default;
|
||||
int team_size = team_size_default;
|
||||
check_team_size_for<TagComputeGaussianGridLocalNeigh>(chunk_size,team_size,vector_length);
|
||||
//printf(">>> Check 1 %d %d %d\n", chunk_size, team_size, vector_length);
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagComputeGaussianGridLocalNeigh> policy_neigh(chunk_size,team_size,vector_length);
|
||||
//printf(">>> Check 2\n");
|
||||
Kokkos::parallel_for("ComputeGaussianGridLocalNeigh",policy_neigh,*this);
|
||||
}
|
||||
|
||||
@ -243,8 +202,6 @@ void ComputeGaussianGridLocalKokkos<DeviceType>::compute_local()
|
||||
k_alocal.template modify<DeviceType>();
|
||||
k_alocal.template sync<LMPHostType>();
|
||||
|
||||
//printf(">>> k_alocal: %f\n", k_alocal.h_view(0,6));
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -254,7 +211,6 @@ KOKKOS_INLINE_FUNCTION
|
||||
void ComputeGaussianGridLocalKokkos<DeviceType>::operator() (TagComputeGaussianGridLocalNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagComputeGaussianGridLocalNeigh>::member_type& team) const
|
||||
{
|
||||
const int ii = team.league_rank();
|
||||
//printf("%d\n", ii);
|
||||
|
||||
if (ii >= chunk_size) return;
|
||||
|
||||
@ -284,7 +240,6 @@ void ComputeGaussianGridLocalKokkos<DeviceType>::operator() (TagComputeGaussianG
|
||||
|
||||
// index ii already captures the proper grid point
|
||||
//int igrid = iz * (nx * ny) + iy * nx + ix;
|
||||
//printf("%d %d\n", ii, igrid);
|
||||
|
||||
// grid2x converts igrid to ix,iy,iz like we've done before
|
||||
// multiply grid integers by grid spacing delx, dely, delz
|
||||
@ -302,11 +257,6 @@ void ComputeGaussianGridLocalKokkos<DeviceType>::operator() (TagComputeGaussianG
|
||||
// Because calling a __host__ function("lamda2x") from a __host__ __device__ function("operator()") is not allowed
|
||||
|
||||
// Using domainKK-> gives segfault, use domain-> instead since we're just accessing floats.
|
||||
/*
|
||||
xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
|
||||
xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
|
||||
xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
|
||||
*/
|
||||
xgrid[0] = h0*xgrid[0] + h5*xgrid[1] + h4*xgrid[2] + lo0;
|
||||
xgrid[1] = h1*xgrid[1] + h3*xgrid[2] + lo1;
|
||||
xgrid[2] = h2*xgrid[2] + lo2;
|
||||
@ -348,13 +298,10 @@ void ComputeGaussianGridLocalKokkos<DeviceType>::operator() (TagComputeGaussianG
|
||||
const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
|
||||
|
||||
if (rsq < rnd_cutsq(jtype, jtype) ) {
|
||||
//printf("%f %f\n", d_prefacelem(jtype-1), d_argfacelem(jtype-1));
|
||||
int icol = size_local_cols_base + jtype - 1;
|
||||
d_alocal(igrid, icol) += d_prefacelem(jtype-1) * exp(-rsq * d_argfacelem(jtype-1));
|
||||
}
|
||||
}
|
||||
|
||||
//printf("%f\n", d_alocal(igrid, 6));
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
||||
@ -58,8 +58,6 @@ template <class DeviceType> class ComputeGaussianGridLocalKokkos : public Comput
|
||||
void operator() (TagComputeGaussianGridLocalNeigh, const typename Kokkos::TeamPolicy<DeviceType, TagComputeGaussianGridLocalNeigh>::member_type& team) const;
|
||||
|
||||
private:
|
||||
//double adof, mvv2e, mv2d, boltz;
|
||||
|
||||
Kokkos::View<double*, DeviceType> d_radelem; // element radii
|
||||
Kokkos::View<double*, DeviceType> d_sigmaelem;
|
||||
Kokkos::View<double*, DeviceType> d_prefacelem;
|
||||
@ -73,21 +71,6 @@ template <class DeviceType> class ComputeGaussianGridLocalKokkos : public Comput
|
||||
Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_fparams_rnd;
|
||||
t_fparams_rnd rnd_cutsq;
|
||||
|
||||
/*
|
||||
typename AT::t_x_array x;
|
||||
typename AT::t_v_array v;
|
||||
typename ArrayTypes<DeviceType>::t_float_1d rmass;
|
||||
typename ArrayTypes<DeviceType>::t_float_1d mass;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d type;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d mask;
|
||||
*/
|
||||
|
||||
//typename AT::t_neighbors_2d d_neighbors;
|
||||
//typename AT::t_int_1d d_ilist;
|
||||
//typename AT::t_int_1d d_numneigh;
|
||||
|
||||
//DAT::tdual_float_2d k_result;
|
||||
//typename AT::t_float_2d d_result;
|
||||
|
||||
int max_neighs, inum, chunk_size, chunk_offset;
|
||||
int host_flag;
|
||||
@ -103,11 +86,6 @@ template <class DeviceType> class ComputeGaussianGridLocalKokkos : public Comput
|
||||
typename AT::t_float_2d d_alocal;
|
||||
|
||||
// triclinic vars
|
||||
/*
|
||||
xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
|
||||
xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
|
||||
xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
|
||||
*/
|
||||
double h0, h1, h2, h3, h4, h5;
|
||||
double lo0, lo1, lo2;
|
||||
};
|
||||
|
||||
@ -23,59 +23,3 @@ template class ComputeSNAGridKokkosHost<LMPHostType>;
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// The following chunk compiles, but we are going to try a wrapper approach like pair snap instead.
|
||||
/*
|
||||
#include "compute_sna_grid_kokkos.h"
|
||||
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "comm.h"
|
||||
#include "error.h"
|
||||
#include "memory_kokkos.h"
|
||||
#include "modify.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "neighbor_kokkos.h"
|
||||
#include "sna_kokkos.h"
|
||||
#include "update.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
template<class DeviceType>
|
||||
ComputeSNAGridKokkos<DeviceType>::ComputeSNAGridKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
ComputeSNAGrid(lmp, narg, arg)
|
||||
{
|
||||
|
||||
printf("^^^ inside ComputeSNAGridKokkos constructor\n");
|
||||
kokkosable = 1;
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
datamask_read = EMPTY_MASK;
|
||||
datamask_modify = EMPTY_MASK;
|
||||
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
template<class DeviceType>
|
||||
ComputeSNAGridKokkos<DeviceType>::~ComputeSNAGridKokkos()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
|
||||
}
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class ComputeSNAGridKokkos<LMPDeviceType>;
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
template class ComputeSNAGridKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
@ -29,38 +29,13 @@ ComputeStyle(sna/grid/kk/host,ComputeSNAGridKokkosDevice<LMPHostType>);
|
||||
|
||||
#include "compute_sna_grid.h"
|
||||
#include "kokkos_type.h"
|
||||
//#include "pair_snap.h"
|
||||
//#include "kokkos_type.h"
|
||||
//#include "neigh_list_kokkos.h"
|
||||
#include "sna_kokkos.h"
|
||||
//#include "pair_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// Routines for both the CPU and GPU backend
|
||||
//template<int NEIGHFLAG, int EVFLAG>
|
||||
//struct TagPairSNAPComputeForce{};
|
||||
|
||||
|
||||
// GPU backend only
|
||||
/*
|
||||
struct TagPairSNAPComputeNeigh{};
|
||||
struct TagPairSNAPComputeCayleyKlein{};
|
||||
struct TagPairSNAPPreUi{};
|
||||
struct TagPairSNAPComputeUiSmall{}; // more parallelism, more divergence
|
||||
struct TagPairSNAPComputeUiLarge{}; // less parallelism, no divergence
|
||||
struct TagPairSNAPTransformUi{}; // re-order ulisttot from SoA to AoSoA, zero ylist
|
||||
struct TagPairSNAPComputeZi{};
|
||||
struct TagPairSNAPBeta{};
|
||||
struct TagPairSNAPComputeBi{};
|
||||
struct TagPairSNAPComputeYi{};
|
||||
struct TagPairSNAPComputeYiWithZlist{};
|
||||
template<int dir>
|
||||
struct TagPairSNAPComputeFusedDeidrjSmall{}; // more parallelism, more divergence
|
||||
template<int dir>
|
||||
struct TagPairSNAPComputeFusedDeidrjLarge{}; // less parallelism, no divergence
|
||||
*/
|
||||
//struct TagPairSNAPPreUi{};
|
||||
// Empty tag type: selects the operator() overload that takes a
// Kokkos::TeamPolicy<DeviceType, TagCSNAGridComputeNeigh> team member.
struct TagCSNAGridComputeNeigh{};
|
||||
struct TagCSNAGridComputeCayleyKlein{};
|
||||
struct TagCSNAGridPreUi{};
|
||||
@ -70,26 +45,11 @@ struct TagCSNAGridTransformUi{}; // re-order ulisttot from SoA to AoSoA, zero yl
|
||||
template <bool chemsnap> struct TagCSNAGridComputeZi{};
|
||||
template <bool chemsnap> struct TagCSNAGridComputeBi{};
|
||||
struct TagCSNAGridLocalFill{}; // fill the gridlocal array
|
||||
//struct TagCSNAGridLocalFill2{}; // fill the gridlocal array using same kinda loop as ComputeForce
|
||||
|
||||
// Empty tag type: selects the operator()(TagComputeSNAGridLoop, const int&) overload.
struct TagComputeSNAGridLoop{};
|
||||
// Empty tag type: selects the 3D operator()(TagComputeSNAGrid3D, iz, iy, ix) overload.
struct TagComputeSNAGrid3D{};
|
||||
//struct TagCSNAGridTeam{};
|
||||
|
||||
// CPU backend only
|
||||
/*
|
||||
struct TagPairSNAPComputeNeighCPU{};
|
||||
struct TagPairSNAPPreUiCPU{};
|
||||
struct TagPairSNAPComputeUiCPU{};
|
||||
struct TagPairSNAPTransformUiCPU{};
|
||||
struct TagPairSNAPComputeZiCPU{};
|
||||
struct TagPairSNAPBetaCPU{};
|
||||
struct TagPairSNAPComputeBiCPU{};
|
||||
struct TagPairSNAPZeroYiCPU{};
|
||||
struct TagPairSNAPComputeYiCPU{};
|
||||
struct TagPairSNAPComputeDuidrjCPU{};
|
||||
struct TagPairSNAPComputeDeidrjCPU{};
|
||||
*/
|
||||
struct TagComputeSNAGridLoopCPU{};
|
||||
|
||||
//template<class DeviceType>
|
||||
@ -180,7 +140,6 @@ class ComputeSNAGridKokkos : public ComputeSNAGrid {
|
||||
|
||||
// operator function for example team policy
|
||||
//KOKKOS_INLINE_FUNCTION
|
||||
//void operator() (TagCSNAGridTeam, const typename Kokkos::TeamPolicy<DeviceType, TagCSNAGridTeam>::member_type& team) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagComputeSNAGridLoop, const int& ) const;
|
||||
@ -191,9 +150,6 @@ class ComputeSNAGridKokkos : public ComputeSNAGrid {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagCSNAGridComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagCSNAGridComputeNeigh>::member_type& team) const;
|
||||
|
||||
// PrintNeigh
|
||||
//void operator() (TagPrintNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagPrintNeigh>::member_type& team) const;
|
||||
|
||||
// 3D case - used by parallel_for
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagComputeSNAGrid3D, const int& iz, const int& iy, const int& ix) const;
|
||||
@ -294,11 +250,6 @@ class ComputeSNAGridKokkos : public ComputeSNAGrid {
|
||||
class DomainKokkos *domainKK;
|
||||
|
||||
// triclinic vars
|
||||
/*
|
||||
xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
|
||||
xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
|
||||
xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
|
||||
*/
|
||||
double h0, h1, h2, h3, h4, h5;
|
||||
double lo0, lo1, lo2;
|
||||
|
||||
@ -344,45 +295,3 @@ class ComputeSNAGridKokkosHost : public ComputeSNAGridKokkos<DeviceType, SNAP_KO
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// The following compiles together with the chunk in the cpp file, but we are going to try a wrapper like pair snap instead.
|
||||
/*
|
||||
#ifdef COMPUTE_CLASS
|
||||
// clang-format off
|
||||
ComputeStyle(sna/grid/kk,ComputeSNAGridKokkos<LMPDeviceType>);
|
||||
ComputeStyle(sna/grid/kk/device,ComputeSNAGridKokkos<LMPDeviceType>);
|
||||
ComputeStyle(sna/grid/kk/host,ComputeSNAGridKokkos<LMPHostType>);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
// clang-format off
|
||||
#ifndef LMP_COMPUTE_SNA_GRID_KOKKOS_H
|
||||
#define LMP_COMPUTE_SNA_GRID_KOKKOS_H
|
||||
|
||||
#include "compute_sna_grid.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
//template<int CSTYLE, int NCOL>
|
||||
//struct TagComputeCoordAtom{};
|
||||
|
||||
template<class DeviceType>
|
||||
class ComputeSNAGridKokkos : public ComputeSNAGrid {
|
||||
public:
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
ComputeSNAGridKokkos(class LAMMPS *, int, char **);
|
||||
~ComputeSNAGridKokkos() override;
|
||||
|
||||
private:
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
*/
|
||||
|
||||
|
||||
@ -27,7 +27,6 @@
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "neighbor_kokkos.h"
|
||||
//#include "sna_kokkos.h"
|
||||
#include "domain.h"
|
||||
#include "domain_kokkos.h"
|
||||
#include "sna.h"
|
||||
@ -131,14 +130,10 @@ ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::ComputeSNAGridKokkos
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::~ComputeSNAGridKokkos()
|
||||
{
|
||||
//printf(">>> ComputeSNAGridKokkos destruct begin copymode %d\n", copymode);
|
||||
if (copymode) return;
|
||||
//printf(">>> After copymode\n");
|
||||
|
||||
memoryKK->destroy_kokkos(k_cutsq,cutsq);
|
||||
//memoryKK->destroy_kokkos(k_grid,grid);
|
||||
memoryKK->destroy_kokkos(k_gridall, gridall);
|
||||
//memoryKK->destroy_kokkos(k_gridlocal, gridlocal);
|
||||
}
|
||||
|
||||
// Setup
|
||||
@ -161,7 +156,6 @@ void ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::setup()
|
||||
array = gridall;
|
||||
|
||||
d_gridlocal = k_gridlocal.template view<DeviceType>();
|
||||
//d_grid = k_grid.template view<DeviceType>();
|
||||
d_gridall = k_gridall.template view<DeviceType>();
|
||||
}
|
||||
|
||||
@ -199,23 +193,14 @@ void ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::compute_array()
|
||||
|
||||
// "chunksize" variable is default 32768 in compute_sna_grid.cpp, and set by user
|
||||
// `total_range` is the number of grid points which may be larger than chunk size.
|
||||
//printf(">>> total_range: %d\n", total_range);
|
||||
chunk_size = MIN(chunksize, total_range);
|
||||
chunk_offset = 0;
|
||||
//snaKK.grow_rij(chunk_size, ntotal);
|
||||
snaKK.grow_rij(chunk_size, max_neighs);
|
||||
|
||||
//chunk_size = total_range;
|
||||
|
||||
// Pre-compute ceil(chunk_size / vector_length) for code cleanliness
|
||||
const int chunk_size_div = (chunk_size + vector_length - 1) / vector_length;
|
||||
|
||||
if (triclinic) {
|
||||
/*
|
||||
xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
|
||||
xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
|
||||
xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
|
||||
*/
|
||||
h0 = domain->h[0];
|
||||
h1 = domain->h[1];
|
||||
h2 = domain->h[2];
|
||||
@ -232,7 +217,6 @@ void ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::compute_array()
|
||||
if (chunk_size > total_range - chunk_offset)
|
||||
chunk_size = total_range - chunk_offset;
|
||||
|
||||
//printf(">>> chunk_offset: %d\n", chunk_offset);
|
||||
|
||||
//ComputeNeigh
|
||||
{
|
||||
@ -333,9 +317,6 @@ void ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::compute_array()
|
||||
k_gridlocal.template modify<DeviceType>();
|
||||
k_gridlocal.template sync<LMPHostType>();
|
||||
|
||||
//k_grid.template modify<DeviceType>();
|
||||
//k_grid.template sync<LMPHostType>();
|
||||
|
||||
k_gridall.template modify<DeviceType>();
|
||||
k_gridall.template sync<LMPHostType>();
|
||||
}
|
||||
@ -396,7 +377,6 @@ void ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::operator() (Tag
|
||||
|
||||
// index ii already captures the proper grid point
|
||||
//int igrid = iz * (nx * ny) + iy * nx + ix;
|
||||
//printf("%d %d\n", ii, igrid);
|
||||
|
||||
// grid2x converts igrid to ix,iy,iz like we've done before
|
||||
// multiply grid integers by grid spacing delx, dely, delz
|
||||
@ -414,11 +394,6 @@ void ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::operator() (Tag
|
||||
// Because calling a __host__ function("lamda2x") from a __host__ __device__ function("operator()") is not allowed
|
||||
|
||||
// Using domainKK-> gives segfault, use domain-> instead since we're just accessing floats.
|
||||
/*
|
||||
xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
|
||||
xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
|
||||
xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
|
||||
*/
|
||||
xgrid[0] = h0*xgrid[0] + h5*xgrid[1] + h4*xgrid[2] + lo0;
|
||||
xgrid[1] = h1*xgrid[1] + h3*xgrid[2] + lo1;
|
||||
xgrid[2] = h2*xgrid[2] + lo2;
|
||||
@ -436,14 +411,6 @@ void ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::operator() (Tag
|
||||
if (chemflag) ielem = d_map[itype];
|
||||
//const double radi = d_radelem[ielem];
|
||||
|
||||
// We need a DomainKokkos::lamda2x parallel for loop here, but let's ignore for now.
|
||||
// The purpose here is to transform for triclinic boxes.
|
||||
/*
|
||||
if (triclinic){
|
||||
printf("We are triclinic %f %f %f\n", xtmp, ytmp, ztmp);
|
||||
}
|
||||
*/
|
||||
|
||||
// Compute the number of neighbors, store rsq
|
||||
int ninside = 0;
|
||||
|
||||
@ -464,29 +431,6 @@ void ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::operator() (Tag
|
||||
ninside++;
|
||||
}
|
||||
|
||||
/*
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,ntotal),
|
||||
[&] (const int j, int& count) {
|
||||
const F_FLOAT dx = x(j,0) - xtmp;
|
||||
const F_FLOAT dy = x(j,1) - ytmp;
|
||||
const F_FLOAT dz = x(j,2) - ztmp;
|
||||
|
||||
int jtype = type(j);
|
||||
const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
|
||||
|
||||
// don't include atoms that share location with grid point
|
||||
if (rsq >= rnd_cutsq(itype,jtype) || rsq < 1e-20) {
|
||||
jtype = -1; // use -1 to signal it's outside the radius
|
||||
}
|
||||
|
||||
type_cache[j] = jtype;
|
||||
|
||||
if (jtype >= 0)
|
||||
count++;
|
||||
|
||||
}, ninside);
|
||||
*/
|
||||
|
||||
d_ninside(ii) = ninside;
|
||||
|
||||
// TODO: Adjust for multi-element, currently we set jelem = 0 regardless of type.
|
||||
@ -521,75 +465,6 @@ void ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::operator() (Tag
|
||||
offset++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
int offset = 0;
|
||||
for (int j = 0; j < ntotal; j++){
|
||||
const int jtype = type_cache[j];
|
||||
if (jtype >= 0) {
|
||||
printf(">>> offset: %d\n", offset);
|
||||
const F_FLOAT dx = x(j,0) - xtmp;
|
||||
const F_FLOAT dy = x(j,1) - ytmp;
|
||||
const F_FLOAT dz = x(j,2) - ztmp;
|
||||
int jtype = type(j);
|
||||
int jelem = 0;
|
||||
if (chemflag) jelem = d_map[jtype];
|
||||
snaKK.rij(ii,offset,0) = static_cast<real_type>(dx);
|
||||
snaKK.rij(ii,offset,1) = static_cast<real_type>(dy);
|
||||
snaKK.rij(ii,offset,2) = static_cast<real_type>(dz);
|
||||
// pair snap uses jelem here, but we use jtype, see compute_sna_grid.cpp
|
||||
// actually since the views here have values starting at 0, let's use jelem
|
||||
snaKK.wj(ii,offset) = static_cast<real_type>(d_wjelem[jelem]);
|
||||
snaKK.rcutij(ii,offset) = static_cast<real_type>((2.0 * d_radelem[jelem])*rcutfac);
|
||||
snaKK.inside(ii,offset) = j;
|
||||
if (switchinnerflag) {
|
||||
snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]);
|
||||
snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]);
|
||||
}
|
||||
if (chemflag)
|
||||
snaKK.element(ii,offset) = jelem;
|
||||
else
|
||||
snaKK.element(ii,offset) = 0;
|
||||
offset++;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,ntotal),
|
||||
[&] (const int j, int& offset, bool final) {
|
||||
|
||||
const int jtype = type_cache[j];
|
||||
|
||||
if (jtype >= 0) {
|
||||
if (final) {
|
||||
const F_FLOAT dx = x(j,0) - xtmp;
|
||||
const F_FLOAT dy = x(j,1) - ytmp;
|
||||
const F_FLOAT dz = x(j,2) - ztmp;
|
||||
int jtype = type(j);
|
||||
int jelem = 0;
|
||||
if (chemflag) jelem = d_map[jtype];
|
||||
snaKK.rij(ii,offset,0) = static_cast<real_type>(dx);
|
||||
snaKK.rij(ii,offset,1) = static_cast<real_type>(dy);
|
||||
snaKK.rij(ii,offset,2) = static_cast<real_type>(dz);
|
||||
// pair snap uses jelem here, but we use jtype, see compute_sna_grid.cpp
|
||||
// actually since the views here have values starting at 0, let's use jelem
|
||||
snaKK.wj(ii,offset) = static_cast<real_type>(d_wjelem[jelem]);
|
||||
snaKK.rcutij(ii,offset) = static_cast<real_type>((2.0 * d_radelem[jelem])*rcutfac);
|
||||
snaKK.inside(ii,offset) = j;
|
||||
if (switchinnerflag) {
|
||||
snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]);
|
||||
snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]);
|
||||
}
|
||||
if (chemflag)
|
||||
snaKK.element(ii,offset) = jelem;
|
||||
else
|
||||
snaKK.element(ii,offset) = 0;
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
});
|
||||
*/
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -821,11 +696,6 @@ void ComputeSNAGridKokkos<DeviceType, real_type, vector_length>::operator() (Tag
|
||||
// Because calling a __host__ function("lamda2x") from a __host__ __device__ function("operator()") is not allowed
|
||||
|
||||
// Using domainKK-> gives segfault, use domain-> instead since we're just accessing floats.
|
||||
/*
|
||||
xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
|
||||
xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
|
||||
xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
|
||||
*/
|
||||
xgrid[0] = h0*xgrid[0] + h5*xgrid[1] + h4*xgrid[2] + lo0;
|
||||
xgrid[1] = h1*xgrid[1] + h3*xgrid[2] + lo1;
|
||||
xgrid[2] = h2*xgrid[2] + lo2;
|
||||
|
||||
@ -23,59 +23,3 @@ template class ComputeSNAGridLocalKokkosHost<LMPHostType>;
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// The following chunk compiles, but we are going to try a wrapper approach like pair snap instead.
|
||||
/*
|
||||
#include "compute_sna_grid_kokkos.h"
|
||||
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "comm.h"
|
||||
#include "error.h"
|
||||
#include "memory_kokkos.h"
|
||||
#include "modify.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "neighbor_kokkos.h"
|
||||
#include "sna_kokkos.h"
|
||||
#include "update.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
template<class DeviceType>
|
||||
ComputeSNAGridKokkos<DeviceType>::ComputeSNAGridKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
ComputeSNAGrid(lmp, narg, arg)
|
||||
{
|
||||
|
||||
printf("^^^ inside ComputeSNAGridKokkos constructor\n");
|
||||
kokkosable = 1;
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
datamask_read = EMPTY_MASK;
|
||||
datamask_modify = EMPTY_MASK;
|
||||
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
template<class DeviceType>
|
||||
ComputeSNAGridKokkos<DeviceType>::~ComputeSNAGridKokkos()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
|
||||
}
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class ComputeSNAGridKokkos<LMPDeviceType>;
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
template class ComputeSNAGridKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
@ -29,38 +29,13 @@ ComputeStyle(sna/grid/local/kk/host,ComputeSNAGridLocalKokkosDevice<LMPHostType>
|
||||
|
||||
#include "compute_sna_grid_local.h"
|
||||
#include "kokkos_type.h"
|
||||
//#include "pair_snap.h"
|
||||
//#include "kokkos_type.h"
|
||||
//#include "neigh_list_kokkos.h"
|
||||
#include "sna_kokkos.h"
|
||||
//#include "pair_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// Routines for both the CPU and GPU backend
|
||||
//template<int NEIGHFLAG, int EVFLAG>
|
||||
//struct TagPairSNAPComputeForce{};
|
||||
|
||||
|
||||
// GPU backend only
|
||||
/*
|
||||
struct TagPairSNAPComputeNeigh{};
|
||||
struct TagPairSNAPComputeCayleyKlein{};
|
||||
struct TagPairSNAPPreUi{};
|
||||
struct TagPairSNAPComputeUiSmall{}; // more parallelism, more divergence
|
||||
struct TagPairSNAPComputeUiLarge{}; // less parallelism, no divergence
|
||||
struct TagPairSNAPTransformUi{}; // re-order ulisttot from SoA to AoSoA, zero ylist
|
||||
struct TagPairSNAPComputeZi{};
|
||||
struct TagPairSNAPBeta{};
|
||||
struct TagPairSNAPComputeBi{};
|
||||
struct TagPairSNAPComputeYi{};
|
||||
struct TagPairSNAPComputeYiWithZlist{};
|
||||
template<int dir>
|
||||
struct TagPairSNAPComputeFusedDeidrjSmall{}; // more parallelism, more divergence
|
||||
template<int dir>
|
||||
struct TagPairSNAPComputeFusedDeidrjLarge{}; // less parallelism, no divergence
|
||||
*/
|
||||
//struct TagPairSNAPPreUi{};
|
||||
// Empty tag type: selects the operator() overload that takes a
// Kokkos::TeamPolicy<DeviceType, TagCSNAGridLocalComputeNeigh> team member.
struct TagCSNAGridLocalComputeNeigh{};
|
||||
struct TagCSNAGridLocalComputeCayleyKlein{};
|
||||
struct TagCSNAGridLocalPreUi{};
|
||||
@ -70,25 +45,11 @@ struct TagCSNAGridLocalTransformUi{}; // re-order ulisttot from SoA to AoSoA, ze
|
||||
template <bool chemsnap> struct TagCSNAGridLocalComputeZi{};
|
||||
template <bool chemsnap> struct TagCSNAGridLocalComputeBi{};
|
||||
struct TagCSNAGridLocal2Fill{}; // fill the gridlocal array
|
||||
//struct TagCSNAGridLocalFill2{}; // fill the gridlocal array using same kinda loop as ComputeForce
|
||||
|
||||
struct TagComputeSNAGridLocalLoop{};
|
||||
// Empty tag type: selects the 3D operator()(TagComputeSNAGridLocal3D, iz, iy, ix) overload.
struct TagComputeSNAGridLocal3D{};
|
||||
|
||||
// CPU backend only
|
||||
/*
|
||||
struct TagPairSNAPComputeNeighCPU{};
|
||||
struct TagPairSNAPPreUiCPU{};
|
||||
struct TagPairSNAPComputeUiCPU{};
|
||||
struct TagPairSNAPTransformUiCPU{};
|
||||
struct TagPairSNAPComputeZiCPU{};
|
||||
struct TagPairSNAPBetaCPU{};
|
||||
struct TagPairSNAPComputeBiCPU{};
|
||||
struct TagPairSNAPZeroYiCPU{};
|
||||
struct TagPairSNAPComputeYiCPU{};
|
||||
struct TagPairSNAPComputeDuidrjCPU{};
|
||||
struct TagPairSNAPComputeDeidrjCPU{};
|
||||
*/
|
||||
struct TagComputeSNAGridLocalLoopCPU{};
|
||||
|
||||
//template<class DeviceType>
|
||||
@ -184,9 +145,6 @@ class ComputeSNAGridLocalKokkos : public ComputeSNAGridLocal {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagCSNAGridLocalComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagCSNAGridLocalComputeNeigh>::member_type& team) const;
|
||||
|
||||
// PrintNeigh
|
||||
//void operator() (TagPrintNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagPrintNeigh>::member_type& team) const;
|
||||
|
||||
// 3D case - used by parallel_for
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagComputeSNAGridLocal3D, const int& iz, const int& iy, const int& ix) const;
|
||||
@ -274,16 +232,6 @@ class ComputeSNAGridLocalKokkos : public ComputeSNAGridLocal {
|
||||
DAT::tdual_float_2d k_alocal;
|
||||
typename AT::t_float_2d d_alocal;
|
||||
|
||||
/*
|
||||
DAT::tdual_float_2d k_grid;
|
||||
DAT::tdual_float_2d k_gridall;
|
||||
typename AT::t_float_2d d_grid;
|
||||
typename AT::t_float_2d d_gridall;
|
||||
|
||||
DAT::tdual_float_4d k_gridlocal;
|
||||
typename AT::t_float_4d d_gridlocal;
|
||||
*/
|
||||
|
||||
|
||||
// Utility routine which wraps computing per-team scratch size requirements for
|
||||
// ComputeNeigh, ComputeUi, and ComputeFusedDeidrj
|
||||
@ -293,11 +241,6 @@ class ComputeSNAGridLocalKokkos : public ComputeSNAGridLocal {
|
||||
class DomainKokkos *domainKK;
|
||||
|
||||
// triclinic vars
|
||||
/*
|
||||
xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
|
||||
xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
|
||||
xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
|
||||
*/
|
||||
double h0, h1, h2, h3, h4, h5;
|
||||
double lo0, lo1, lo2;
|
||||
|
||||
@ -320,7 +263,6 @@ class ComputeSNAGridLocalKokkosDevice : public ComputeSNAGridLocalKokkos<DeviceT
|
||||
ComputeSNAGridLocalKokkosDevice(class LAMMPS *, int, char **);
|
||||
|
||||
void compute_local() override;
|
||||
//void setup() override;
|
||||
|
||||
};
|
||||
|
||||
|
||||
@ -27,7 +27,6 @@
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "neighbor_kokkos.h"
|
||||
//#include "sna_kokkos.h"
|
||||
#include "domain.h"
|
||||
#include "domain_kokkos.h"
|
||||
#include "sna.h"
|
||||
@ -131,15 +130,10 @@ ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::ComputeSNAGridL
|
||||
// Destructor: releases the cutsq and alocal arrays through
// memoryKK->destroy_kokkos(), each together with its k_* counterpart.
// Nothing is released when copymode is set.
template<class DeviceType, typename real_type, int vector_length>
|
||||
ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::~ComputeSNAGridLocalKokkos()
|
||||
{
|
||||
//printf(">>> ComputeSNAGridLocalKokkos destruct begin copymode %d\n", copymode);
|
||||
// compute_local() sets copymode = 1 before running; presumably this guard
// keeps copies of the object from freeing arrays they do not own -- TODO confirm.
if (copymode) return;
|
||||
//printf(">>> After copymode\n");
|
||||
|
||||
memoryKK->destroy_kokkos(k_cutsq,cutsq);
|
||||
memoryKK->destroy_kokkos(k_alocal,alocal);
|
||||
// The grid/gridall/gridlocal arrays are not released here; their
// destroy calls are disabled:
//memoryKK->destroy_kokkos(k_grid,grid);
|
||||
//memoryKK->destroy_kokkos(k_gridall, gridall);
|
||||
//memoryKK->destroy_kokkos(k_gridlocal, gridlocal);
|
||||
}
|
||||
|
||||
// Setup
|
||||
@ -148,28 +142,11 @@ template<class DeviceType, typename real_type, int vector_length>
|
||||
// Setup: runs the ComputeGridLocal setup, then creates the alocal array via
// memoryKK->create_kokkos(), publishes it as array_local, and stores the
// DeviceType view of k_alocal in d_alocal.
void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::setup()
|
||||
{
|
||||
|
||||
// Do not call ComputeGrid::setup(); we do not want to allocate the grid array there.
|
||||
// The set_grid_global/set_grid_local and ComputeSNAGridLocal::setup() calls
// below are disabled; ComputeGridLocal::setup() is what actually runs.
|
||||
|
||||
//ComputeGrid::set_grid_global();
|
||||
//ComputeGrid::set_grid_local();
|
||||
//ComputeSNAGridLocal::setup();
|
||||
// NOTE(review): ComputeGridLocal::setup() appears to call allocate(), which
// itself creates alocal with memory->create(); alocal is then created again
// by create_kokkos() below -- confirm the first allocation is not leaked.
ComputeGridLocal::setup();
|
||||
|
||||
// allocate arrays
|
||||
//memoryKK->create_kokkos(k_gridall, gridall, size_array_rows, size_array_cols, "grid:gridall");
|
||||
memoryKK->create_kokkos(k_alocal, alocal, size_local_rows, size_local_cols, "grid:alocal");
|
||||
|
||||
// do not use or allocate gridlocal for now
|
||||
|
||||
//gridlocal_allocated = 0;
|
||||
//array = gridall;
|
||||
|
||||
// expose the newly created array as this compute's local array
array_local = alocal;
|
||||
|
||||
//d_gridlocal = k_gridlocal.template view<DeviceType>();
|
||||
//d_grid = k_grid.template view<DeviceType>();
|
||||
//d_gridall = k_gridall.template view<DeviceType>();
|
||||
// only the alocal view is taken; the grid/gridall/gridlocal views are disabled
d_alocal = k_alocal.template view<DeviceType>();
|
||||
}
|
||||
|
||||
@ -183,8 +160,6 @@ void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::compute_lo
|
||||
return;
|
||||
}
|
||||
|
||||
//printf(">>> ComputeSNAGridLocalKokkos::compute_local begin\n");
|
||||
|
||||
copymode = 1;
|
||||
|
||||
zlen = nzhi-nzlo+1;
|
||||
@ -205,12 +180,10 @@ void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::compute_lo
|
||||
|
||||
ntotal = atomKK->nlocal + atomKK->nghost;
|
||||
// Allocate view for number of neighbors per grid point
|
||||
//printf(">>> total_range: %d\n", total_range);
|
||||
MemKK::realloc_kokkos(d_ninside,"ComputeSNAGridLocalKokkos:ninside",total_range);
|
||||
|
||||
// "chunksize" variable is default 32768 in compute_sna_grid.cpp, and set by user
|
||||
// `total_range` is the number of grid points which may be larger than chunk size.
|
||||
//printf(">>> total_range: %d\n", total_range);
|
||||
chunk_size = MIN(chunksize, total_range);
|
||||
chunk_offset = 0;
|
||||
//snaKK.grow_rij(chunk_size, ntotal);
|
||||
@ -222,11 +195,6 @@ void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::compute_lo
|
||||
const int chunk_size_div = (chunk_size + vector_length - 1) / vector_length;
|
||||
|
||||
if (triclinic) {
|
||||
/*
|
||||
xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
|
||||
xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
|
||||
xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
|
||||
*/
|
||||
h0 = domain->h[0];
|
||||
h1 = domain->h[1];
|
||||
h2 = domain->h[2];
|
||||
@ -243,7 +211,6 @@ void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::compute_lo
|
||||
if (chunk_size > total_range - chunk_offset)
|
||||
chunk_size = total_range - chunk_offset;
|
||||
|
||||
//printf(">>> chunk_offset: %d\n", chunk_offset);
|
||||
|
||||
//ComputeNeigh
|
||||
{
|
||||
@ -401,7 +368,6 @@ void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::operator()
|
||||
|
||||
// index ii already captures the proper grid point
|
||||
//int igrid = iz * (nx * ny) + iy * nx + ix;
|
||||
//printf("%d %d\n", ii, igrid);
|
||||
|
||||
// grid2x converts igrid to ix,iy,iz like we've done before
|
||||
// multiply grid integers by grid spacing delx, dely, delz
|
||||
@ -419,11 +385,6 @@ void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::operator()
|
||||
// Because calling a __host__ function("lamda2x") from a __host__ __device__ function("operator()") is not allowed
|
||||
|
||||
// Using domainKK-> gives segfault, use domain-> instead since we're just accessing floats.
|
||||
/*
|
||||
xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
|
||||
xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
|
||||
xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
|
||||
*/
|
||||
xgrid[0] = h0*xgrid[0] + h5*xgrid[1] + h4*xgrid[2] + lo0;
|
||||
xgrid[1] = h1*xgrid[1] + h3*xgrid[2] + lo1;
|
||||
xgrid[2] = h2*xgrid[2] + lo2;
|
||||
@ -454,14 +415,6 @@ void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::operator()
|
||||
if (chemflag) ielem = d_map[itype];
|
||||
//const double radi = d_radelem[ielem];
|
||||
|
||||
// We need a DomainKokkos::lamda2x parallel for loop here, but let's ignore for now.
|
||||
// The purpose here is to transform for triclinic boxes.
|
||||
/*
|
||||
if (triclinic){
|
||||
printf("We are triclinic %f %f %f\n", xtmp, ytmp, ztmp);
|
||||
}
|
||||
*/
|
||||
|
||||
// Compute the number of neighbors, store rsq
|
||||
int ninside = 0;
|
||||
|
||||
@ -482,29 +435,6 @@ void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::operator()
|
||||
ninside++;
|
||||
}
|
||||
|
||||
/*
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,ntotal),
|
||||
[&] (const int j, int& count) {
|
||||
const F_FLOAT dx = x(j,0) - xtmp;
|
||||
const F_FLOAT dy = x(j,1) - ytmp;
|
||||
const F_FLOAT dz = x(j,2) - ztmp;
|
||||
|
||||
int jtype = type(j);
|
||||
const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
|
||||
|
||||
// don't include atoms that share location with grid point
|
||||
if (rsq >= rnd_cutsq(itype,jtype) || rsq < 1e-20) {
|
||||
jtype = -1; // use -1 to signal it's outside the radius
|
||||
}
|
||||
|
||||
type_cache[j] = jtype;
|
||||
|
||||
if (jtype >= 0)
|
||||
count++;
|
||||
|
||||
}, ninside);
|
||||
*/
|
||||
|
||||
d_ninside(ii) = ninside;
|
||||
|
||||
// TODO: Adjust for multi-element, currently we set jelem = 0 regardless of type.
|
||||
@ -539,75 +469,6 @@ void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::operator()
|
||||
offset++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
int offset = 0;
|
||||
for (int j = 0; j < ntotal; j++){
|
||||
const int jtype = type_cache[j];
|
||||
if (jtype >= 0) {
|
||||
printf(">>> offset: %d\n", offset);
|
||||
const F_FLOAT dx = x(j,0) - xtmp;
|
||||
const F_FLOAT dy = x(j,1) - ytmp;
|
||||
const F_FLOAT dz = x(j,2) - ztmp;
|
||||
int jtype = type(j);
|
||||
int jelem = 0;
|
||||
if (chemflag) jelem = d_map[jtype];
|
||||
snaKK.rij(ii,offset,0) = static_cast<real_type>(dx);
|
||||
snaKK.rij(ii,offset,1) = static_cast<real_type>(dy);
|
||||
snaKK.rij(ii,offset,2) = static_cast<real_type>(dz);
|
||||
// pair snap uses jelem here, but we use jtype, see compute_sna_grid.cpp
|
||||
// actually since the views here have values starting at 0, let's use jelem
|
||||
snaKK.wj(ii,offset) = static_cast<real_type>(d_wjelem[jelem]);
|
||||
snaKK.rcutij(ii,offset) = static_cast<real_type>((2.0 * d_radelem[jelem])*rcutfac);
|
||||
snaKK.inside(ii,offset) = j;
|
||||
if (switchinnerflag) {
|
||||
snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]);
|
||||
snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]);
|
||||
}
|
||||
if (chemflag)
|
||||
snaKK.element(ii,offset) = jelem;
|
||||
else
|
||||
snaKK.element(ii,offset) = 0;
|
||||
offset++;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,ntotal),
|
||||
[&] (const int j, int& offset, bool final) {
|
||||
|
||||
const int jtype = type_cache[j];
|
||||
|
||||
if (jtype >= 0) {
|
||||
if (final) {
|
||||
const F_FLOAT dx = x(j,0) - xtmp;
|
||||
const F_FLOAT dy = x(j,1) - ytmp;
|
||||
const F_FLOAT dz = x(j,2) - ztmp;
|
||||
int jtype = type(j);
|
||||
int jelem = 0;
|
||||
if (chemflag) jelem = d_map[jtype];
|
||||
snaKK.rij(ii,offset,0) = static_cast<real_type>(dx);
|
||||
snaKK.rij(ii,offset,1) = static_cast<real_type>(dy);
|
||||
snaKK.rij(ii,offset,2) = static_cast<real_type>(dz);
|
||||
// pair snap uses jelem here, but we use jtype, see compute_sna_grid.cpp
|
||||
// actually since the views here have values starting at 0, let's use jelem
|
||||
snaKK.wj(ii,offset) = static_cast<real_type>(d_wjelem[jelem]);
|
||||
snaKK.rcutij(ii,offset) = static_cast<real_type>((2.0 * d_radelem[jelem])*rcutfac);
|
||||
snaKK.inside(ii,offset) = j;
|
||||
if (switchinnerflag) {
|
||||
snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]);
|
||||
snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]);
|
||||
}
|
||||
if (chemflag)
|
||||
snaKK.element(ii,offset) = jelem;
|
||||
else
|
||||
snaKK.element(ii,offset) = 0;
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
});
|
||||
*/
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -839,22 +700,11 @@ void ComputeSNAGridLocalKokkos<DeviceType, real_type, vector_length>::operator()
|
||||
// Because calling a __host__ function("lamda2x") from a __host__ __device__ function("operator()") is not allowed
|
||||
|
||||
// Using domainKK-> gives segfault, use domain-> instead since we're just accessing floats.
|
||||
/*
|
||||
xgrid[0] = domain->h[0]*xgrid[0] + domain->h[5]*xgrid[1] + domain->h[4]*xgrid[2] + domain->boxlo[0];
|
||||
xgrid[1] = domain->h[1]*xgrid[1] + domain->h[3]*xgrid[2] + domain->boxlo[1];
|
||||
xgrid[2] = domain->h[2]*xgrid[2] + domain->boxlo[2];
|
||||
*/
|
||||
xgrid[0] = h0*xgrid[0] + h5*xgrid[1] + h4*xgrid[2] + lo0;
|
||||
xgrid[1] = h1*xgrid[1] + h3*xgrid[2] + lo1;
|
||||
xgrid[2] = h2*xgrid[2] + lo2;
|
||||
}
|
||||
|
||||
//const F_FLOAT xtmp = xgrid[0];
|
||||
//const F_FLOAT ytmp = xgrid[1];
|
||||
//const F_FLOAT ztmp = xgrid[2];
|
||||
//d_gridall(igrid,0) = xtmp;
|
||||
//d_gridall(igrid,1) = ytmp;
|
||||
//d_gridall(igrid,2) = ztmp;
|
||||
|
||||
const auto idxb_max = snaKK.idxb_max;
|
||||
|
||||
|
||||
@ -89,14 +89,12 @@ ComputeGaussianGridLocal::ComputeGaussianGridLocal(LAMMPS *lmp, int narg, char *
|
||||
|
||||
// Destructor: releases radelem, sigmaelem, prefacelem, argfacelem and cutsq
// via memory->destroy(). Nothing is released when copymode is set.
ComputeGaussianGridLocal::~ComputeGaussianGridLocal()
|
||||
{
|
||||
//printf(">>> ComputeGaussianGridLocal begin destruct copymode %d\n", copymode);
|
||||
// NOTE(review): presumably copymode marks a copy of this object that does not
// own the arrays (e.g. one made by the Kokkos subclass) -- confirm.
if (copymode) return;
|
||||
memory->destroy(radelem);
|
||||
memory->destroy(sigmaelem);
|
||||
memory->destroy(prefacelem);
|
||||
memory->destroy(argfacelem);
|
||||
memory->destroy(cutsq);
|
||||
//printf(">>> ComputeGaussianGridLocal end destruct\n");
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -111,8 +109,6 @@ void ComputeGaussianGridLocal::init()
|
||||
|
||||
void ComputeGaussianGridLocal::compute_local()
|
||||
{
|
||||
//printf(">>> compute_local CPU\n");
|
||||
//printf(">>> size_local_cols_base, size_local_cols: %d %d\n", size_local_cols_base, size_local_cols);
|
||||
invoked_local = update->ntimestep;
|
||||
|
||||
// compute gaussian for each gridpoint
|
||||
|
||||
@ -88,7 +88,6 @@ void ComputeGrid::grid2x(int igrid, double *x)
|
||||
x[2] = iz * delz;
|
||||
|
||||
if (triclinic) domain->lamda2x(x, x);
|
||||
//printf(">>>>> ComputeGrid::grid2x\n");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -104,7 +103,6 @@ void ComputeGrid::assign_coords_all()
|
||||
gridall[igrid][1] = x[1];
|
||||
gridall[igrid][2] = x[2];
|
||||
}
|
||||
//printf(">>>>> ComputeGrid::assign_coords_all\n");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -113,7 +111,6 @@ void ComputeGrid::assign_coords_all()
|
||||
|
||||
void ComputeGrid::allocate()
|
||||
{
|
||||
//printf(">>> ComputeGrid::allocate\n");
|
||||
// allocate arrays
|
||||
memory->create(grid, size_array_rows, size_array_cols, "grid:grid");
|
||||
memory->create(gridall, size_array_rows, size_array_cols, "grid:gridall");
|
||||
|
||||
@ -61,9 +61,7 @@ ComputeGridLocal::ComputeGridLocal(LAMMPS *lmp, int narg, char **arg) :
|
||||
|
||||
// Destructor: all cleanup is delegated to deallocate().
ComputeGridLocal::~ComputeGridLocal()
|
||||
{
|
||||
//printf(">>> ComputeGridLocal begin destruct\n");
|
||||
deallocate();
|
||||
//printf(">>> ComputeGridLocal end destruct\n");
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -75,7 +73,6 @@ void ComputeGridLocal::setup()
|
||||
set_grid_local();
|
||||
allocate();
|
||||
assign_coords();
|
||||
//printf(">>> ComputeGridLocal setup nx ny nz %d %d %d %d %d %d\n", nxlo, nxhi, nylo, nyhi, nzlo, nzhi);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -109,7 +106,6 @@ void ComputeGridLocal::grid2lamda(int ix, int iy, int iz, double *x)
|
||||
|
||||
void ComputeGridLocal::allocate()
|
||||
{
|
||||
//printf(">>> ComputeGridLocal::allocate %d %d\n", size_local_rows, size_local_cols);
|
||||
if (nxlo <= nxhi && nylo <= nyhi && nzlo <= nzhi) {
|
||||
gridlocal_allocated = 1;
|
||||
memory->create(alocal, size_local_rows, size_local_cols, "compute/grid/local:alocal");
|
||||
@ -123,14 +119,12 @@ void ComputeGridLocal::allocate()
|
||||
|
||||
// Releases the local grid array. Does nothing when copymode is set. Otherwise,
// if gridlocal_allocated is set, clears the flag and destroys alocal; in either
// case array_local is reset to nullptr afterwards.
void ComputeGridLocal::deallocate()
|
||||
{
|
||||
//printf(">>> ComputeGridLocal::deallocate begin gridlocal_allocated %d copymode %d\n", gridlocal_allocated, copymode);
|
||||
// NOTE(review): presumably copymode marks a copy that does not own alocal -- confirm.
if (copymode) return;
|
||||
|
||||
// the flag guards against destroying alocal when allocate() never created it
if (gridlocal_allocated) {
|
||||
gridlocal_allocated = 0;
|
||||
memory->destroy(alocal);
|
||||
}
|
||||
//printf(">>> ComputeGridLocal:: deallocate end\n");
|
||||
array_local = nullptr;
|
||||
}
|
||||
|
||||
@ -186,8 +180,6 @@ void ComputeGridLocal::set_grid_local()
|
||||
// the 2 equality if tests ensure a consistent decision
|
||||
// as to which proc owns it
|
||||
|
||||
//printf(">>> ComputeGridLocal set_grid_local\n");
|
||||
|
||||
double xfraclo, xfrachi, yfraclo, yfrachi, zfraclo, zfrachi;
|
||||
|
||||
if (comm->layout != Comm::LAYOUT_TILED) {
|
||||
|
||||
Reference in New Issue
Block a user