Merge branch 'master' into fix-ttm-grid

# Conflicts:
#	doc/src/pair_snap.rst
This commit is contained in:
Axel Kohlmeyer
2021-08-30 16:37:51 -04:00
30 changed files with 162 additions and 170 deletions

View File

@ -193,17 +193,17 @@ The keywords *chunksize* and *parallelthresh* are only applicable when
using the pair style *snap* with the KOKKOS package on GPUs and are using the pair style *snap* with the KOKKOS package on GPUs and are
ignored otherwise. The *chunksize* keyword controls the number of atoms ignored otherwise. The *chunksize* keyword controls the number of atoms
in each pass used to compute the bispectrum components and is used to in each pass used to compute the bispectrum components and is used to
avoid running out of memory. For example if there are 8192 atoms in the avoid running out of memory. For example if there are 8192 atoms in the
simulation and the *chunksize* is set to 4096, the bispectrum simulation and the *chunksize* is set to 4096, the bispectrum
calculation will be broken up into two passes (running on a single GPU). calculation will be broken up into two passes (running on a single GPU).
The *parallelthresh* keyword controls a crossover threshold for The *parallelthresh* keyword controls a crossover threshold for
performing extra parallelism. For small systems, exposing additional performing extra parallelism. For small systems, exposing additional
parallelism can be beneficial when there is not enough work to fully parallelism can be beneficial when there is not enough work to fully
saturate the GPU threads otherwise. However, the extra parallelism also saturate the GPU threads otherwise. However, the extra parallelism also
leads to more divergence and can hurt performance when the system is leads to more divergence and can hurt performance when the system is
already large enough to saturate the GPU threads. Extra parallelism will already large enough to saturate the GPU threads. Extra parallelism
be performed if the *chunksize* (or total number of atoms per GPU) is will be performed if the *chunksize* (or total number of atoms per GPU)
smaller than *parallelthresh*. is smaller than *parallelthresh*.
Detailed definitions for all the other keywords Detailed definitions for all the other keywords
are given on the :doc:`compute sna/atom <compute_sna_atom>` doc page. are given on the :doc:`compute sna/atom <compute_sna_atom>` doc page.

View File

@ -56,7 +56,7 @@ int BaseChargeT::init_atomic(const int nlocal, const int nall,
const int max_nbors, const int maxspecial, const int max_nbors, const int maxspecial,
const double cell_size, const double gpu_split, const double cell_size, const double gpu_split,
FILE *_screen, const void *pair_program, FILE *_screen, const void *pair_program,
const char *k_name) { const char *k_name, const int disable_fast_math) {
screen=_screen; screen=_screen;
int gpu_nbor=0; int gpu_nbor=0;
@ -83,7 +83,7 @@ int BaseChargeT::init_atomic(const int nlocal, const int nall,
_block_size=device->pair_block_size(); _block_size=device->pair_block_size();
_block_bio_size=device->block_bio_pair(); _block_bio_size=device->block_bio_pair();
compile_kernels(*ucl_device,pair_program,k_name); compile_kernels(*ucl_device,pair_program,k_name,disable_fast_math);
if (_threads_per_atom>1 && gpu_nbor==0) { if (_threads_per_atom>1 && gpu_nbor==0) {
nbor->packing(true); nbor->packing(true);
@ -321,14 +321,20 @@ double BaseChargeT::host_memory_usage_atomic() const {
template <class numtyp, class acctyp> template <class numtyp, class acctyp>
void BaseChargeT::compile_kernels(UCL_Device &dev, const void *pair_str, void BaseChargeT::compile_kernels(UCL_Device &dev, const void *pair_str,
const char *kname) { const char *kname,
const int disable_fast_math) {
if (_compiled) if (_compiled)
return; return;
std::string s_fast=std::string(kname)+"_fast"; std::string s_fast=std::string(kname)+"_fast";
if (pair_program) delete pair_program; if (pair_program) delete pair_program;
pair_program=new UCL_Program(dev); pair_program=new UCL_Program(dev);
std::string oclstring = device->compile_string()+" -DEVFLAG=1"; std::string device_compile_string;
if (disable_fast_math)
device_compile_string = device->compile_string_nofast();
else
device_compile_string = device->compile_string();
std::string oclstring = device_compile_string+" -DEVFLAG=1";
pair_program->load_string(pair_str,oclstring.c_str(),nullptr,screen); pair_program->load_string(pair_str,oclstring.c_str(),nullptr,screen);
k_pair_fast.set_function(*pair_program,s_fast.c_str()); k_pair_fast.set_function(*pair_program,s_fast.c_str());
k_pair.set_function(*pair_program,kname); k_pair.set_function(*pair_program,kname);
@ -336,7 +342,7 @@ void BaseChargeT::compile_kernels(UCL_Device &dev, const void *pair_str,
q_tex.get_texture(*pair_program,"q_tex"); q_tex.get_texture(*pair_program,"q_tex");
#if defined(LAL_OCL_EV_JIT) #if defined(LAL_OCL_EV_JIT)
oclstring = device->compile_string()+" -DEVFLAG=0"; oclstring = device_compile_string+" -DEVFLAG=0";
if (pair_program_noev) delete pair_program_noev; if (pair_program_noev) delete pair_program_noev;
pair_program_noev=new UCL_Program(dev); pair_program_noev=new UCL_Program(dev);
pair_program_noev->load_string(pair_str,oclstring.c_str(),nullptr,screen); pair_program_noev->load_string(pair_str,oclstring.c_str(),nullptr,screen);

View File

@ -44,6 +44,7 @@ class BaseCharge {
* \param cell_size cutoff + skin * \param cell_size cutoff + skin
* \param gpu_split fraction of particles handled by device * \param gpu_split fraction of particles handled by device
* \param k_name name for the kernel for force calculation * \param k_name name for the kernel for force calculation
* \param disable_fast_math override any fast math opts for kernel JIT
* *
* Returns: * Returns:
* - 0 if successful * - 0 if successful
@ -54,7 +55,8 @@ class BaseCharge {
int init_atomic(const int nlocal, const int nall, const int max_nbors, int init_atomic(const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, const int maxspecial, const double cell_size,
const double gpu_split, FILE *screen, const double gpu_split, FILE *screen,
const void *pair_program, const char *k_name); const void *pair_program, const char *k_name,
const int disable_fast_math = 0);
/// Estimate the overhead for GPU context changes and CPU driver /// Estimate the overhead for GPU context changes and CPU driver
void estimate_gpu_overhead(const int add_kernels=0); void estimate_gpu_overhead(const int add_kernels=0);
@ -198,7 +200,8 @@ class BaseCharge {
double _gpu_overhead, _driver_overhead; double _gpu_overhead, _driver_overhead;
UCL_D_Vec<int> *_nbor_data; UCL_D_Vec<int> *_nbor_data;
void compile_kernels(UCL_Device &dev, const void *pair_string, const char *k); void compile_kernels(UCL_Device &dev, const void *pair_string,
const char *k, const int disable_fast_math);
virtual int loop(const int eflag, const int vflag) = 0; virtual int loop(const int eflag, const int vflag) = 0;
}; };

View File

@ -224,7 +224,9 @@ void BaseEllipsoidT::output_times() {
#ifdef USE_OPENCL #ifdef USE_OPENCL
// Workaround for timing issue on Intel OpenCL // Workaround for timing issue on Intel OpenCL
if (times[0] > 80e6) times[0]=0.0;
if (times[3] > 80e6) times[3]=0.0; if (times[3] > 80e6) times[3]=0.0;
if (times[6] > 80e6) times[6]=0.0;
#endif #endif
if (device->replica_me()==0) if (device->replica_me()==0)
@ -237,17 +239,18 @@ void BaseEllipsoidT::output_times() {
fprintf(screen,"\n-------------------------------------"); fprintf(screen,"\n-------------------------------------");
fprintf(screen,"--------------------------------\n"); fprintf(screen,"--------------------------------\n");
if (device->procs_per_gpu()==1 && times[3]>0) { if (device->procs_per_gpu()==1 && (times[3] > 0.0)) {
fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/replica_size); if (times[0] > 0.0)
fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/replica_size);
fprintf(screen,"Neighbor copy: %.4f s.\n",times[1]/replica_size); fprintf(screen,"Neighbor copy: %.4f s.\n",times[1]/replica_size);
if (nbor->gpu_nbor()>0) if (nbor->gpu_nbor() > 0.0)
fprintf(screen,"Neighbor build: %.4f s.\n",times[2]/replica_size); fprintf(screen,"Neighbor build: %.4f s.\n",times[2]/replica_size);
else else
fprintf(screen,"Neighbor unpack: %.4f s.\n",times[2]/replica_size); fprintf(screen,"Neighbor unpack: %.4f s.\n",times[2]/replica_size);
fprintf(screen,"Force calc: %.4f s.\n",times[3]/replica_size); fprintf(screen,"Force calc: %.4f s.\n",times[3]/replica_size);
fprintf(screen,"LJ calc: %.4f s.\n",times[4]/replica_size); fprintf(screen,"LJ calc: %.4f s.\n",times[4]/replica_size);
} }
if (times[6]>0) if (times[6] > 0.0)
fprintf(screen,"Device Overhead: %.4f s.\n",times[6]/replica_size); fprintf(screen,"Device Overhead: %.4f s.\n",times[6]/replica_size);
fprintf(screen,"Average split: %.4f.\n",avg_split); fprintf(screen,"Average split: %.4f.\n",avg_split);
fprintf(screen,"Lanes / atom: %d.\n",_threads_per_atom); fprintf(screen,"Lanes / atom: %d.\n",_threads_per_atom);

View File

@ -57,7 +57,7 @@ int BornCoulWolfT::init(const int ntypes, double **host_cutsq, double **host_rho
const double alf, const double e_shift, const double f_shift) { const double alf, const double e_shift, const double f_shift) {
int success; int success;
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
_screen,born_coul_wolf,"k_born_coul_wolf"); _screen,born_coul_wolf,"k_born_coul_wolf",1);
if (success!=0) if (success!=0)
return success; return success;

View File

@ -42,7 +42,7 @@ int BornCoulWolfCST::init(const int ntypes, double **host_cutsq, double **host_r
const double alf, const double e_shift, const double f_shift) { const double alf, const double e_shift, const double f_shift) {
int success; int success;
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
_screen,born_coul_wolf_cs,"k_born_coul_wolf_cs"); _screen,born_coul_wolf_cs,"k_born_coul_wolf_cs",1);
if (success!=0) if (success!=0)
return success; return success;

View File

@ -420,6 +420,16 @@ int DeviceT::set_ocl_params(std::string s_config, std::string extra_args) {
return 0; return 0;
} }
// Return a copy of the OpenCL JIT compile string with fast-math disabled.
//
// The stored _ocl_compile_string is left untouched. In the copy:
//  - the first "-cl-fast-relaxed-math " option (including its trailing
//    blank separator) is removed, if present;
//  - the character immediately following the first "-DFAST_MATH=" is
//    replaced by '0', if such a character exists.
// NOTE(review): this presumes the FAST_MATH value is a single character
// (e.g. "1"); confirm against the code that builds _ocl_compile_string.
template <class numtyp, class acctyp>
std::string DeviceT::compile_string_nofast() {
  // Token lengths are taken from the strings themselves instead of
  // hard-coded counts, so the search text and the erase/offset cannot drift.
  const std::string fast_flag("-cl-fast-relaxed-math ");
  const std::string fast_define("-DFAST_MATH=");

  std::string no_fast = _ocl_compile_string;

  size_t p = no_fast.find(fast_flag);
  if (p != std::string::npos) no_fast.erase(p,fast_flag.size());

  p = no_fast.find(fast_define);
  if (p != std::string::npos) {
    const size_t value_pos = p + fast_define.size();
    // Writing to no_fast[size()] is undefined behavior, so only overwrite
    // when a value character actually follows the '='.
    if (value_pos < no_fast.size()) no_fast[value_pos]='0';
  }
  return no_fast;
}
template <class numtyp, class acctyp> template <class numtyp, class acctyp>
int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge, int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
const bool rot, const int nlocal, const bool rot, const int nlocal,
@ -777,28 +787,30 @@ void DeviceT::output_times(UCL_Timer &time_pair, Answer<numtyp,acctyp> &ans,
#ifdef USE_OPENCL #ifdef USE_OPENCL
// Workaround for timing issue on Intel OpenCL // Workaround for timing issue on Intel OpenCL
if (times[0] > 80e6) times[0]=0.0;
if (times[3] > 80e6) times[3]=0.0; if (times[3] > 80e6) times[3]=0.0;
if (times[5] > 80e6) times[5]=0.0; if (times[5] > 80e6) times[5]=0.0;
#endif #endif
if (replica_me()==0) if (replica_me()==0)
if (screen && times[6]>0.0) { if (screen && (times[6] > 0.0)) {
fprintf(screen,"\n\n-------------------------------------"); fprintf(screen,"\n\n-------------------------------------");
fprintf(screen,"--------------------------------\n"); fprintf(screen,"--------------------------------\n");
fprintf(screen," Device Time Info (average): "); fprintf(screen," Device Time Info (average): ");
fprintf(screen,"\n-------------------------------------"); fprintf(screen,"\n-------------------------------------");
fprintf(screen,"--------------------------------\n"); fprintf(screen,"--------------------------------\n");
if (time_device() && times[3]>0) { if (time_device() && (times[3] > 0.0)) {
fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/_replica_size); if (times[0] > 0.0)
fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/_replica_size);
fprintf(screen,"Neighbor copy: %.4f s.\n",times[1]/_replica_size); fprintf(screen,"Neighbor copy: %.4f s.\n",times[1]/_replica_size);
if (nbor.gpu_nbor()>0) if (nbor.gpu_nbor() > 0.0)
fprintf(screen,"Neighbor build: %.4f s.\n",times[2]/_replica_size); fprintf(screen,"Neighbor build: %.4f s.\n",times[2]/_replica_size);
else else
fprintf(screen,"Neighbor unpack: %.4f s.\n",times[2]/_replica_size); fprintf(screen,"Neighbor unpack: %.4f s.\n",times[2]/_replica_size);
fprintf(screen,"Force calc: %.4f s.\n",times[3]/_replica_size); fprintf(screen,"Force calc: %.4f s.\n",times[3]/_replica_size);
} }
if (times[5]>0) if (times[5] > 0.0)
fprintf(screen,"Device Overhead: %.4f s.\n",times[5]/_replica_size); fprintf(screen,"Device Overhead: %.4f s.\n",times[5]/_replica_size);
fprintf(screen,"Average split: %.4f.\n",avg_split); fprintf(screen,"Average split: %.4f.\n",avg_split);
fprintf(screen,"Lanes / atom: %d.\n",threads_per_atom); fprintf(screen,"Lanes / atom: %d.\n",threads_per_atom);

View File

@ -312,6 +312,7 @@ class Device {
} }
inline std::string compile_string() { return _ocl_compile_string; } inline std::string compile_string() { return _ocl_compile_string; }
std::string compile_string_nofast();
inline std::string ocl_config_name() { return _ocl_config_name; } inline std::string ocl_config_name() { return _ocl_config_name; }
template <class t> template <class t>

View File

@ -406,8 +406,8 @@ KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POW
KOKKOS_INTERNAL_USE_TM := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_SKX)) KOKKOS_INTERNAL_USE_TM := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_SKX))
# Incompatible flags? # Incompatible flags?
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1" | bc )) KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc )
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc)) KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1") | bc)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
$(error Defined Multiple Host architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) ) $(error Defined Multiple Host architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )

View File

@ -13,25 +13,26 @@
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
#include "fix_gpu.h" #include "fix_gpu.h"
#include <cstring>
#include "atom.h" #include "atom.h"
#include "citeme.h"
#include "comm.h" #include "comm.h"
#include "domain.h"
#include "error.h"
#include "force.h" #include "force.h"
#include "gpu_extra.h"
#include "input.h"
#include "modify.h"
#include "neighbor.h"
#include "pair.h" #include "pair.h"
#include "pair_hybrid.h" #include "pair_hybrid.h"
#include "pair_hybrid_overlay.h" #include "pair_hybrid_overlay.h"
#include "respa.h" #include "respa.h"
#include "input.h"
#include "timer.h" #include "timer.h"
#include "modify.h"
#include "update.h"
#include "domain.h"
#include "universe.h" #include "universe.h"
#include "gpu_extra.h" #include "update.h"
#include "neighbor.h"
#include "citeme.h" #include <cstring>
#include "error.h"
#if (LAL_USE_OMP == 1) #if (LAL_USE_OMP == 1)
#include <omp.h> #include <omp.h>
@ -275,12 +276,15 @@ void FixGPU::init()
error->warning(FLERR,"Using package gpu without any pair style defined"); error->warning(FLERR,"Using package gpu without any pair style defined");
// make sure fdotr virial is not accumulated multiple times // make sure fdotr virial is not accumulated multiple times
// also disallow GPU neighbor lists for hybrid styles
if (force->pair_match("^hybrid",0) != nullptr) { if (force->pair_match("^hybrid",0) != nullptr) {
PairHybrid *hybrid = (PairHybrid *) force->pair; PairHybrid *hybrid = (PairHybrid *) force->pair;
for (int i = 0; i < hybrid->nstyles; i++) for (int i = 0; i < hybrid->nstyles; i++)
if (!utils::strmatch(hybrid->keywords[i],"/gpu$")) if (!utils::strmatch(hybrid->keywords[i],"/gpu$"))
force->pair->no_virial_fdotr_compute = 1; force->pair->no_virial_fdotr_compute = 1;
if (_gpu_mode != GPU_FORCE)
error->all(FLERR, "Must not use GPU neighbor lists with hybrid pair style");
} }
// rRESPA support // rRESPA support
@ -295,8 +299,7 @@ void FixGPU::setup(int vflag)
{ {
if (_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH) if (_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH)
if (neighbor->exclude_setting() != 0) if (neighbor->exclude_setting() != 0)
error->all(FLERR, error->all(FLERR, "Cannot use neigh_modify exclude with GPU neighbor builds");
"Cannot use neigh_modify exclude with GPU neighbor builds");
if (utils::strmatch(update->integrate_style,"^verlet")) post_force(vflag); if (utils::strmatch(update->integrate_style,"^verlet")) post_force(vflag);
else { else {

View File

@ -30,7 +30,7 @@ class FixFreeze : public Fix {
int setmask(); int setmask();
void init(); void init();
void setup(int); void setup(int);
void post_force(int); virtual void post_force(int);
void post_force_respa(int, int, int); void post_force_respa(int, int, int);
double compute_vector(int); double compute_vector(int);

View File

@ -20,10 +20,6 @@
#define USE_OMP_SIMD #define USE_OMP_SIMD
#define __INTEL_COMPILER __INTEL_LLVM_COMPILER #define __INTEL_COMPILER __INTEL_LLVM_COMPILER
#define __INTEL_COMPILER_BUILD_DATE __INTEL_LLVM_COMPILER #define __INTEL_COMPILER_BUILD_DATE __INTEL_LLVM_COMPILER
#define _MM_SCALE_1 1
#define _MM_SCALE_2 2
#define _MM_SCALE_4 4
#define _MM_SCALE_8 8
#endif #endif
#ifdef __INTEL_COMPILER #ifdef __INTEL_COMPILER

View File

@ -35,6 +35,13 @@ authors for more details.
#ifdef __AVX512F__ #ifdef __AVX512F__
#ifndef _MM_SCALE_1
#define _MM_SCALE_1 1
#define _MM_SCALE_2 2
#define _MM_SCALE_4 4
#define _MM_SCALE_8 8
#endif
namespace ip_simd { namespace ip_simd {
typedef __mmask16 SIMD_mask; typedef __mmask16 SIMD_mask;

View File

@ -1,4 +1,3 @@
// clang-format off
/* -*- c++ -*- ---------------------------------------------------------- /* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories https://www.lammps.org/, Sandia National Laboratories
@ -20,6 +19,7 @@ ComputeStyle(temp/deform/kk/host,ComputeTempDeformKokkos<LMPHostType>);
// clang-format on // clang-format on
#else #else
// clang-format off
#ifndef LMP_COMPUTE_TEMP_DEFORM_KOKKOS_H #ifndef LMP_COMPUTE_TEMP_DEFORM_KOKKOS_H
#define LMP_COMPUTE_TEMP_DEFORM_KOKKOS_H #define LMP_COMPUTE_TEMP_DEFORM_KOKKOS_H

View File

@ -28,41 +28,16 @@ FixFreezeKokkos<DeviceType>::FixFreezeKokkos(LAMMPS *lmp, int narg, char **arg)
atomKK = (AtomKokkos *)atom; atomKK = (AtomKokkos *)atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space; execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = F_MASK | MASK_MASK; datamask_read = F_MASK | MASK_MASK | TORQUE_MASK;
datamask_modify = F_MASK | TORQUE_MASK; datamask_modify = F_MASK | TORQUE_MASK;
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
template<class DeviceType>
int FixFreezeKokkos<DeviceType>::setmask()
{
return FixFreeze::setmask();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixFreezeKokkos<DeviceType>::init()
{
FixFreeze::init();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixFreezeKokkos<DeviceType>::setup(int vflag)
{
FixFreeze::setup(vflag);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType> template<class DeviceType>
void FixFreezeKokkos<DeviceType>::post_force(int /*vflag*/) void FixFreezeKokkos<DeviceType>::post_force(int /*vflag*/)
{ {
atomKK->sync(execution_space,datamask_read); atomKK->sync(execution_space,datamask_read);
atomKK->modified(execution_space,datamask_modify);
f = atomKK->k_f.view<DeviceType>(); f = atomKK->k_f.view<DeviceType>();
torque = atomKK->k_torque.view<DeviceType>(); torque = atomKK->k_torque.view<DeviceType>();
@ -80,28 +55,10 @@ void FixFreezeKokkos<DeviceType>::post_force(int /*vflag*/)
foriginal[0] = original.values[0]; foriginal[0] = original.values[0];
foriginal[1] = original.values[1]; foriginal[1] = original.values[1];
foriginal[2] = original.values[2]; foriginal[2] = original.values[2];
atomKK->modified(execution_space,datamask_modify);
} }
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixFreezeKokkos<DeviceType>::post_force_respa(int vflag, int /*ilevel*/, int /*iloop*/)
{
post_force(vflag);
}
/* ----------------------------------------------------------------------
return components of total force on fix group before force was changed
------------------------------------------------------------------------- */
template<class DeviceType>
double FixFreezeKokkos<DeviceType>::compute_vector(int n)
{
return FixFreeze::compute_vector(n);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType> template<class DeviceType>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void FixFreezeKokkos<DeviceType>::operator()(const int i, OriginalForce &original) const { void FixFreezeKokkos<DeviceType>::operator()(const int i, OriginalForce &original) const {

View File

@ -31,6 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType> template<class DeviceType>
class FixFreezeKokkos : public FixFreeze { class FixFreezeKokkos : public FixFreeze {
public: public:
typedef DeviceType device_type;
struct OriginalForce { struct OriginalForce {
double values[3]; double values[3];
@ -58,12 +59,7 @@ class FixFreezeKokkos : public FixFreeze {
}; };
FixFreezeKokkos(class LAMMPS *, int, char **); FixFreezeKokkos(class LAMMPS *, int, char **);
int setmask();
void init();
void setup(int);
void post_force(int); void post_force(int);
void post_force_respa(int, int, int);
double compute_vector(int);
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void operator()(const int i, OriginalForce &original) const; void operator()(const int i, OriginalForce &original) const;

View File

@ -87,6 +87,9 @@ void FixNeighHistoryKokkos<DeviceType>::pre_exchange()
{ {
copymode = 1; copymode = 1;
k_firstflag.sync<DeviceType>();
k_firstvalue.sync<DeviceType>();
h_resize() = 1; h_resize() = 1;
while (h_resize() > 0) { while (h_resize() > 0) {
FixNeighHistoryKokkosZeroPartnerCountFunctor<DeviceType> zero(this); FixNeighHistoryKokkosZeroPartnerCountFunctor<DeviceType> zero(this);
@ -168,6 +171,9 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
{ {
tag = atomKK->k_tag.view<DeviceType>(); tag = atomKK->k_tag.view<DeviceType>();
k_firstflag.sync<DeviceType>();
k_firstvalue.sync<DeviceType>();
int inum = pair->list->inum; int inum = pair->list->inum;
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(pair->list); NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(pair->list);
d_numneigh = k_list->d_numneigh; d_numneigh = k_list->d_numneigh;
@ -185,8 +191,10 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
if (maxatom < nlocal || k_list->maxneighs > (int)d_firstflag.extent(1)) { if (maxatom < nlocal || k_list->maxneighs > (int)d_firstflag.extent(1)) {
maxatom = nall; maxatom = nall;
d_firstflag = Kokkos::View<int**>("neighbor_history:firstflag",maxatom,k_list->maxneighs); k_firstflag = DAT::tdual_int_2d("neighbor_history:firstflag",maxatom,k_list->maxneighs);
d_firstvalue = Kokkos::View<LMP_FLOAT**>("neighbor_history:firstvalue",maxatom,k_list->maxneighs*dnum); k_firstvalue = DAT::tdual_float_2d("neighbor_history:firstvalue",maxatom,k_list->maxneighs*dnum);
d_firstflag = k_firstflag.view<DeviceType>();
d_firstvalue = k_firstvalue.view<DeviceType>();
} }
copymode = 1; copymode = 1;
@ -194,6 +202,9 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
FixNeighHistoryKokkosPostNeighborFunctor<DeviceType> f(this); FixNeighHistoryKokkosPostNeighborFunctor<DeviceType> f(this);
Kokkos::parallel_for(inum,f); Kokkos::parallel_for(inum,f);
k_firstflag.modify<DeviceType>();
k_firstvalue.modify<DeviceType>();
copymode = 0; copymode = 0;
} }

View File

@ -50,10 +50,13 @@ class FixNeighHistoryKokkos : public FixNeighHistory {
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void post_neighbor_item(const int &ii) const; void post_neighbor_item(const int &ii) const;
typename Kokkos::View<int**> d_firstflag; typename DAT::tdual_int_2d k_firstflag;
typename Kokkos::View<LMP_FLOAT**> d_firstvalue; typename DAT::tdual_float_2d k_firstvalue;
private: private:
typename ArrayTypes<DeviceType>::t_int_2d d_firstflag;
typename ArrayTypes<DeviceType>::t_float_2d d_firstvalue;
typename ArrayTypes<DeviceType>::tdual_int_1d k_npartner; typename ArrayTypes<DeviceType>::tdual_int_1d k_npartner;
typename ArrayTypes<DeviceType>::tdual_tagint_2d k_partner; typename ArrayTypes<DeviceType>::tdual_tagint_2d k_partner;
typename ArrayTypes<DeviceType>::tdual_float_2d k_valuepartner; typename ArrayTypes<DeviceType>::tdual_float_2d k_valuepartner;
@ -74,6 +77,7 @@ class FixNeighHistoryKokkos : public FixNeighHistory {
template <class DeviceType> template <class DeviceType>
struct FixNeighHistoryKokkosZeroPartnerCountFunctor { struct FixNeighHistoryKokkosZeroPartnerCountFunctor {
typedef DeviceType device_type;
FixNeighHistoryKokkos<DeviceType> c; FixNeighHistoryKokkos<DeviceType> c;
FixNeighHistoryKokkosZeroPartnerCountFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {} FixNeighHistoryKokkosZeroPartnerCountFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
@ -84,6 +88,7 @@ struct FixNeighHistoryKokkosZeroPartnerCountFunctor {
template <class DeviceType> template <class DeviceType>
struct FixNeighHistoryKokkosPreExchangeFunctor { struct FixNeighHistoryKokkosPreExchangeFunctor {
typedef DeviceType device_type;
FixNeighHistoryKokkos<DeviceType> c; FixNeighHistoryKokkos<DeviceType> c;
FixNeighHistoryKokkosPreExchangeFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {} FixNeighHistoryKokkosPreExchangeFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
@ -94,6 +99,7 @@ struct FixNeighHistoryKokkosPreExchangeFunctor {
template <class DeviceType> template <class DeviceType>
struct FixNeighHistoryKokkosPostNeighborFunctor { struct FixNeighHistoryKokkosPostNeighborFunctor {
typedef DeviceType device_type;
FixNeighHistoryKokkos<DeviceType> c; FixNeighHistoryKokkos<DeviceType> c;
FixNeighHistoryKokkosPostNeighborFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {} FixNeighHistoryKokkosPostNeighborFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION

View File

@ -31,8 +31,8 @@ FixNVESphereKokkos<DeviceType>::FixNVESphereKokkos(LAMMPS *lmp, int narg, char *
atomKK = (AtomKokkos *)atom; atomKK = (AtomKokkos *)atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space; execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = F_MASK | TORQUE_MASK | RMASS_MASK | RADIUS_MASK | MASK_MASK; datamask_read = EMPTY_MASK;
datamask_modify = X_MASK | V_MASK | OMEGA_MASK; datamask_modify = EMPTY_MASK;
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
@ -61,8 +61,7 @@ void FixNVESphereKokkos<DeviceType>::init()
template<class DeviceType> template<class DeviceType>
void FixNVESphereKokkos<DeviceType>::initial_integrate(int /*vflag*/) void FixNVESphereKokkos<DeviceType>::initial_integrate(int /*vflag*/)
{ {
atomKK->sync(execution_space,datamask_read); atomKK->sync(execution_space, X_MASK | V_MASK | OMEGA_MASK| F_MASK | TORQUE_MASK | RMASS_MASK | RADIUS_MASK | MASK_MASK);
atomKK->modified(execution_space,datamask_modify);
x = atomKK->k_x.view<DeviceType>(); x = atomKK->k_x.view<DeviceType>();
v = atomKK->k_v.view<DeviceType>(); v = atomKK->k_v.view<DeviceType>();
@ -78,6 +77,8 @@ void FixNVESphereKokkos<DeviceType>::initial_integrate(int /*vflag*/)
FixNVESphereKokkosInitialIntegrateFunctor<DeviceType> f(this); FixNVESphereKokkosInitialIntegrateFunctor<DeviceType> f(this);
Kokkos::parallel_for(nlocal,f); Kokkos::parallel_for(nlocal,f);
atomKK->modified(execution_space, X_MASK | V_MASK | OMEGA_MASK);
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
@ -109,8 +110,7 @@ void FixNVESphereKokkos<DeviceType>::initial_integrate_item(const int i) const
template<class DeviceType> template<class DeviceType>
void FixNVESphereKokkos<DeviceType>::final_integrate() void FixNVESphereKokkos<DeviceType>::final_integrate()
{ {
atomKK->sync(execution_space,datamask_read); atomKK->sync(execution_space, V_MASK | OMEGA_MASK| F_MASK | TORQUE_MASK | RMASS_MASK | RADIUS_MASK | MASK_MASK);
atomKK->modified(execution_space,datamask_modify);
v = atomKK->k_v.view<DeviceType>(); v = atomKK->k_v.view<DeviceType>();
omega = atomKK->k_omega.view<DeviceType>(); omega = atomKK->k_omega.view<DeviceType>();
@ -125,6 +125,8 @@ void FixNVESphereKokkos<DeviceType>::final_integrate()
FixNVESphereKokkosFinalIntegrateFunctor<DeviceType> f(this); FixNVESphereKokkosFinalIntegrateFunctor<DeviceType> f(this);
Kokkos::parallel_for(nlocal,f); Kokkos::parallel_for(nlocal,f);
atomKK->modified(execution_space, V_MASK | OMEGA_MASK);
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */

View File

@ -56,6 +56,7 @@ class FixNVESphereKokkos : public FixNVESphere {
template <class DeviceType> template <class DeviceType>
struct FixNVESphereKokkosInitialIntegrateFunctor { struct FixNVESphereKokkosInitialIntegrateFunctor {
typedef DeviceType device_type;
FixNVESphereKokkos<DeviceType> c; FixNVESphereKokkos<DeviceType> c;
FixNVESphereKokkosInitialIntegrateFunctor(FixNVESphereKokkos<DeviceType> *c_ptr): c(*c_ptr) { c.cleanup_copy(); } FixNVESphereKokkosInitialIntegrateFunctor(FixNVESphereKokkos<DeviceType> *c_ptr): c(*c_ptr) { c.cleanup_copy(); }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
@ -66,6 +67,7 @@ struct FixNVESphereKokkosInitialIntegrateFunctor {
template <class DeviceType> template <class DeviceType>
struct FixNVESphereKokkosFinalIntegrateFunctor { struct FixNVESphereKokkosFinalIntegrateFunctor {
typedef DeviceType device_type;
FixNVESphereKokkos<DeviceType> c; FixNVESphereKokkos<DeviceType> c;
FixNVESphereKokkosFinalIntegrateFunctor(FixNVESphereKokkos<DeviceType> *c_ptr): c(*c_ptr) { c.cleanup_copy(); } FixNVESphereKokkosFinalIntegrateFunctor(FixNVESphereKokkos<DeviceType> *c_ptr): c(*c_ptr) { c.cleanup_copy(); }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION

View File

@ -65,8 +65,6 @@ void FixNVTSllodKokkos<DeviceType>::init()
{ {
FixNHKokkos<DeviceType>::init(); FixNHKokkos<DeviceType>::init();
vdelu = typename ArrayTypes<DeviceType>::t_v_array("nvt/sllod/kk:vdelu", atomKK->nlocal);
if (!this->temperature->tempbias) if (!this->temperature->tempbias)
this->error->all(FLERR,"Temperature for fix nvt/sllod does not have a bias"); this->error->all(FLERR,"Temperature for fix nvt/sllod does not have a bias");
@ -100,7 +98,7 @@ void FixNVTSllodKokkos<DeviceType>::nh_v_temp()
// calculate temperature since some computes require temp // calculate temperature since some computes require temp
// computed on current nlocal atoms to remove bias // computed on current nlocal atoms to remove bias
if (nondeformbias){ if (nondeformbias) {
atomKK->sync(this->temperature->execution_space,this->temperature->datamask_read); atomKK->sync(this->temperature->execution_space,this->temperature->datamask_read);
this->temperature->compute_scalar(); this->temperature->compute_scalar();
atomKK->modified(this->temperature->execution_space,this->temperature->datamask_modify); atomKK->modified(this->temperature->execution_space,this->temperature->datamask_modify);
@ -115,6 +113,9 @@ void FixNVTSllodKokkos<DeviceType>::nh_v_temp()
d_h_two = Few<double, 6>(h_two); d_h_two = Few<double, 6>(h_two);
if (vdelu.extent(0) < atomKK->nmax)
vdelu = typename AT::t_v_array(Kokkos::NoInit("nvt/sllod/kk:vdelu"), atomKK->nmax);
this->copymode = 1; this->copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNVTSllod_temp1>(0,nlocal),*this); Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNVTSllod_temp1>(0,nlocal),*this);
this->copymode = 0; this->copymode = 0;

View File

@ -35,6 +35,9 @@ struct TagFixNVTSllod_temp2{};
template<class DeviceType> template<class DeviceType>
class FixNVTSllodKokkos : public FixNHKokkos<DeviceType> { class FixNVTSllodKokkos : public FixNHKokkos<DeviceType> {
public: public:
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
FixNVTSllodKokkos(class LAMMPS *, int, char **); FixNVTSllodKokkos(class LAMMPS *, int, char **);
~FixNVTSllodKokkos() {} ~FixNVTSllodKokkos() {}
void init(); void init();
@ -51,14 +54,14 @@ class FixNVTSllodKokkos : public FixNHKokkos<DeviceType> {
void nh_v_temp(); void nh_v_temp();
protected: protected:
typename ArrayTypes<DeviceType>::t_x_array x; typename AT::t_x_array x;
typename ArrayTypes<DeviceType>::t_v_array v; typename AT::t_v_array v;
typename ArrayTypes<DeviceType>::t_v_array vdelu; typename AT::t_v_array vdelu;
typename ArrayTypes<DeviceType>::t_f_array_const f; typename AT::t_f_array_const f;
typename ArrayTypes<DeviceType>::t_float_1d rmass; typename AT::t_float_1d rmass;
typename ArrayTypes<DeviceType>::t_float_1d mass; typename AT::t_float_1d mass;
typename ArrayTypes<DeviceType>::t_int_1d type; typename AT::t_int_1d type;
typename ArrayTypes<DeviceType>::t_int_1d mask; typename AT::t_int_1d mask;
Few<double, 6> d_h_two; Few<double, 6> d_h_two;

View File

@ -45,23 +45,23 @@ FixPropertyAtomKokkos::FixPropertyAtomKokkos(LAMMPS *lmp, int narg, char **arg)
void FixPropertyAtomKokkos::grow_arrays(int nmax) void FixPropertyAtomKokkos::grow_arrays(int nmax)
{ {
for (int m = 0; m < nvalue; m++) { for (int m = 0; m < nvalue; m++) {
if (style[m] == MOLECULE) { if (styles[m] == MOLECULE) {
memory->grow(atom->molecule,nmax,"atom:molecule"); memory->grow(atom->molecule,nmax,"atom:molecule");
size_t nbytes = (nmax-nmax_old) * sizeof(tagint); size_t nbytes = (nmax-nmax_old) * sizeof(tagint);
memset(&atom->molecule[nmax_old],0,nbytes); memset(&atom->molecule[nmax_old],0,nbytes);
} else if (style[m] == CHARGE) { } else if (styles[m] == CHARGE) {
memory->grow(atom->q,nmax,"atom:q"); memory->grow(atom->q,nmax,"atom:q");
size_t nbytes = (nmax-nmax_old) * sizeof(double); size_t nbytes = (nmax-nmax_old) * sizeof(double);
memset(&atom->q[nmax_old],0,nbytes); memset(&atom->q[nmax_old],0,nbytes);
} else if (style[m] == RMASS) { } else if (styles[m] == RMASS) {
memory->grow(atom->rmass,nmax,"atom:rmass"); memory->grow(atom->rmass,nmax,"atom:rmass");
size_t nbytes = (nmax-nmax_old) * sizeof(double); size_t nbytes = (nmax-nmax_old) * sizeof(double);
memset(&atom->rmass[nmax_old],0,nbytes); memset(&atom->rmass[nmax_old],0,nbytes);
} else if (style[m] == INTEGER) { } else if (styles[m] == INTEGER) {
memory->grow(atom->ivector[index[m]],nmax,"atom:ivector"); memory->grow(atom->ivector[index[m]],nmax,"atom:ivector");
size_t nbytes = (nmax-nmax_old) * sizeof(int); size_t nbytes = (nmax-nmax_old) * sizeof(int);
memset(&atom->ivector[index[m]][nmax_old],0,nbytes); memset(&atom->ivector[index[m]][nmax_old],0,nbytes);
} else if (style[m] == DOUBLE) { } else if (styles[m] == DOUBLE) {
atomKK->sync(Device,DVECTOR_MASK); atomKK->sync(Device,DVECTOR_MASK);
memoryKK->grow_kokkos(atomKK->k_dvector,atomKK->dvector,atomKK->k_dvector.extent(0),nmax, memoryKK->grow_kokkos(atomKK->k_dvector,atomKK->dvector,atomKK->k_dvector.extent(0),nmax,
"atom:dvector"); "atom:dvector");

View File

@ -165,8 +165,11 @@ void PairGranHookeHistoryKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
d_neighbors.extent(1) != d_neighbors_touch.extent(1)) d_neighbors.extent(1) != d_neighbors_touch.extent(1))
d_neighbors_touch = typename AT::t_neighbors_2d("pair:neighbors_touch",d_neighbors.extent(0),d_neighbors.extent(1)); d_neighbors_touch = typename AT::t_neighbors_2d("pair:neighbors_touch",d_neighbors.extent(0),d_neighbors.extent(1));
d_firsttouch = fix_historyKK->d_firstflag; fix_historyKK->k_firstflag.template sync<DeviceType>();
d_firstshear = fix_historyKK->d_firstvalue; fix_historyKK->k_firstvalue.template sync<DeviceType>();
d_firsttouch = fix_historyKK->k_firstflag.template view<DeviceType>();
d_firstshear = fix_historyKK->k_firstvalue.template view<DeviceType>();
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryReduce>(0,inum),*this); Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryReduce>(0,inum),*this);
@ -258,6 +261,11 @@ void PairGranHookeHistoryKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
} }
} }
if (eflag_atom) {
k_eatom.template modify<DeviceType>();
k_eatom.template sync<LMPHostType>();
}
if (vflag_global) { if (vflag_global) {
virial[0] += ev.v[0]; virial[0] += ev.v[0];
virial[1] += ev.v[1]; virial[1] += ev.v[1];

View File

@ -92,8 +92,8 @@ class PairGranHookeHistoryKokkos : public PairGranHookeHistory {
typename AT::t_int_1d_randomread d_ilist; typename AT::t_int_1d_randomread d_ilist;
typename AT::t_int_1d_randomread d_numneigh; typename AT::t_int_1d_randomread d_numneigh;
typename Kokkos::View<int**> d_firsttouch; typename AT::t_int_2d d_firsttouch;
typename Kokkos::View<LMP_FLOAT**> d_firstshear; typename AT::t_float_2d d_firstshear;
typename AT::t_neighbors_2d d_neighbors_touch; typename AT::t_neighbors_2d d_neighbors_touch;
typename AT::t_int_1d d_numneigh_touch; typename AT::t_int_1d d_numneigh_touch;

View File

@ -69,7 +69,7 @@ PairLJCharmmCoulCharmmKokkos<DeviceType>::~PairLJCharmmCoulCharmmKokkos()
if (allocated) { if (allocated) {
memoryKK->destroy_kokkos(k_eatom,eatom); memoryKK->destroy_kokkos(k_eatom,eatom);
memoryKK->destroy_kokkos(k_vatom,vatom); memoryKK->destroy_kokkos(k_vatom,vatom);
k_cutsq = DAT::tdual_ffloat_2d(); memoryKK->destroy_kokkos(k_cutsq,cutsq);
} }
} }

View File

@ -1,3 +1,4 @@
// clang-format off
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories https://www.lammps.org/, Sandia National Laboratories

View File

@ -92,40 +92,22 @@ void VerletKokkos::setup(int flag)
// acquire ghosts // acquire ghosts
// build neighbor lists // build neighbor lists
atomKK->sync(Host,ALL_MASK); lmp->kokkos->auto_sync = 1;
atomKK->modified(Host,ALL_MASK);
atomKK->setup(); atom->setup();
modify->setup_pre_exchange(); modify->setup_pre_exchange();
// debug if (triclinic) domain->x2lamda(atom->nlocal);
atomKK->sync(Host,ALL_MASK);
atomKK->modified(Host,ALL_MASK);
if (triclinic) domain->x2lamda(atomKK->nlocal);
domain->pbc(); domain->pbc();
atomKK->sync(Host,ALL_MASK);
domain->reset_box(); domain->reset_box();
comm->setup(); comm->setup();
if (neighbor->style) neighbor->setup_bins(); if (neighbor->style) neighbor->setup_bins();
comm->exchange(); comm->exchange();
if (atom->sortfreq > 0) atom->sort();
if (atomKK->sortfreq > 0) atomKK->sort();
comm->borders(); comm->borders();
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);
atomKK->sync(Host,ALL_MASK);
domain->image_check(); domain->image_check();
domain->box_too_small_check(); domain->box_too_small_check();
modify->setup_pre_neighbor(); modify->setup_pre_neighbor();
atomKK->modified(Host,ALL_MASK);
neighbor->build(1); neighbor->build(1);
modify->setup_post_neighbor(); modify->setup_post_neighbor();
neighbor->ncalls = 0; neighbor->ncalls = 0;
@ -144,7 +126,7 @@ void VerletKokkos::setup(int flag)
} }
else if (force->pair) force->pair->compute_dummy(eflag,vflag); else if (force->pair) force->pair->compute_dummy(eflag,vflag);
if (atomKK->molecular != Atom::ATOMIC) { if (atom->molecular != Atom::ATOMIC) {
if (force->bond) { if (force->bond) {
atomKK->sync(force->bond->execution_space,force->bond->datamask_read); atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
force->bond->compute(eflag,vflag); force->bond->compute(eflag,vflag);
@ -200,35 +182,21 @@ void VerletKokkos::setup_minimal(int flag)
// acquire ghosts // acquire ghosts
// build neighbor lists // build neighbor lists
lmp->kokkos->auto_sync = 1;
if (flag) { if (flag) {
atomKK->sync(Host,ALL_MASK);
atomKK->modified(Host,ALL_MASK);
modify->setup_pre_exchange(); modify->setup_pre_exchange();
// debug if (triclinic) domain->x2lamda(atom->nlocal);
atomKK->sync(Host,ALL_MASK);
atomKK->modified(Host,ALL_MASK);
if (triclinic) domain->x2lamda(atomKK->nlocal);
domain->pbc(); domain->pbc();
atomKK->sync(Host,ALL_MASK);
domain->reset_box(); domain->reset_box();
comm->setup(); comm->setup();
if (neighbor->style) neighbor->setup_bins(); if (neighbor->style) neighbor->setup_bins();
comm->exchange(); comm->exchange();
comm->borders(); comm->borders();
if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost); if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
atomKK->sync(Host,ALL_MASK);
domain->image_check(); domain->image_check();
domain->box_too_small_check(); domain->box_too_small_check();
modify->setup_pre_neighbor(); modify->setup_pre_neighbor();
atomKK->modified(Host,ALL_MASK);
neighbor->build(1); neighbor->build(1);
modify->setup_post_neighbor(); modify->setup_post_neighbor();
neighbor->ncalls = 0; neighbor->ncalls = 0;
@ -247,7 +215,7 @@ void VerletKokkos::setup_minimal(int flag)
} }
else if (force->pair) force->pair->compute_dummy(eflag,vflag); else if (force->pair) force->pair->compute_dummy(eflag,vflag);
if (atomKK->molecular != Atom::ATOMIC) { if (atom->molecular != Atom::ATOMIC) {
if (force->bond) { if (force->bond) {
atomKK->sync(force->bond->execution_space,force->bond->datamask_read); atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
force->bond->compute(eflag,vflag); force->bond->compute(eflag,vflag);

View File

@ -23,7 +23,7 @@ ARCHIVE = ar
ARFLAGS = -rc ARFLAGS = -rc
SHLIBFLAGS = -shared SHLIBFLAGS = -shared
KOKKOS_DEVICES = Cuda KOKKOS_DEVICES = Cuda
KOKKOS_ARCH = Kepler35 KOKKOS_ARCH = Volta70
# --------------------------------------------------------------------- # ---------------------------------------------------------------------
# LAMMPS-specific settings, all OPTIONAL # LAMMPS-specific settings, all OPTIONAL

View File

@ -1990,7 +1990,10 @@ int FixBondReact::check_constraints()
*ptr = satisfied[i] ? '1' : '0'; *ptr = satisfied[i] ? '1' : '0';
} }
double verdict = input->variable->evaluate_boolean(evalstr); double verdict = input->variable->evaluate_boolean(evalstr);
if (verdict == 0.0) return 0; if (verdict == 0.0) {
memory->destroy(satisfied);
return 0;
}
} }
// let's also check chirality within 'check_constraint' // let's also check chirality within 'check_constraint'
@ -2012,7 +2015,10 @@ int FixBondReact::check_constraints()
} }
} }
} }
if (get_chirality(my4coords) != chiral_atoms[i][1][rxnID]) return 0; if (get_chirality(my4coords) != chiral_atoms[i][1][rxnID]) {
memory->destroy(satisfied);
return 0;
}
} }
} }