Merge branch 'master' into fix-ttm-grid
# Conflicts: # doc/src/pair_snap.rst
This commit is contained in:
@ -193,17 +193,17 @@ The keywords *chunksize* and *parallelthresh* are only applicable when
|
||||
using the pair style *snap* with the KOKKOS package on GPUs and are
|
||||
ignored otherwise. The *chunksize* keyword controls the number of atoms
|
||||
in each pass used to compute the bispectrum components and is used to
|
||||
avoid running out of memory. For example if there are 8192 atoms in the
|
||||
avoid running out of memory. For example, if there are 8192 atoms in the
|
||||
simulation and the *chunksize* is set to 4096, the bispectrum
|
||||
calculation will be broken up into two passes (running on a single GPU).
|
||||
The *parallelthresh* keyword controls a crossover threshold for
|
||||
performing extra parallelism. For small systems, exposing additional
|
||||
performing extra parallelism. For small systems, exposing additional
|
||||
parallelism can be beneficial when there is not enough work to fully
|
||||
saturate the GPU threads otherwise. However, the extra parallelism also
|
||||
leads to more divergence and can hurt performance when the system is
|
||||
already large enough to saturate the GPU threads. Extra parallelism will
|
||||
be performed if the *chunksize* (or total number of atoms per GPU) is
|
||||
smaller than *parallelthresh*.
|
||||
already large enough to saturate the GPU threads. Extra parallelism
|
||||
will be performed if the *chunksize* (or total number of atoms per GPU)
|
||||
is smaller than *parallelthresh*.
|
||||
|
||||
Detailed definitions for all the other keywords
|
||||
are given on the :doc:`compute sna/atom <compute_sna_atom>` doc page.
|
||||
|
||||
@ -56,7 +56,7 @@ int BaseChargeT::init_atomic(const int nlocal, const int nall,
|
||||
const int max_nbors, const int maxspecial,
|
||||
const double cell_size, const double gpu_split,
|
||||
FILE *_screen, const void *pair_program,
|
||||
const char *k_name) {
|
||||
const char *k_name, const int disable_fast_math) {
|
||||
screen=_screen;
|
||||
|
||||
int gpu_nbor=0;
|
||||
@ -83,7 +83,7 @@ int BaseChargeT::init_atomic(const int nlocal, const int nall,
|
||||
|
||||
_block_size=device->pair_block_size();
|
||||
_block_bio_size=device->block_bio_pair();
|
||||
compile_kernels(*ucl_device,pair_program,k_name);
|
||||
compile_kernels(*ucl_device,pair_program,k_name,disable_fast_math);
|
||||
|
||||
if (_threads_per_atom>1 && gpu_nbor==0) {
|
||||
nbor->packing(true);
|
||||
@ -321,14 +321,20 @@ double BaseChargeT::host_memory_usage_atomic() const {
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void BaseChargeT::compile_kernels(UCL_Device &dev, const void *pair_str,
|
||||
const char *kname) {
|
||||
const char *kname,
|
||||
const int disable_fast_math) {
|
||||
if (_compiled)
|
||||
return;
|
||||
|
||||
std::string s_fast=std::string(kname)+"_fast";
|
||||
if (pair_program) delete pair_program;
|
||||
pair_program=new UCL_Program(dev);
|
||||
std::string oclstring = device->compile_string()+" -DEVFLAG=1";
|
||||
std::string device_compile_string;
|
||||
if (disable_fast_math)
|
||||
device_compile_string = device->compile_string_nofast();
|
||||
else
|
||||
device_compile_string = device->compile_string();
|
||||
std::string oclstring = device_compile_string+" -DEVFLAG=1";
|
||||
pair_program->load_string(pair_str,oclstring.c_str(),nullptr,screen);
|
||||
k_pair_fast.set_function(*pair_program,s_fast.c_str());
|
||||
k_pair.set_function(*pair_program,kname);
|
||||
@ -336,7 +342,7 @@ void BaseChargeT::compile_kernels(UCL_Device &dev, const void *pair_str,
|
||||
q_tex.get_texture(*pair_program,"q_tex");
|
||||
|
||||
#if defined(LAL_OCL_EV_JIT)
|
||||
oclstring = device->compile_string()+" -DEVFLAG=0";
|
||||
oclstring = device_compile_string+" -DEVFLAG=0";
|
||||
if (pair_program_noev) delete pair_program_noev;
|
||||
pair_program_noev=new UCL_Program(dev);
|
||||
pair_program_noev->load_string(pair_str,oclstring.c_str(),nullptr,screen);
|
||||
|
||||
@ -44,6 +44,7 @@ class BaseCharge {
|
||||
* \param cell_size cutoff + skin
|
||||
* \param gpu_split fraction of particles handled by device
|
||||
* \param k_name name for the kernel for force calculation
|
||||
* \param disable_fast_math override any fast math opts for kernel JIT
|
||||
*
|
||||
* Returns:
|
||||
* - 0 if successful
|
||||
@ -54,7 +55,8 @@ class BaseCharge {
|
||||
int init_atomic(const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *screen,
|
||||
const void *pair_program, const char *k_name);
|
||||
const void *pair_program, const char *k_name,
|
||||
const int disable_fast_math = 0);
|
||||
|
||||
/// Estimate the overhead for GPU context changes and CPU driver
|
||||
void estimate_gpu_overhead(const int add_kernels=0);
|
||||
@ -198,7 +200,8 @@ class BaseCharge {
|
||||
double _gpu_overhead, _driver_overhead;
|
||||
UCL_D_Vec<int> *_nbor_data;
|
||||
|
||||
void compile_kernels(UCL_Device &dev, const void *pair_string, const char *k);
|
||||
void compile_kernels(UCL_Device &dev, const void *pair_string,
|
||||
const char *k, const int disable_fast_math);
|
||||
|
||||
virtual int loop(const int eflag, const int vflag) = 0;
|
||||
};
|
||||
|
||||
@ -224,7 +224,9 @@ void BaseEllipsoidT::output_times() {
|
||||
|
||||
#ifdef USE_OPENCL
|
||||
// Workaround for timing issue on Intel OpenCL
|
||||
if (times[0] > 80e6) times[0]=0.0;
|
||||
if (times[3] > 80e6) times[3]=0.0;
|
||||
if (times[6] > 80e6) times[6]=0.0;
|
||||
#endif
|
||||
|
||||
if (device->replica_me()==0)
|
||||
@ -237,17 +239,18 @@ void BaseEllipsoidT::output_times() {
|
||||
fprintf(screen,"\n-------------------------------------");
|
||||
fprintf(screen,"--------------------------------\n");
|
||||
|
||||
if (device->procs_per_gpu()==1 && times[3]>0) {
|
||||
fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/replica_size);
|
||||
if (device->procs_per_gpu()==1 && (times[3] > 0.0)) {
|
||||
if (times[0] > 0.0)
|
||||
fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/replica_size);
|
||||
fprintf(screen,"Neighbor copy: %.4f s.\n",times[1]/replica_size);
|
||||
if (nbor->gpu_nbor()>0)
|
||||
if (nbor->gpu_nbor() > 0.0)
|
||||
fprintf(screen,"Neighbor build: %.4f s.\n",times[2]/replica_size);
|
||||
else
|
||||
fprintf(screen,"Neighbor unpack: %.4f s.\n",times[2]/replica_size);
|
||||
fprintf(screen,"Force calc: %.4f s.\n",times[3]/replica_size);
|
||||
fprintf(screen,"LJ calc: %.4f s.\n",times[4]/replica_size);
|
||||
}
|
||||
if (times[6]>0)
|
||||
if (times[6] > 0.0)
|
||||
fprintf(screen,"Device Overhead: %.4f s.\n",times[6]/replica_size);
|
||||
fprintf(screen,"Average split: %.4f.\n",avg_split);
|
||||
fprintf(screen,"Lanes / atom: %d.\n",_threads_per_atom);
|
||||
|
||||
@ -57,7 +57,7 @@ int BornCoulWolfT::init(const int ntypes, double **host_cutsq, double **host_rho
|
||||
const double alf, const double e_shift, const double f_shift) {
|
||||
int success;
|
||||
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
|
||||
_screen,born_coul_wolf,"k_born_coul_wolf");
|
||||
_screen,born_coul_wolf,"k_born_coul_wolf",1);
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
|
||||
@ -42,7 +42,7 @@ int BornCoulWolfCST::init(const int ntypes, double **host_cutsq, double **host_r
|
||||
const double alf, const double e_shift, const double f_shift) {
|
||||
int success;
|
||||
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
|
||||
_screen,born_coul_wolf_cs,"k_born_coul_wolf_cs");
|
||||
_screen,born_coul_wolf_cs,"k_born_coul_wolf_cs",1);
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
|
||||
@ -420,6 +420,16 @@ int DeviceT::set_ocl_params(std::string s_config, std::string extra_args) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
std::string DeviceT::compile_string_nofast() {
|
||||
std::string no_fast = _ocl_compile_string;
|
||||
size_t p = no_fast.find("-cl-fast-relaxed-math ");
|
||||
if (p != std::string::npos) no_fast.erase(p,22);
|
||||
p = no_fast.find("-DFAST_MATH=");
|
||||
if (p != std::string::npos) no_fast[p + 12]='0';
|
||||
return no_fast;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
|
||||
const bool rot, const int nlocal,
|
||||
@ -777,28 +787,30 @@ void DeviceT::output_times(UCL_Timer &time_pair, Answer<numtyp,acctyp> &ans,
|
||||
|
||||
#ifdef USE_OPENCL
|
||||
// Workaround for timing issue on Intel OpenCL
|
||||
if (times[0] > 80e6) times[0]=0.0;
|
||||
if (times[3] > 80e6) times[3]=0.0;
|
||||
if (times[5] > 80e6) times[5]=0.0;
|
||||
#endif
|
||||
|
||||
if (replica_me()==0)
|
||||
if (screen && times[6]>0.0) {
|
||||
if (screen && (times[6] > 0.0)) {
|
||||
fprintf(screen,"\n\n-------------------------------------");
|
||||
fprintf(screen,"--------------------------------\n");
|
||||
fprintf(screen," Device Time Info (average): ");
|
||||
fprintf(screen,"\n-------------------------------------");
|
||||
fprintf(screen,"--------------------------------\n");
|
||||
|
||||
if (time_device() && times[3]>0) {
|
||||
fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/_replica_size);
|
||||
if (time_device() && (times[3] > 0.0)) {
|
||||
if (times[0] > 0.0)
|
||||
fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/_replica_size);
|
||||
fprintf(screen,"Neighbor copy: %.4f s.\n",times[1]/_replica_size);
|
||||
if (nbor.gpu_nbor()>0)
|
||||
if (nbor.gpu_nbor() > 0.0)
|
||||
fprintf(screen,"Neighbor build: %.4f s.\n",times[2]/_replica_size);
|
||||
else
|
||||
fprintf(screen,"Neighbor unpack: %.4f s.\n",times[2]/_replica_size);
|
||||
fprintf(screen,"Force calc: %.4f s.\n",times[3]/_replica_size);
|
||||
}
|
||||
if (times[5]>0)
|
||||
if (times[5] > 0.0)
|
||||
fprintf(screen,"Device Overhead: %.4f s.\n",times[5]/_replica_size);
|
||||
fprintf(screen,"Average split: %.4f.\n",avg_split);
|
||||
fprintf(screen,"Lanes / atom: %d.\n",threads_per_atom);
|
||||
|
||||
@ -312,6 +312,7 @@ class Device {
|
||||
}
|
||||
|
||||
inline std::string compile_string() { return _ocl_compile_string; }
|
||||
std::string compile_string_nofast();
|
||||
inline std::string ocl_config_name() { return _ocl_config_name; }
|
||||
|
||||
template <class t>
|
||||
|
||||
@ -406,8 +406,8 @@ KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POW
|
||||
KOKKOS_INTERNAL_USE_TM := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_SKX))
|
||||
|
||||
# Incompatible flags?
|
||||
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1" | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))
|
||||
# The pipe to bc must be inside $(shell ...) so that bc evaluates the
# expression; the result is 1 when more than one host arch is selected.
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1" | bc ))
|
||||
# The pipe to bc must be inside $(shell ...) so that bc evaluates the
# comparison; the result is 1 when more than one NVIDIA arch is selected.
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
|
||||
$(error Defined Multiple Host architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
|
||||
|
||||
@ -13,25 +13,26 @@
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "fix_gpu.h"
|
||||
#include <cstring>
|
||||
|
||||
#include "atom.h"
|
||||
#include "citeme.h"
|
||||
#include "comm.h"
|
||||
#include "domain.h"
|
||||
#include "error.h"
|
||||
#include "force.h"
|
||||
#include "gpu_extra.h"
|
||||
#include "input.h"
|
||||
#include "modify.h"
|
||||
#include "neighbor.h"
|
||||
#include "pair.h"
|
||||
#include "pair_hybrid.h"
|
||||
#include "pair_hybrid_overlay.h"
|
||||
#include "respa.h"
|
||||
#include "input.h"
|
||||
#include "timer.h"
|
||||
#include "modify.h"
|
||||
#include "update.h"
|
||||
#include "domain.h"
|
||||
#include "universe.h"
|
||||
#include "gpu_extra.h"
|
||||
#include "neighbor.h"
|
||||
#include "citeme.h"
|
||||
#include "error.h"
|
||||
#include "update.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#if (LAL_USE_OMP == 1)
|
||||
#include <omp.h>
|
||||
@ -275,12 +276,15 @@ void FixGPU::init()
|
||||
error->warning(FLERR,"Using package gpu without any pair style defined");
|
||||
|
||||
// make sure fdotr virial is not accumulated multiple times
|
||||
// also disallow GPU neighbor lists for hybrid styles
|
||||
|
||||
if (force->pair_match("^hybrid",0) != nullptr) {
|
||||
PairHybrid *hybrid = (PairHybrid *) force->pair;
|
||||
for (int i = 0; i < hybrid->nstyles; i++)
|
||||
if (!utils::strmatch(hybrid->keywords[i],"/gpu$"))
|
||||
force->pair->no_virial_fdotr_compute = 1;
|
||||
if (_gpu_mode != GPU_FORCE)
|
||||
error->all(FLERR, "Must not use GPU neighbor lists with hybrid pair style");
|
||||
}
|
||||
|
||||
// rRESPA support
|
||||
@ -295,8 +299,7 @@ void FixGPU::setup(int vflag)
|
||||
{
|
||||
if (_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH)
|
||||
if (neighbor->exclude_setting() != 0)
|
||||
error->all(FLERR,
|
||||
"Cannot use neigh_modify exclude with GPU neighbor builds");
|
||||
error->all(FLERR, "Cannot use neigh_modify exclude with GPU neighbor builds");
|
||||
|
||||
if (utils::strmatch(update->integrate_style,"^verlet")) post_force(vflag);
|
||||
else {
|
||||
|
||||
@ -30,7 +30,7 @@ class FixFreeze : public Fix {
|
||||
int setmask();
|
||||
void init();
|
||||
void setup(int);
|
||||
void post_force(int);
|
||||
virtual void post_force(int);
|
||||
void post_force_respa(int, int, int);
|
||||
double compute_vector(int);
|
||||
|
||||
|
||||
@ -20,10 +20,6 @@
|
||||
#define USE_OMP_SIMD
|
||||
#define __INTEL_COMPILER __INTEL_LLVM_COMPILER
|
||||
#define __INTEL_COMPILER_BUILD_DATE __INTEL_LLVM_COMPILER
|
||||
#define _MM_SCALE_1 1
|
||||
#define _MM_SCALE_2 2
|
||||
#define _MM_SCALE_4 4
|
||||
#define _MM_SCALE_8 8
|
||||
#endif
|
||||
|
||||
#ifdef __INTEL_COMPILER
|
||||
|
||||
@ -35,6 +35,13 @@ authors for more details.
|
||||
|
||||
#ifdef __AVX512F__
|
||||
|
||||
#ifndef _MM_SCALE_1
|
||||
#define _MM_SCALE_1 1
|
||||
#define _MM_SCALE_2 2
|
||||
#define _MM_SCALE_4 4
|
||||
#define _MM_SCALE_8 8
|
||||
#endif
|
||||
|
||||
namespace ip_simd {
|
||||
|
||||
typedef __mmask16 SIMD_mask;
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
// clang-format off
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
@ -20,6 +19,7 @@ ComputeStyle(temp/deform/kk/host,ComputeTempDeformKokkos<LMPHostType>);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
// clang-format off
|
||||
#ifndef LMP_COMPUTE_TEMP_DEFORM_KOKKOS_H
|
||||
#define LMP_COMPUTE_TEMP_DEFORM_KOKKOS_H
|
||||
|
||||
|
||||
@ -28,41 +28,16 @@ FixFreezeKokkos<DeviceType>::FixFreezeKokkos(LAMMPS *lmp, int narg, char **arg)
|
||||
atomKK = (AtomKokkos *)atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
datamask_read = F_MASK | MASK_MASK;
|
||||
datamask_read = F_MASK | MASK_MASK | TORQUE_MASK;
|
||||
datamask_modify = F_MASK | TORQUE_MASK;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int FixFreezeKokkos<DeviceType>::setmask()
|
||||
{
|
||||
return FixFreeze::setmask();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixFreezeKokkos<DeviceType>::init()
|
||||
{
|
||||
FixFreeze::init();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixFreezeKokkos<DeviceType>::setup(int vflag)
|
||||
{
|
||||
FixFreeze::setup(vflag);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixFreezeKokkos<DeviceType>::post_force(int /*vflag*/)
|
||||
{
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
atomKK->modified(execution_space,datamask_modify);
|
||||
|
||||
f = atomKK->k_f.view<DeviceType>();
|
||||
torque = atomKK->k_torque.view<DeviceType>();
|
||||
@ -80,28 +55,10 @@ void FixFreezeKokkos<DeviceType>::post_force(int /*vflag*/)
|
||||
foriginal[0] = original.values[0];
|
||||
foriginal[1] = original.values[1];
|
||||
foriginal[2] = original.values[2];
|
||||
|
||||
atomKK->modified(execution_space,datamask_modify);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixFreezeKokkos<DeviceType>::post_force_respa(int vflag, int /*ilevel*/, int /*iloop*/)
|
||||
{
|
||||
post_force(vflag);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
return components of total force on fix group before force was changed
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
double FixFreezeKokkos<DeviceType>::compute_vector(int n)
|
||||
{
|
||||
return FixFreeze::compute_vector(n);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixFreezeKokkos<DeviceType>::operator()(const int i, OriginalForce &original) const {
|
||||
|
||||
@ -31,6 +31,7 @@ namespace LAMMPS_NS {
|
||||
template<class DeviceType>
|
||||
class FixFreezeKokkos : public FixFreeze {
|
||||
public:
|
||||
typedef DeviceType device_type;
|
||||
struct OriginalForce {
|
||||
double values[3];
|
||||
|
||||
@ -58,12 +59,7 @@ class FixFreezeKokkos : public FixFreeze {
|
||||
};
|
||||
|
||||
FixFreezeKokkos(class LAMMPS *, int, char **);
|
||||
int setmask();
|
||||
void init();
|
||||
void setup(int);
|
||||
void post_force(int);
|
||||
void post_force_respa(int, int, int);
|
||||
double compute_vector(int);
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i, OriginalForce &original) const;
|
||||
|
||||
@ -87,6 +87,9 @@ void FixNeighHistoryKokkos<DeviceType>::pre_exchange()
|
||||
{
|
||||
copymode = 1;
|
||||
|
||||
k_firstflag.sync<DeviceType>();
|
||||
k_firstvalue.sync<DeviceType>();
|
||||
|
||||
h_resize() = 1;
|
||||
while (h_resize() > 0) {
|
||||
FixNeighHistoryKokkosZeroPartnerCountFunctor<DeviceType> zero(this);
|
||||
@ -168,6 +171,9 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
|
||||
{
|
||||
tag = atomKK->k_tag.view<DeviceType>();
|
||||
|
||||
k_firstflag.sync<DeviceType>();
|
||||
k_firstvalue.sync<DeviceType>();
|
||||
|
||||
int inum = pair->list->inum;
|
||||
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(pair->list);
|
||||
d_numneigh = k_list->d_numneigh;
|
||||
@ -185,8 +191,10 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
|
||||
|
||||
if (maxatom < nlocal || k_list->maxneighs > (int)d_firstflag.extent(1)) {
|
||||
maxatom = nall;
|
||||
d_firstflag = Kokkos::View<int**>("neighbor_history:firstflag",maxatom,k_list->maxneighs);
|
||||
d_firstvalue = Kokkos::View<LMP_FLOAT**>("neighbor_history:firstvalue",maxatom,k_list->maxneighs*dnum);
|
||||
k_firstflag = DAT::tdual_int_2d("neighbor_history:firstflag",maxatom,k_list->maxneighs);
|
||||
k_firstvalue = DAT::tdual_float_2d("neighbor_history:firstvalue",maxatom,k_list->maxneighs*dnum);
|
||||
d_firstflag = k_firstflag.view<DeviceType>();
|
||||
d_firstvalue = k_firstvalue.view<DeviceType>();
|
||||
}
|
||||
|
||||
copymode = 1;
|
||||
@ -194,6 +202,9 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
|
||||
FixNeighHistoryKokkosPostNeighborFunctor<DeviceType> f(this);
|
||||
Kokkos::parallel_for(inum,f);
|
||||
|
||||
k_firstflag.modify<DeviceType>();
|
||||
k_firstvalue.modify<DeviceType>();
|
||||
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
|
||||
@ -50,10 +50,13 @@ class FixNeighHistoryKokkos : public FixNeighHistory {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void post_neighbor_item(const int &ii) const;
|
||||
|
||||
typename Kokkos::View<int**> d_firstflag;
|
||||
typename Kokkos::View<LMP_FLOAT**> d_firstvalue;
|
||||
typename DAT::tdual_int_2d k_firstflag;
|
||||
typename DAT::tdual_float_2d k_firstvalue;
|
||||
|
||||
private:
|
||||
typename ArrayTypes<DeviceType>::t_int_2d d_firstflag;
|
||||
typename ArrayTypes<DeviceType>::t_float_2d d_firstvalue;
|
||||
|
||||
typename ArrayTypes<DeviceType>::tdual_int_1d k_npartner;
|
||||
typename ArrayTypes<DeviceType>::tdual_tagint_2d k_partner;
|
||||
typename ArrayTypes<DeviceType>::tdual_float_2d k_valuepartner;
|
||||
@ -74,6 +77,7 @@ class FixNeighHistoryKokkos : public FixNeighHistory {
|
||||
|
||||
template <class DeviceType>
|
||||
struct FixNeighHistoryKokkosZeroPartnerCountFunctor {
|
||||
typedef DeviceType device_type;
|
||||
FixNeighHistoryKokkos<DeviceType> c;
|
||||
FixNeighHistoryKokkosZeroPartnerCountFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -84,6 +88,7 @@ struct FixNeighHistoryKokkosZeroPartnerCountFunctor {
|
||||
|
||||
template <class DeviceType>
|
||||
struct FixNeighHistoryKokkosPreExchangeFunctor {
|
||||
typedef DeviceType device_type;
|
||||
FixNeighHistoryKokkos<DeviceType> c;
|
||||
FixNeighHistoryKokkosPreExchangeFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -94,6 +99,7 @@ struct FixNeighHistoryKokkosPreExchangeFunctor {
|
||||
|
||||
template <class DeviceType>
|
||||
struct FixNeighHistoryKokkosPostNeighborFunctor {
|
||||
typedef DeviceType device_type;
|
||||
FixNeighHistoryKokkos<DeviceType> c;
|
||||
FixNeighHistoryKokkosPostNeighborFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
|
||||
@ -31,8 +31,8 @@ FixNVESphereKokkos<DeviceType>::FixNVESphereKokkos(LAMMPS *lmp, int narg, char *
|
||||
atomKK = (AtomKokkos *)atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
datamask_read = F_MASK | TORQUE_MASK | RMASS_MASK | RADIUS_MASK | MASK_MASK;
|
||||
datamask_modify = X_MASK | V_MASK | OMEGA_MASK;
|
||||
datamask_read = EMPTY_MASK;
|
||||
datamask_modify = EMPTY_MASK;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -61,8 +61,7 @@ void FixNVESphereKokkos<DeviceType>::init()
|
||||
template<class DeviceType>
|
||||
void FixNVESphereKokkos<DeviceType>::initial_integrate(int /*vflag*/)
|
||||
{
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
atomKK->modified(execution_space,datamask_modify);
|
||||
atomKK->sync(execution_space, X_MASK | V_MASK | OMEGA_MASK| F_MASK | TORQUE_MASK | RMASS_MASK | RADIUS_MASK | MASK_MASK);
|
||||
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
v = atomKK->k_v.view<DeviceType>();
|
||||
@ -78,6 +77,8 @@ void FixNVESphereKokkos<DeviceType>::initial_integrate(int /*vflag*/)
|
||||
|
||||
FixNVESphereKokkosInitialIntegrateFunctor<DeviceType> f(this);
|
||||
Kokkos::parallel_for(nlocal,f);
|
||||
|
||||
atomKK->modified(execution_space, X_MASK | V_MASK | OMEGA_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -109,8 +110,7 @@ void FixNVESphereKokkos<DeviceType>::initial_integrate_item(const int i) const
|
||||
template<class DeviceType>
|
||||
void FixNVESphereKokkos<DeviceType>::final_integrate()
|
||||
{
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
atomKK->modified(execution_space,datamask_modify);
|
||||
atomKK->sync(execution_space, V_MASK | OMEGA_MASK| F_MASK | TORQUE_MASK | RMASS_MASK | RADIUS_MASK | MASK_MASK);
|
||||
|
||||
v = atomKK->k_v.view<DeviceType>();
|
||||
omega = atomKK->k_omega.view<DeviceType>();
|
||||
@ -125,6 +125,8 @@ void FixNVESphereKokkos<DeviceType>::final_integrate()
|
||||
|
||||
FixNVESphereKokkosFinalIntegrateFunctor<DeviceType> f(this);
|
||||
Kokkos::parallel_for(nlocal,f);
|
||||
|
||||
atomKK->modified(execution_space, V_MASK | OMEGA_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -56,6 +56,7 @@ class FixNVESphereKokkos : public FixNVESphere {
|
||||
|
||||
template <class DeviceType>
|
||||
struct FixNVESphereKokkosInitialIntegrateFunctor {
|
||||
typedef DeviceType device_type;
|
||||
FixNVESphereKokkos<DeviceType> c;
|
||||
FixNVESphereKokkosInitialIntegrateFunctor(FixNVESphereKokkos<DeviceType> *c_ptr): c(*c_ptr) { c.cleanup_copy(); }
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -66,6 +67,7 @@ struct FixNVESphereKokkosInitialIntegrateFunctor {
|
||||
|
||||
template <class DeviceType>
|
||||
struct FixNVESphereKokkosFinalIntegrateFunctor {
|
||||
typedef DeviceType device_type;
|
||||
FixNVESphereKokkos<DeviceType> c;
|
||||
FixNVESphereKokkosFinalIntegrateFunctor(FixNVESphereKokkos<DeviceType> *c_ptr): c(*c_ptr) { c.cleanup_copy(); }
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
|
||||
@ -65,8 +65,6 @@ void FixNVTSllodKokkos<DeviceType>::init()
|
||||
{
|
||||
FixNHKokkos<DeviceType>::init();
|
||||
|
||||
vdelu = typename ArrayTypes<DeviceType>::t_v_array("nvt/sllod/kk:vdelu", atomKK->nlocal);
|
||||
|
||||
if (!this->temperature->tempbias)
|
||||
this->error->all(FLERR,"Temperature for fix nvt/sllod does not have a bias");
|
||||
|
||||
@ -100,7 +98,7 @@ void FixNVTSllodKokkos<DeviceType>::nh_v_temp()
|
||||
// calculate temperature since some computes require temp
|
||||
// computed on current nlocal atoms to remove bias
|
||||
|
||||
if (nondeformbias){
|
||||
if (nondeformbias) {
|
||||
atomKK->sync(this->temperature->execution_space,this->temperature->datamask_read);
|
||||
this->temperature->compute_scalar();
|
||||
atomKK->modified(this->temperature->execution_space,this->temperature->datamask_modify);
|
||||
@ -115,6 +113,9 @@ void FixNVTSllodKokkos<DeviceType>::nh_v_temp()
|
||||
|
||||
d_h_two = Few<double, 6>(h_two);
|
||||
|
||||
if (vdelu.extent(0) < atomKK->nmax)
|
||||
vdelu = typename AT::t_v_array(Kokkos::NoInit("nvt/sllod/kk:vdelu"), atomKK->nmax);
|
||||
|
||||
this->copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNVTSllod_temp1>(0,nlocal),*this);
|
||||
this->copymode = 0;
|
||||
|
||||
@ -35,6 +35,9 @@ struct TagFixNVTSllod_temp2{};
|
||||
template<class DeviceType>
|
||||
class FixNVTSllodKokkos : public FixNHKokkos<DeviceType> {
|
||||
public:
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
FixNVTSllodKokkos(class LAMMPS *, int, char **);
|
||||
~FixNVTSllodKokkos() {}
|
||||
void init();
|
||||
@ -51,14 +54,14 @@ class FixNVTSllodKokkos : public FixNHKokkos<DeviceType> {
|
||||
void nh_v_temp();
|
||||
|
||||
protected:
|
||||
typename ArrayTypes<DeviceType>::t_x_array x;
|
||||
typename ArrayTypes<DeviceType>::t_v_array v;
|
||||
typename ArrayTypes<DeviceType>::t_v_array vdelu;
|
||||
typename ArrayTypes<DeviceType>::t_f_array_const f;
|
||||
typename ArrayTypes<DeviceType>::t_float_1d rmass;
|
||||
typename ArrayTypes<DeviceType>::t_float_1d mass;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d type;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d mask;
|
||||
typename AT::t_x_array x;
|
||||
typename AT::t_v_array v;
|
||||
typename AT::t_v_array vdelu;
|
||||
typename AT::t_f_array_const f;
|
||||
typename AT::t_float_1d rmass;
|
||||
typename AT::t_float_1d mass;
|
||||
typename AT::t_int_1d type;
|
||||
typename AT::t_int_1d mask;
|
||||
|
||||
Few<double, 6> d_h_two;
|
||||
|
||||
|
||||
@ -45,23 +45,23 @@ FixPropertyAtomKokkos::FixPropertyAtomKokkos(LAMMPS *lmp, int narg, char **arg)
|
||||
void FixPropertyAtomKokkos::grow_arrays(int nmax)
|
||||
{
|
||||
for (int m = 0; m < nvalue; m++) {
|
||||
if (style[m] == MOLECULE) {
|
||||
if (styles[m] == MOLECULE) {
|
||||
memory->grow(atom->molecule,nmax,"atom:molecule");
|
||||
size_t nbytes = (nmax-nmax_old) * sizeof(tagint);
|
||||
memset(&atom->molecule[nmax_old],0,nbytes);
|
||||
} else if (style[m] == CHARGE) {
|
||||
} else if (styles[m] == CHARGE) {
|
||||
memory->grow(atom->q,nmax,"atom:q");
|
||||
size_t nbytes = (nmax-nmax_old) * sizeof(double);
|
||||
memset(&atom->q[nmax_old],0,nbytes);
|
||||
} else if (style[m] == RMASS) {
|
||||
} else if (styles[m] == RMASS) {
|
||||
memory->grow(atom->rmass,nmax,"atom:rmass");
|
||||
size_t nbytes = (nmax-nmax_old) * sizeof(double);
|
||||
memset(&atom->rmass[nmax_old],0,nbytes);
|
||||
} else if (style[m] == INTEGER) {
|
||||
} else if (styles[m] == INTEGER) {
|
||||
memory->grow(atom->ivector[index[m]],nmax,"atom:ivector");
|
||||
size_t nbytes = (nmax-nmax_old) * sizeof(int);
|
||||
memset(&atom->ivector[index[m]][nmax_old],0,nbytes);
|
||||
} else if (style[m] == DOUBLE) {
|
||||
} else if (styles[m] == DOUBLE) {
|
||||
atomKK->sync(Device,DVECTOR_MASK);
|
||||
memoryKK->grow_kokkos(atomKK->k_dvector,atomKK->dvector,atomKK->k_dvector.extent(0),nmax,
|
||||
"atom:dvector");
|
||||
|
||||
@ -165,8 +165,11 @@ void PairGranHookeHistoryKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
d_neighbors.extent(1) != d_neighbors_touch.extent(1))
|
||||
d_neighbors_touch = typename AT::t_neighbors_2d("pair:neighbors_touch",d_neighbors.extent(0),d_neighbors.extent(1));
|
||||
|
||||
d_firsttouch = fix_historyKK->d_firstflag;
|
||||
d_firstshear = fix_historyKK->d_firstvalue;
|
||||
fix_historyKK->k_firstflag.template sync<DeviceType>();
|
||||
fix_historyKK->k_firstvalue.template sync<DeviceType>();
|
||||
|
||||
d_firsttouch = fix_historyKK->k_firstflag.template view<DeviceType>();
|
||||
d_firstshear = fix_historyKK->k_firstvalue.template view<DeviceType>();
|
||||
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryReduce>(0,inum),*this);
|
||||
|
||||
@ -258,6 +261,11 @@ void PairGranHookeHistoryKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
}
|
||||
}
|
||||
|
||||
if (eflag_atom) {
|
||||
k_eatom.template modify<DeviceType>();
|
||||
k_eatom.template sync<LMPHostType>();
|
||||
}
|
||||
|
||||
if (vflag_global) {
|
||||
virial[0] += ev.v[0];
|
||||
virial[1] += ev.v[1];
|
||||
|
||||
@ -92,8 +92,8 @@ class PairGranHookeHistoryKokkos : public PairGranHookeHistory {
|
||||
typename AT::t_int_1d_randomread d_ilist;
|
||||
typename AT::t_int_1d_randomread d_numneigh;
|
||||
|
||||
typename Kokkos::View<int**> d_firsttouch;
|
||||
typename Kokkos::View<LMP_FLOAT**> d_firstshear;
|
||||
typename AT::t_int_2d d_firsttouch;
|
||||
typename AT::t_float_2d d_firstshear;
|
||||
|
||||
typename AT::t_neighbors_2d d_neighbors_touch;
|
||||
typename AT::t_int_1d d_numneigh_touch;
|
||||
|
||||
@ -69,7 +69,7 @@ PairLJCharmmCoulCharmmKokkos<DeviceType>::~PairLJCharmmCoulCharmmKokkos()
|
||||
if (allocated) {
|
||||
memoryKK->destroy_kokkos(k_eatom,eatom);
|
||||
memoryKK->destroy_kokkos(k_vatom,vatom);
|
||||
k_cutsq = DAT::tdual_ffloat_2d();
|
||||
memoryKK->destroy_kokkos(k_cutsq,cutsq);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
|
||||
@ -92,40 +92,22 @@ void VerletKokkos::setup(int flag)
|
||||
// acquire ghosts
|
||||
// build neighbor lists
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
lmp->kokkos->auto_sync = 1;
|
||||
|
||||
atomKK->setup();
|
||||
atom->setup();
|
||||
modify->setup_pre_exchange();
|
||||
// debug
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
if (triclinic) domain->x2lamda(atomKK->nlocal);
|
||||
if (triclinic) domain->x2lamda(atom->nlocal);
|
||||
domain->pbc();
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
|
||||
domain->reset_box();
|
||||
comm->setup();
|
||||
if (neighbor->style) neighbor->setup_bins();
|
||||
|
||||
comm->exchange();
|
||||
|
||||
if (atomKK->sortfreq > 0) atomKK->sort();
|
||||
|
||||
if (atom->sortfreq > 0) atom->sort();
|
||||
comm->borders();
|
||||
|
||||
if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
|
||||
domain->image_check();
|
||||
domain->box_too_small_check();
|
||||
modify->setup_pre_neighbor();
|
||||
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
|
||||
neighbor->build(1);
|
||||
modify->setup_post_neighbor();
|
||||
neighbor->ncalls = 0;
|
||||
@ -144,7 +126,7 @@ void VerletKokkos::setup(int flag)
|
||||
}
|
||||
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
|
||||
|
||||
if (atomKK->molecular != Atom::ATOMIC) {
|
||||
if (atom->molecular != Atom::ATOMIC) {
|
||||
if (force->bond) {
|
||||
atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
|
||||
force->bond->compute(eflag,vflag);
|
||||
@ -200,35 +182,21 @@ void VerletKokkos::setup_minimal(int flag)
|
||||
// acquire ghosts
|
||||
// build neighbor lists
|
||||
|
||||
lmp->kokkos->auto_sync = 1;
|
||||
|
||||
if (flag) {
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
|
||||
modify->setup_pre_exchange();
|
||||
// debug
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
|
||||
if (triclinic) domain->x2lamda(atomKK->nlocal);
|
||||
if (triclinic) domain->x2lamda(atom->nlocal);
|
||||
domain->pbc();
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
domain->reset_box();
|
||||
comm->setup();
|
||||
if (neighbor->style) neighbor->setup_bins();
|
||||
comm->exchange();
|
||||
comm->borders();
|
||||
if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
|
||||
domain->image_check();
|
||||
domain->box_too_small_check();
|
||||
modify->setup_pre_neighbor();
|
||||
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
|
||||
neighbor->build(1);
|
||||
modify->setup_post_neighbor();
|
||||
neighbor->ncalls = 0;
|
||||
@ -247,7 +215,7 @@ void VerletKokkos::setup_minimal(int flag)
|
||||
}
|
||||
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
|
||||
|
||||
if (atomKK->molecular != Atom::ATOMIC) {
|
||||
if (atom->molecular != Atom::ATOMIC) {
|
||||
if (force->bond) {
|
||||
atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
|
||||
force->bond->compute(eflag,vflag);
|
||||
|
||||
@ -23,7 +23,7 @@ ARCHIVE = ar
|
||||
ARFLAGS = -rc
|
||||
SHLIBFLAGS = -shared
|
||||
KOKKOS_DEVICES = Cuda
|
||||
KOKKOS_ARCH = Kepler35
|
||||
KOKKOS_ARCH = Volta70
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# LAMMPS-specific settings, all OPTIONAL
|
||||
|
||||
@ -1990,7 +1990,10 @@ int FixBondReact::check_constraints()
|
||||
*ptr = satisfied[i] ? '1' : '0';
|
||||
}
|
||||
double verdict = input->variable->evaluate_boolean(evalstr);
|
||||
if (verdict == 0.0) return 0;
|
||||
if (verdict == 0.0) {
|
||||
memory->destroy(satisfied);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// let's also check chirality within 'check_constraint'
|
||||
@ -2012,7 +2015,10 @@ int FixBondReact::check_constraints()
|
||||
}
|
||||
}
|
||||
}
|
||||
if (get_chirality(my4coords) != chiral_atoms[i][1][rxnID]) return 0;
|
||||
if (get_chirality(my4coords) != chiral_atoms[i][1][rxnID]) {
|
||||
memory->destroy(satisfied);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user