Merge branch 'master' into fix-ttm-grid
# Conflicts: # doc/src/pair_snap.rst
This commit is contained in:
@ -193,17 +193,17 @@ The keywords *chunksize* and *parallelthresh* are only applicable when
|
|||||||
using the pair style *snap* with the KOKKOS package on GPUs and are
|
using the pair style *snap* with the KOKKOS package on GPUs and are
|
||||||
ignored otherwise. The *chunksize* keyword controls the number of atoms
|
ignored otherwise. The *chunksize* keyword controls the number of atoms
|
||||||
in each pass used to compute the bispectrum components and is used to
|
in each pass used to compute the bispectrum components and is used to
|
||||||
avoid running out of memory. For example if there are 8192 atoms in the
|
avoid running out of memory. For example if there are 8192 atoms in the
|
||||||
simulation and the *chunksize* is set to 4096, the bispectrum
|
simulation and the *chunksize* is set to 4096, the bispectrum
|
||||||
calculation will be broken up into two passes (running on a single GPU).
|
calculation will be broken up into two passes (running on a single GPU).
|
||||||
The *parallelthresh* keyword controls a crossover threshold for
|
The *parallelthresh* keyword controls a crossover threshold for
|
||||||
performing extra parallelism. For small systems, exposing additional
|
performing extra parallelism. For small systems, exposing additional
|
||||||
parallelism can be beneficial when there is not enough work to fully
|
parallelism can be beneficial when there is not enough work to fully
|
||||||
saturate the GPU threads otherwise. However, the extra parallelism also
|
saturate the GPU threads otherwise. However, the extra parallelism also
|
||||||
leads to more divergence and can hurt performance when the system is
|
leads to more divergence and can hurt performance when the system is
|
||||||
already large enough to saturate the GPU threads. Extra parallelism will
|
already large enough to saturate the GPU threads. Extra parallelism
|
||||||
be performed if the *chunksize* (or total number of atoms per GPU) is
|
will be performed if the *chunksize* (or total number of atoms per GPU)
|
||||||
smaller than *parallelthresh*.
|
is smaller than *parallelthresh*.
|
||||||
|
|
||||||
Detailed definitions for all the other keywords
|
Detailed definitions for all the other keywords
|
||||||
are given on the :doc:`compute sna/atom <compute_sna_atom>` doc page.
|
are given on the :doc:`compute sna/atom <compute_sna_atom>` doc page.
|
||||||
|
|||||||
@ -56,7 +56,7 @@ int BaseChargeT::init_atomic(const int nlocal, const int nall,
|
|||||||
const int max_nbors, const int maxspecial,
|
const int max_nbors, const int maxspecial,
|
||||||
const double cell_size, const double gpu_split,
|
const double cell_size, const double gpu_split,
|
||||||
FILE *_screen, const void *pair_program,
|
FILE *_screen, const void *pair_program,
|
||||||
const char *k_name) {
|
const char *k_name, const int disable_fast_math) {
|
||||||
screen=_screen;
|
screen=_screen;
|
||||||
|
|
||||||
int gpu_nbor=0;
|
int gpu_nbor=0;
|
||||||
@ -83,7 +83,7 @@ int BaseChargeT::init_atomic(const int nlocal, const int nall,
|
|||||||
|
|
||||||
_block_size=device->pair_block_size();
|
_block_size=device->pair_block_size();
|
||||||
_block_bio_size=device->block_bio_pair();
|
_block_bio_size=device->block_bio_pair();
|
||||||
compile_kernels(*ucl_device,pair_program,k_name);
|
compile_kernels(*ucl_device,pair_program,k_name,disable_fast_math);
|
||||||
|
|
||||||
if (_threads_per_atom>1 && gpu_nbor==0) {
|
if (_threads_per_atom>1 && gpu_nbor==0) {
|
||||||
nbor->packing(true);
|
nbor->packing(true);
|
||||||
@ -321,14 +321,20 @@ double BaseChargeT::host_memory_usage_atomic() const {
|
|||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void BaseChargeT::compile_kernels(UCL_Device &dev, const void *pair_str,
|
void BaseChargeT::compile_kernels(UCL_Device &dev, const void *pair_str,
|
||||||
const char *kname) {
|
const char *kname,
|
||||||
|
const int disable_fast_math) {
|
||||||
if (_compiled)
|
if (_compiled)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
std::string s_fast=std::string(kname)+"_fast";
|
std::string s_fast=std::string(kname)+"_fast";
|
||||||
if (pair_program) delete pair_program;
|
if (pair_program) delete pair_program;
|
||||||
pair_program=new UCL_Program(dev);
|
pair_program=new UCL_Program(dev);
|
||||||
std::string oclstring = device->compile_string()+" -DEVFLAG=1";
|
std::string device_compile_string;
|
||||||
|
if (disable_fast_math)
|
||||||
|
device_compile_string = device->compile_string_nofast();
|
||||||
|
else
|
||||||
|
device_compile_string = device->compile_string();
|
||||||
|
std::string oclstring = device_compile_string+" -DEVFLAG=1";
|
||||||
pair_program->load_string(pair_str,oclstring.c_str(),nullptr,screen);
|
pair_program->load_string(pair_str,oclstring.c_str(),nullptr,screen);
|
||||||
k_pair_fast.set_function(*pair_program,s_fast.c_str());
|
k_pair_fast.set_function(*pair_program,s_fast.c_str());
|
||||||
k_pair.set_function(*pair_program,kname);
|
k_pair.set_function(*pair_program,kname);
|
||||||
@ -336,7 +342,7 @@ void BaseChargeT::compile_kernels(UCL_Device &dev, const void *pair_str,
|
|||||||
q_tex.get_texture(*pair_program,"q_tex");
|
q_tex.get_texture(*pair_program,"q_tex");
|
||||||
|
|
||||||
#if defined(LAL_OCL_EV_JIT)
|
#if defined(LAL_OCL_EV_JIT)
|
||||||
oclstring = device->compile_string()+" -DEVFLAG=0";
|
oclstring = device_compile_string+" -DEVFLAG=0";
|
||||||
if (pair_program_noev) delete pair_program_noev;
|
if (pair_program_noev) delete pair_program_noev;
|
||||||
pair_program_noev=new UCL_Program(dev);
|
pair_program_noev=new UCL_Program(dev);
|
||||||
pair_program_noev->load_string(pair_str,oclstring.c_str(),nullptr,screen);
|
pair_program_noev->load_string(pair_str,oclstring.c_str(),nullptr,screen);
|
||||||
|
|||||||
@ -44,6 +44,7 @@ class BaseCharge {
|
|||||||
* \param cell_size cutoff + skin
|
* \param cell_size cutoff + skin
|
||||||
* \param gpu_split fraction of particles handled by device
|
* \param gpu_split fraction of particles handled by device
|
||||||
* \param k_name name for the kernel for force calculation
|
* \param k_name name for the kernel for force calculation
|
||||||
|
* \param disable_fast_math override any fast math opts for kernel JIT
|
||||||
*
|
*
|
||||||
* Returns:
|
* Returns:
|
||||||
* - 0 if successful
|
* - 0 if successful
|
||||||
@ -54,7 +55,8 @@ class BaseCharge {
|
|||||||
int init_atomic(const int nlocal, const int nall, const int max_nbors,
|
int init_atomic(const int nlocal, const int nall, const int max_nbors,
|
||||||
const int maxspecial, const double cell_size,
|
const int maxspecial, const double cell_size,
|
||||||
const double gpu_split, FILE *screen,
|
const double gpu_split, FILE *screen,
|
||||||
const void *pair_program, const char *k_name);
|
const void *pair_program, const char *k_name,
|
||||||
|
const int disable_fast_math = 0);
|
||||||
|
|
||||||
/// Estimate the overhead for GPU context changes and CPU driver
|
/// Estimate the overhead for GPU context changes and CPU driver
|
||||||
void estimate_gpu_overhead(const int add_kernels=0);
|
void estimate_gpu_overhead(const int add_kernels=0);
|
||||||
@ -198,7 +200,8 @@ class BaseCharge {
|
|||||||
double _gpu_overhead, _driver_overhead;
|
double _gpu_overhead, _driver_overhead;
|
||||||
UCL_D_Vec<int> *_nbor_data;
|
UCL_D_Vec<int> *_nbor_data;
|
||||||
|
|
||||||
void compile_kernels(UCL_Device &dev, const void *pair_string, const char *k);
|
void compile_kernels(UCL_Device &dev, const void *pair_string,
|
||||||
|
const char *k, const int disable_fast_math);
|
||||||
|
|
||||||
virtual int loop(const int eflag, const int vflag) = 0;
|
virtual int loop(const int eflag, const int vflag) = 0;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -224,7 +224,9 @@ void BaseEllipsoidT::output_times() {
|
|||||||
|
|
||||||
#ifdef USE_OPENCL
|
#ifdef USE_OPENCL
|
||||||
// Workaround for timing issue on Intel OpenCL
|
// Workaround for timing issue on Intel OpenCL
|
||||||
|
if (times[0] > 80e6) times[0]=0.0;
|
||||||
if (times[3] > 80e6) times[3]=0.0;
|
if (times[3] > 80e6) times[3]=0.0;
|
||||||
|
if (times[6] > 80e6) times[6]=0.0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (device->replica_me()==0)
|
if (device->replica_me()==0)
|
||||||
@ -237,17 +239,18 @@ void BaseEllipsoidT::output_times() {
|
|||||||
fprintf(screen,"\n-------------------------------------");
|
fprintf(screen,"\n-------------------------------------");
|
||||||
fprintf(screen,"--------------------------------\n");
|
fprintf(screen,"--------------------------------\n");
|
||||||
|
|
||||||
if (device->procs_per_gpu()==1 && times[3]>0) {
|
if (device->procs_per_gpu()==1 && (times[3] > 0.0)) {
|
||||||
fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/replica_size);
|
if (times[0] > 0.0)
|
||||||
|
fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/replica_size);
|
||||||
fprintf(screen,"Neighbor copy: %.4f s.\n",times[1]/replica_size);
|
fprintf(screen,"Neighbor copy: %.4f s.\n",times[1]/replica_size);
|
||||||
if (nbor->gpu_nbor()>0)
|
if (nbor->gpu_nbor() > 0.0)
|
||||||
fprintf(screen,"Neighbor build: %.4f s.\n",times[2]/replica_size);
|
fprintf(screen,"Neighbor build: %.4f s.\n",times[2]/replica_size);
|
||||||
else
|
else
|
||||||
fprintf(screen,"Neighbor unpack: %.4f s.\n",times[2]/replica_size);
|
fprintf(screen,"Neighbor unpack: %.4f s.\n",times[2]/replica_size);
|
||||||
fprintf(screen,"Force calc: %.4f s.\n",times[3]/replica_size);
|
fprintf(screen,"Force calc: %.4f s.\n",times[3]/replica_size);
|
||||||
fprintf(screen,"LJ calc: %.4f s.\n",times[4]/replica_size);
|
fprintf(screen,"LJ calc: %.4f s.\n",times[4]/replica_size);
|
||||||
}
|
}
|
||||||
if (times[6]>0)
|
if (times[6] > 0.0)
|
||||||
fprintf(screen,"Device Overhead: %.4f s.\n",times[6]/replica_size);
|
fprintf(screen,"Device Overhead: %.4f s.\n",times[6]/replica_size);
|
||||||
fprintf(screen,"Average split: %.4f.\n",avg_split);
|
fprintf(screen,"Average split: %.4f.\n",avg_split);
|
||||||
fprintf(screen,"Lanes / atom: %d.\n",_threads_per_atom);
|
fprintf(screen,"Lanes / atom: %d.\n",_threads_per_atom);
|
||||||
|
|||||||
@ -57,7 +57,7 @@ int BornCoulWolfT::init(const int ntypes, double **host_cutsq, double **host_rho
|
|||||||
const double alf, const double e_shift, const double f_shift) {
|
const double alf, const double e_shift, const double f_shift) {
|
||||||
int success;
|
int success;
|
||||||
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
|
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
|
||||||
_screen,born_coul_wolf,"k_born_coul_wolf");
|
_screen,born_coul_wolf,"k_born_coul_wolf",1);
|
||||||
if (success!=0)
|
if (success!=0)
|
||||||
return success;
|
return success;
|
||||||
|
|
||||||
|
|||||||
@ -42,7 +42,7 @@ int BornCoulWolfCST::init(const int ntypes, double **host_cutsq, double **host_r
|
|||||||
const double alf, const double e_shift, const double f_shift) {
|
const double alf, const double e_shift, const double f_shift) {
|
||||||
int success;
|
int success;
|
||||||
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
|
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
|
||||||
_screen,born_coul_wolf_cs,"k_born_coul_wolf_cs");
|
_screen,born_coul_wolf_cs,"k_born_coul_wolf_cs",1);
|
||||||
if (success!=0)
|
if (success!=0)
|
||||||
return success;
|
return success;
|
||||||
|
|
||||||
|
|||||||
@ -420,6 +420,16 @@ int DeviceT::set_ocl_params(std::string s_config, std::string extra_args) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class numtyp, class acctyp>
|
||||||
|
std::string DeviceT::compile_string_nofast() {
|
||||||
|
std::string no_fast = _ocl_compile_string;
|
||||||
|
size_t p = no_fast.find("-cl-fast-relaxed-math ");
|
||||||
|
if (p != std::string::npos) no_fast.erase(p,22);
|
||||||
|
p = no_fast.find("-DFAST_MATH=");
|
||||||
|
if (p != std::string::npos) no_fast[p + 12]='0';
|
||||||
|
return no_fast;
|
||||||
|
}
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
|
int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
|
||||||
const bool rot, const int nlocal,
|
const bool rot, const int nlocal,
|
||||||
@ -777,28 +787,30 @@ void DeviceT::output_times(UCL_Timer &time_pair, Answer<numtyp,acctyp> &ans,
|
|||||||
|
|
||||||
#ifdef USE_OPENCL
|
#ifdef USE_OPENCL
|
||||||
// Workaround for timing issue on Intel OpenCL
|
// Workaround for timing issue on Intel OpenCL
|
||||||
|
if (times[0] > 80e6) times[0]=0.0;
|
||||||
if (times[3] > 80e6) times[3]=0.0;
|
if (times[3] > 80e6) times[3]=0.0;
|
||||||
if (times[5] > 80e6) times[5]=0.0;
|
if (times[5] > 80e6) times[5]=0.0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (replica_me()==0)
|
if (replica_me()==0)
|
||||||
if (screen && times[6]>0.0) {
|
if (screen && (times[6] > 0.0)) {
|
||||||
fprintf(screen,"\n\n-------------------------------------");
|
fprintf(screen,"\n\n-------------------------------------");
|
||||||
fprintf(screen,"--------------------------------\n");
|
fprintf(screen,"--------------------------------\n");
|
||||||
fprintf(screen," Device Time Info (average): ");
|
fprintf(screen," Device Time Info (average): ");
|
||||||
fprintf(screen,"\n-------------------------------------");
|
fprintf(screen,"\n-------------------------------------");
|
||||||
fprintf(screen,"--------------------------------\n");
|
fprintf(screen,"--------------------------------\n");
|
||||||
|
|
||||||
if (time_device() && times[3]>0) {
|
if (time_device() && (times[3] > 0.0)) {
|
||||||
fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/_replica_size);
|
if (times[0] > 0.0)
|
||||||
|
fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/_replica_size);
|
||||||
fprintf(screen,"Neighbor copy: %.4f s.\n",times[1]/_replica_size);
|
fprintf(screen,"Neighbor copy: %.4f s.\n",times[1]/_replica_size);
|
||||||
if (nbor.gpu_nbor()>0)
|
if (nbor.gpu_nbor() > 0.0)
|
||||||
fprintf(screen,"Neighbor build: %.4f s.\n",times[2]/_replica_size);
|
fprintf(screen,"Neighbor build: %.4f s.\n",times[2]/_replica_size);
|
||||||
else
|
else
|
||||||
fprintf(screen,"Neighbor unpack: %.4f s.\n",times[2]/_replica_size);
|
fprintf(screen,"Neighbor unpack: %.4f s.\n",times[2]/_replica_size);
|
||||||
fprintf(screen,"Force calc: %.4f s.\n",times[3]/_replica_size);
|
fprintf(screen,"Force calc: %.4f s.\n",times[3]/_replica_size);
|
||||||
}
|
}
|
||||||
if (times[5]>0)
|
if (times[5] > 0.0)
|
||||||
fprintf(screen,"Device Overhead: %.4f s.\n",times[5]/_replica_size);
|
fprintf(screen,"Device Overhead: %.4f s.\n",times[5]/_replica_size);
|
||||||
fprintf(screen,"Average split: %.4f.\n",avg_split);
|
fprintf(screen,"Average split: %.4f.\n",avg_split);
|
||||||
fprintf(screen,"Lanes / atom: %d.\n",threads_per_atom);
|
fprintf(screen,"Lanes / atom: %d.\n",threads_per_atom);
|
||||||
|
|||||||
@ -312,6 +312,7 @@ class Device {
|
|||||||
}
|
}
|
||||||
|
|
||||||
inline std::string compile_string() { return _ocl_compile_string; }
|
inline std::string compile_string() { return _ocl_compile_string; }
|
||||||
|
std::string compile_string_nofast();
|
||||||
inline std::string ocl_config_name() { return _ocl_config_name; }
|
inline std::string ocl_config_name() { return _ocl_config_name; }
|
||||||
|
|
||||||
template <class t>
|
template <class t>
|
||||||
|
|||||||
@ -406,8 +406,8 @@ KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POW
|
|||||||
KOKKOS_INTERNAL_USE_TM := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_SKX))
|
KOKKOS_INTERNAL_USE_TM := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_SKX))
|
||||||
|
|
||||||
# Incompatible flags?
|
# Incompatible flags?
|
||||||
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1" | bc ))
|
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc )
|
||||||
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))
|
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1") | bc)
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
|
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
|
||||||
$(error Defined Multiple Host architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
|
$(error Defined Multiple Host architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
|
||||||
|
|||||||
@ -13,25 +13,26 @@
|
|||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
#include "fix_gpu.h"
|
#include "fix_gpu.h"
|
||||||
#include <cstring>
|
|
||||||
|
|
||||||
#include "atom.h"
|
#include "atom.h"
|
||||||
|
#include "citeme.h"
|
||||||
#include "comm.h"
|
#include "comm.h"
|
||||||
|
#include "domain.h"
|
||||||
|
#include "error.h"
|
||||||
#include "force.h"
|
#include "force.h"
|
||||||
|
#include "gpu_extra.h"
|
||||||
|
#include "input.h"
|
||||||
|
#include "modify.h"
|
||||||
|
#include "neighbor.h"
|
||||||
#include "pair.h"
|
#include "pair.h"
|
||||||
#include "pair_hybrid.h"
|
#include "pair_hybrid.h"
|
||||||
#include "pair_hybrid_overlay.h"
|
#include "pair_hybrid_overlay.h"
|
||||||
#include "respa.h"
|
#include "respa.h"
|
||||||
#include "input.h"
|
|
||||||
#include "timer.h"
|
#include "timer.h"
|
||||||
#include "modify.h"
|
|
||||||
#include "update.h"
|
|
||||||
#include "domain.h"
|
|
||||||
#include "universe.h"
|
#include "universe.h"
|
||||||
#include "gpu_extra.h"
|
#include "update.h"
|
||||||
#include "neighbor.h"
|
|
||||||
#include "citeme.h"
|
#include <cstring>
|
||||||
#include "error.h"
|
|
||||||
|
|
||||||
#if (LAL_USE_OMP == 1)
|
#if (LAL_USE_OMP == 1)
|
||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
@ -275,12 +276,15 @@ void FixGPU::init()
|
|||||||
error->warning(FLERR,"Using package gpu without any pair style defined");
|
error->warning(FLERR,"Using package gpu without any pair style defined");
|
||||||
|
|
||||||
// make sure fdotr virial is not accumulated multiple times
|
// make sure fdotr virial is not accumulated multiple times
|
||||||
|
// also disallow GPU neighbor lists for hybrid styles
|
||||||
|
|
||||||
if (force->pair_match("^hybrid",0) != nullptr) {
|
if (force->pair_match("^hybrid",0) != nullptr) {
|
||||||
PairHybrid *hybrid = (PairHybrid *) force->pair;
|
PairHybrid *hybrid = (PairHybrid *) force->pair;
|
||||||
for (int i = 0; i < hybrid->nstyles; i++)
|
for (int i = 0; i < hybrid->nstyles; i++)
|
||||||
if (!utils::strmatch(hybrid->keywords[i],"/gpu$"))
|
if (!utils::strmatch(hybrid->keywords[i],"/gpu$"))
|
||||||
force->pair->no_virial_fdotr_compute = 1;
|
force->pair->no_virial_fdotr_compute = 1;
|
||||||
|
if (_gpu_mode != GPU_FORCE)
|
||||||
|
error->all(FLERR, "Must not use GPU neighbor lists with hybrid pair style");
|
||||||
}
|
}
|
||||||
|
|
||||||
// rRESPA support
|
// rRESPA support
|
||||||
@ -295,8 +299,7 @@ void FixGPU::setup(int vflag)
|
|||||||
{
|
{
|
||||||
if (_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH)
|
if (_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH)
|
||||||
if (neighbor->exclude_setting() != 0)
|
if (neighbor->exclude_setting() != 0)
|
||||||
error->all(FLERR,
|
error->all(FLERR, "Cannot use neigh_modify exclude with GPU neighbor builds");
|
||||||
"Cannot use neigh_modify exclude with GPU neighbor builds");
|
|
||||||
|
|
||||||
if (utils::strmatch(update->integrate_style,"^verlet")) post_force(vflag);
|
if (utils::strmatch(update->integrate_style,"^verlet")) post_force(vflag);
|
||||||
else {
|
else {
|
||||||
|
|||||||
@ -30,7 +30,7 @@ class FixFreeze : public Fix {
|
|||||||
int setmask();
|
int setmask();
|
||||||
void init();
|
void init();
|
||||||
void setup(int);
|
void setup(int);
|
||||||
void post_force(int);
|
virtual void post_force(int);
|
||||||
void post_force_respa(int, int, int);
|
void post_force_respa(int, int, int);
|
||||||
double compute_vector(int);
|
double compute_vector(int);
|
||||||
|
|
||||||
|
|||||||
@ -20,10 +20,6 @@
|
|||||||
#define USE_OMP_SIMD
|
#define USE_OMP_SIMD
|
||||||
#define __INTEL_COMPILER __INTEL_LLVM_COMPILER
|
#define __INTEL_COMPILER __INTEL_LLVM_COMPILER
|
||||||
#define __INTEL_COMPILER_BUILD_DATE __INTEL_LLVM_COMPILER
|
#define __INTEL_COMPILER_BUILD_DATE __INTEL_LLVM_COMPILER
|
||||||
#define _MM_SCALE_1 1
|
|
||||||
#define _MM_SCALE_2 2
|
|
||||||
#define _MM_SCALE_4 4
|
|
||||||
#define _MM_SCALE_8 8
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __INTEL_COMPILER
|
#ifdef __INTEL_COMPILER
|
||||||
|
|||||||
@ -35,6 +35,13 @@ authors for more details.
|
|||||||
|
|
||||||
#ifdef __AVX512F__
|
#ifdef __AVX512F__
|
||||||
|
|
||||||
|
#ifndef _MM_SCALE_1
|
||||||
|
#define _MM_SCALE_1 1
|
||||||
|
#define _MM_SCALE_2 2
|
||||||
|
#define _MM_SCALE_4 4
|
||||||
|
#define _MM_SCALE_8 8
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace ip_simd {
|
namespace ip_simd {
|
||||||
|
|
||||||
typedef __mmask16 SIMD_mask;
|
typedef __mmask16 SIMD_mask;
|
||||||
|
|||||||
@ -1,4 +1,3 @@
|
|||||||
// clang-format off
|
|
||||||
/* -*- c++ -*- ----------------------------------------------------------
|
/* -*- c++ -*- ----------------------------------------------------------
|
||||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
https://www.lammps.org/, Sandia National Laboratories
|
https://www.lammps.org/, Sandia National Laboratories
|
||||||
@ -20,6 +19,7 @@ ComputeStyle(temp/deform/kk/host,ComputeTempDeformKokkos<LMPHostType>);
|
|||||||
// clang-format on
|
// clang-format on
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
#ifndef LMP_COMPUTE_TEMP_DEFORM_KOKKOS_H
|
#ifndef LMP_COMPUTE_TEMP_DEFORM_KOKKOS_H
|
||||||
#define LMP_COMPUTE_TEMP_DEFORM_KOKKOS_H
|
#define LMP_COMPUTE_TEMP_DEFORM_KOKKOS_H
|
||||||
|
|
||||||
|
|||||||
@ -28,41 +28,16 @@ FixFreezeKokkos<DeviceType>::FixFreezeKokkos(LAMMPS *lmp, int narg, char **arg)
|
|||||||
atomKK = (AtomKokkos *)atom;
|
atomKK = (AtomKokkos *)atom;
|
||||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||||
|
|
||||||
datamask_read = F_MASK | MASK_MASK;
|
datamask_read = F_MASK | MASK_MASK | TORQUE_MASK;
|
||||||
datamask_modify = F_MASK | TORQUE_MASK;
|
datamask_modify = F_MASK | TORQUE_MASK;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType>
|
|
||||||
int FixFreezeKokkos<DeviceType>::setmask()
|
|
||||||
{
|
|
||||||
return FixFreeze::setmask();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
template<class DeviceType>
|
|
||||||
void FixFreezeKokkos<DeviceType>::init()
|
|
||||||
{
|
|
||||||
FixFreeze::init();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
template<class DeviceType>
|
|
||||||
void FixFreezeKokkos<DeviceType>::setup(int vflag)
|
|
||||||
{
|
|
||||||
FixFreeze::setup(vflag);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
void FixFreezeKokkos<DeviceType>::post_force(int /*vflag*/)
|
void FixFreezeKokkos<DeviceType>::post_force(int /*vflag*/)
|
||||||
{
|
{
|
||||||
atomKK->sync(execution_space,datamask_read);
|
atomKK->sync(execution_space,datamask_read);
|
||||||
atomKK->modified(execution_space,datamask_modify);
|
|
||||||
|
|
||||||
f = atomKK->k_f.view<DeviceType>();
|
f = atomKK->k_f.view<DeviceType>();
|
||||||
torque = atomKK->k_torque.view<DeviceType>();
|
torque = atomKK->k_torque.view<DeviceType>();
|
||||||
@ -80,28 +55,10 @@ void FixFreezeKokkos<DeviceType>::post_force(int /*vflag*/)
|
|||||||
foriginal[0] = original.values[0];
|
foriginal[0] = original.values[0];
|
||||||
foriginal[1] = original.values[1];
|
foriginal[1] = original.values[1];
|
||||||
foriginal[2] = original.values[2];
|
foriginal[2] = original.values[2];
|
||||||
|
|
||||||
|
atomKK->modified(execution_space,datamask_modify);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
template<class DeviceType>
|
|
||||||
void FixFreezeKokkos<DeviceType>::post_force_respa(int vflag, int /*ilevel*/, int /*iloop*/)
|
|
||||||
{
|
|
||||||
post_force(vflag);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
return components of total force on fix group before force was changed
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
template<class DeviceType>
|
|
||||||
double FixFreezeKokkos<DeviceType>::compute_vector(int n)
|
|
||||||
{
|
|
||||||
return FixFreeze::compute_vector(n);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void FixFreezeKokkos<DeviceType>::operator()(const int i, OriginalForce &original) const {
|
void FixFreezeKokkos<DeviceType>::operator()(const int i, OriginalForce &original) const {
|
||||||
|
|||||||
@ -31,6 +31,7 @@ namespace LAMMPS_NS {
|
|||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
class FixFreezeKokkos : public FixFreeze {
|
class FixFreezeKokkos : public FixFreeze {
|
||||||
public:
|
public:
|
||||||
|
typedef DeviceType device_type;
|
||||||
struct OriginalForce {
|
struct OriginalForce {
|
||||||
double values[3];
|
double values[3];
|
||||||
|
|
||||||
@ -58,12 +59,7 @@ class FixFreezeKokkos : public FixFreeze {
|
|||||||
};
|
};
|
||||||
|
|
||||||
FixFreezeKokkos(class LAMMPS *, int, char **);
|
FixFreezeKokkos(class LAMMPS *, int, char **);
|
||||||
int setmask();
|
|
||||||
void init();
|
|
||||||
void setup(int);
|
|
||||||
void post_force(int);
|
void post_force(int);
|
||||||
void post_force_respa(int, int, int);
|
|
||||||
double compute_vector(int);
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void operator()(const int i, OriginalForce &original) const;
|
void operator()(const int i, OriginalForce &original) const;
|
||||||
|
|||||||
@ -87,6 +87,9 @@ void FixNeighHistoryKokkos<DeviceType>::pre_exchange()
|
|||||||
{
|
{
|
||||||
copymode = 1;
|
copymode = 1;
|
||||||
|
|
||||||
|
k_firstflag.sync<DeviceType>();
|
||||||
|
k_firstvalue.sync<DeviceType>();
|
||||||
|
|
||||||
h_resize() = 1;
|
h_resize() = 1;
|
||||||
while (h_resize() > 0) {
|
while (h_resize() > 0) {
|
||||||
FixNeighHistoryKokkosZeroPartnerCountFunctor<DeviceType> zero(this);
|
FixNeighHistoryKokkosZeroPartnerCountFunctor<DeviceType> zero(this);
|
||||||
@ -168,6 +171,9 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
|
|||||||
{
|
{
|
||||||
tag = atomKK->k_tag.view<DeviceType>();
|
tag = atomKK->k_tag.view<DeviceType>();
|
||||||
|
|
||||||
|
k_firstflag.sync<DeviceType>();
|
||||||
|
k_firstvalue.sync<DeviceType>();
|
||||||
|
|
||||||
int inum = pair->list->inum;
|
int inum = pair->list->inum;
|
||||||
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(pair->list);
|
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(pair->list);
|
||||||
d_numneigh = k_list->d_numneigh;
|
d_numneigh = k_list->d_numneigh;
|
||||||
@ -185,8 +191,10 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
|
|||||||
|
|
||||||
if (maxatom < nlocal || k_list->maxneighs > (int)d_firstflag.extent(1)) {
|
if (maxatom < nlocal || k_list->maxneighs > (int)d_firstflag.extent(1)) {
|
||||||
maxatom = nall;
|
maxatom = nall;
|
||||||
d_firstflag = Kokkos::View<int**>("neighbor_history:firstflag",maxatom,k_list->maxneighs);
|
k_firstflag = DAT::tdual_int_2d("neighbor_history:firstflag",maxatom,k_list->maxneighs);
|
||||||
d_firstvalue = Kokkos::View<LMP_FLOAT**>("neighbor_history:firstvalue",maxatom,k_list->maxneighs*dnum);
|
k_firstvalue = DAT::tdual_float_2d("neighbor_history:firstvalue",maxatom,k_list->maxneighs*dnum);
|
||||||
|
d_firstflag = k_firstflag.view<DeviceType>();
|
||||||
|
d_firstvalue = k_firstvalue.view<DeviceType>();
|
||||||
}
|
}
|
||||||
|
|
||||||
copymode = 1;
|
copymode = 1;
|
||||||
@ -194,6 +202,9 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
|
|||||||
FixNeighHistoryKokkosPostNeighborFunctor<DeviceType> f(this);
|
FixNeighHistoryKokkosPostNeighborFunctor<DeviceType> f(this);
|
||||||
Kokkos::parallel_for(inum,f);
|
Kokkos::parallel_for(inum,f);
|
||||||
|
|
||||||
|
k_firstflag.modify<DeviceType>();
|
||||||
|
k_firstvalue.modify<DeviceType>();
|
||||||
|
|
||||||
copymode = 0;
|
copymode = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -50,10 +50,13 @@ class FixNeighHistoryKokkos : public FixNeighHistory {
|
|||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void post_neighbor_item(const int &ii) const;
|
void post_neighbor_item(const int &ii) const;
|
||||||
|
|
||||||
typename Kokkos::View<int**> d_firstflag;
|
typename DAT::tdual_int_2d k_firstflag;
|
||||||
typename Kokkos::View<LMP_FLOAT**> d_firstvalue;
|
typename DAT::tdual_float_2d k_firstvalue;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
typename ArrayTypes<DeviceType>::t_int_2d d_firstflag;
|
||||||
|
typename ArrayTypes<DeviceType>::t_float_2d d_firstvalue;
|
||||||
|
|
||||||
typename ArrayTypes<DeviceType>::tdual_int_1d k_npartner;
|
typename ArrayTypes<DeviceType>::tdual_int_1d k_npartner;
|
||||||
typename ArrayTypes<DeviceType>::tdual_tagint_2d k_partner;
|
typename ArrayTypes<DeviceType>::tdual_tagint_2d k_partner;
|
||||||
typename ArrayTypes<DeviceType>::tdual_float_2d k_valuepartner;
|
typename ArrayTypes<DeviceType>::tdual_float_2d k_valuepartner;
|
||||||
@ -74,6 +77,7 @@ class FixNeighHistoryKokkos : public FixNeighHistory {
|
|||||||
|
|
||||||
template <class DeviceType>
|
template <class DeviceType>
|
||||||
struct FixNeighHistoryKokkosZeroPartnerCountFunctor {
|
struct FixNeighHistoryKokkosZeroPartnerCountFunctor {
|
||||||
|
typedef DeviceType device_type;
|
||||||
FixNeighHistoryKokkos<DeviceType> c;
|
FixNeighHistoryKokkos<DeviceType> c;
|
||||||
FixNeighHistoryKokkosZeroPartnerCountFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
|
FixNeighHistoryKokkosZeroPartnerCountFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
@ -84,6 +88,7 @@ struct FixNeighHistoryKokkosZeroPartnerCountFunctor {
|
|||||||
|
|
||||||
template <class DeviceType>
|
template <class DeviceType>
|
||||||
struct FixNeighHistoryKokkosPreExchangeFunctor {
|
struct FixNeighHistoryKokkosPreExchangeFunctor {
|
||||||
|
typedef DeviceType device_type;
|
||||||
FixNeighHistoryKokkos<DeviceType> c;
|
FixNeighHistoryKokkos<DeviceType> c;
|
||||||
FixNeighHistoryKokkosPreExchangeFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
|
FixNeighHistoryKokkosPreExchangeFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
@ -94,6 +99,7 @@ struct FixNeighHistoryKokkosPreExchangeFunctor {
|
|||||||
|
|
||||||
template <class DeviceType>
|
template <class DeviceType>
|
||||||
struct FixNeighHistoryKokkosPostNeighborFunctor {
|
struct FixNeighHistoryKokkosPostNeighborFunctor {
|
||||||
|
typedef DeviceType device_type;
|
||||||
FixNeighHistoryKokkos<DeviceType> c;
|
FixNeighHistoryKokkos<DeviceType> c;
|
||||||
FixNeighHistoryKokkosPostNeighborFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
|
FixNeighHistoryKokkosPostNeighborFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
|||||||
@ -31,8 +31,8 @@ FixNVESphereKokkos<DeviceType>::FixNVESphereKokkos(LAMMPS *lmp, int narg, char *
|
|||||||
atomKK = (AtomKokkos *)atom;
|
atomKK = (AtomKokkos *)atom;
|
||||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||||
|
|
||||||
datamask_read = F_MASK | TORQUE_MASK | RMASS_MASK | RADIUS_MASK | MASK_MASK;
|
datamask_read = EMPTY_MASK;
|
||||||
datamask_modify = X_MASK | V_MASK | OMEGA_MASK;
|
datamask_modify = EMPTY_MASK;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
@ -61,8 +61,7 @@ void FixNVESphereKokkos<DeviceType>::init()
|
|||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
void FixNVESphereKokkos<DeviceType>::initial_integrate(int /*vflag*/)
|
void FixNVESphereKokkos<DeviceType>::initial_integrate(int /*vflag*/)
|
||||||
{
|
{
|
||||||
atomKK->sync(execution_space,datamask_read);
|
atomKK->sync(execution_space, X_MASK | V_MASK | OMEGA_MASK| F_MASK | TORQUE_MASK | RMASS_MASK | RADIUS_MASK | MASK_MASK);
|
||||||
atomKK->modified(execution_space,datamask_modify);
|
|
||||||
|
|
||||||
x = atomKK->k_x.view<DeviceType>();
|
x = atomKK->k_x.view<DeviceType>();
|
||||||
v = atomKK->k_v.view<DeviceType>();
|
v = atomKK->k_v.view<DeviceType>();
|
||||||
@ -78,6 +77,8 @@ void FixNVESphereKokkos<DeviceType>::initial_integrate(int /*vflag*/)
|
|||||||
|
|
||||||
FixNVESphereKokkosInitialIntegrateFunctor<DeviceType> f(this);
|
FixNVESphereKokkosInitialIntegrateFunctor<DeviceType> f(this);
|
||||||
Kokkos::parallel_for(nlocal,f);
|
Kokkos::parallel_for(nlocal,f);
|
||||||
|
|
||||||
|
atomKK->modified(execution_space, X_MASK | V_MASK | OMEGA_MASK);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
@ -109,8 +110,7 @@ void FixNVESphereKokkos<DeviceType>::initial_integrate_item(const int i) const
|
|||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
void FixNVESphereKokkos<DeviceType>::final_integrate()
|
void FixNVESphereKokkos<DeviceType>::final_integrate()
|
||||||
{
|
{
|
||||||
atomKK->sync(execution_space,datamask_read);
|
atomKK->sync(execution_space, V_MASK | OMEGA_MASK| F_MASK | TORQUE_MASK | RMASS_MASK | RADIUS_MASK | MASK_MASK);
|
||||||
atomKK->modified(execution_space,datamask_modify);
|
|
||||||
|
|
||||||
v = atomKK->k_v.view<DeviceType>();
|
v = atomKK->k_v.view<DeviceType>();
|
||||||
omega = atomKK->k_omega.view<DeviceType>();
|
omega = atomKK->k_omega.view<DeviceType>();
|
||||||
@ -125,6 +125,8 @@ void FixNVESphereKokkos<DeviceType>::final_integrate()
|
|||||||
|
|
||||||
FixNVESphereKokkosFinalIntegrateFunctor<DeviceType> f(this);
|
FixNVESphereKokkosFinalIntegrateFunctor<DeviceType> f(this);
|
||||||
Kokkos::parallel_for(nlocal,f);
|
Kokkos::parallel_for(nlocal,f);
|
||||||
|
|
||||||
|
atomKK->modified(execution_space, V_MASK | OMEGA_MASK);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|||||||
@ -56,6 +56,7 @@ class FixNVESphereKokkos : public FixNVESphere {
|
|||||||
|
|
||||||
template <class DeviceType>
|
template <class DeviceType>
|
||||||
struct FixNVESphereKokkosInitialIntegrateFunctor {
|
struct FixNVESphereKokkosInitialIntegrateFunctor {
|
||||||
|
typedef DeviceType device_type;
|
||||||
FixNVESphereKokkos<DeviceType> c;
|
FixNVESphereKokkos<DeviceType> c;
|
||||||
FixNVESphereKokkosInitialIntegrateFunctor(FixNVESphereKokkos<DeviceType> *c_ptr): c(*c_ptr) { c.cleanup_copy(); }
|
FixNVESphereKokkosInitialIntegrateFunctor(FixNVESphereKokkos<DeviceType> *c_ptr): c(*c_ptr) { c.cleanup_copy(); }
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
@ -66,6 +67,7 @@ struct FixNVESphereKokkosInitialIntegrateFunctor {
|
|||||||
|
|
||||||
template <class DeviceType>
|
template <class DeviceType>
|
||||||
struct FixNVESphereKokkosFinalIntegrateFunctor {
|
struct FixNVESphereKokkosFinalIntegrateFunctor {
|
||||||
|
typedef DeviceType device_type;
|
||||||
FixNVESphereKokkos<DeviceType> c;
|
FixNVESphereKokkos<DeviceType> c;
|
||||||
FixNVESphereKokkosFinalIntegrateFunctor(FixNVESphereKokkos<DeviceType> *c_ptr): c(*c_ptr) { c.cleanup_copy(); }
|
FixNVESphereKokkosFinalIntegrateFunctor(FixNVESphereKokkos<DeviceType> *c_ptr): c(*c_ptr) { c.cleanup_copy(); }
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
|||||||
@ -65,8 +65,6 @@ void FixNVTSllodKokkos<DeviceType>::init()
|
|||||||
{
|
{
|
||||||
FixNHKokkos<DeviceType>::init();
|
FixNHKokkos<DeviceType>::init();
|
||||||
|
|
||||||
vdelu = typename ArrayTypes<DeviceType>::t_v_array("nvt/sllod/kk:vdelu", atomKK->nlocal);
|
|
||||||
|
|
||||||
if (!this->temperature->tempbias)
|
if (!this->temperature->tempbias)
|
||||||
this->error->all(FLERR,"Temperature for fix nvt/sllod does not have a bias");
|
this->error->all(FLERR,"Temperature for fix nvt/sllod does not have a bias");
|
||||||
|
|
||||||
@ -100,7 +98,7 @@ void FixNVTSllodKokkos<DeviceType>::nh_v_temp()
|
|||||||
// calculate temperature since some computes require temp
|
// calculate temperature since some computes require temp
|
||||||
// computed on current nlocal atoms to remove bias
|
// computed on current nlocal atoms to remove bias
|
||||||
|
|
||||||
if (nondeformbias){
|
if (nondeformbias) {
|
||||||
atomKK->sync(this->temperature->execution_space,this->temperature->datamask_read);
|
atomKK->sync(this->temperature->execution_space,this->temperature->datamask_read);
|
||||||
this->temperature->compute_scalar();
|
this->temperature->compute_scalar();
|
||||||
atomKK->modified(this->temperature->execution_space,this->temperature->datamask_modify);
|
atomKK->modified(this->temperature->execution_space,this->temperature->datamask_modify);
|
||||||
@ -115,6 +113,9 @@ void FixNVTSllodKokkos<DeviceType>::nh_v_temp()
|
|||||||
|
|
||||||
d_h_two = Few<double, 6>(h_two);
|
d_h_two = Few<double, 6>(h_two);
|
||||||
|
|
||||||
|
if (vdelu.extent(0) < atomKK->nmax)
|
||||||
|
vdelu = typename AT::t_v_array(Kokkos::NoInit("nvt/sllod/kk:vdelu"), atomKK->nmax);
|
||||||
|
|
||||||
this->copymode = 1;
|
this->copymode = 1;
|
||||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNVTSllod_temp1>(0,nlocal),*this);
|
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNVTSllod_temp1>(0,nlocal),*this);
|
||||||
this->copymode = 0;
|
this->copymode = 0;
|
||||||
|
|||||||
@ -35,6 +35,9 @@ struct TagFixNVTSllod_temp2{};
|
|||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
class FixNVTSllodKokkos : public FixNHKokkos<DeviceType> {
|
class FixNVTSllodKokkos : public FixNHKokkos<DeviceType> {
|
||||||
public:
|
public:
|
||||||
|
typedef DeviceType device_type;
|
||||||
|
typedef ArrayTypes<DeviceType> AT;
|
||||||
|
|
||||||
FixNVTSllodKokkos(class LAMMPS *, int, char **);
|
FixNVTSllodKokkos(class LAMMPS *, int, char **);
|
||||||
~FixNVTSllodKokkos() {}
|
~FixNVTSllodKokkos() {}
|
||||||
void init();
|
void init();
|
||||||
@ -51,14 +54,14 @@ class FixNVTSllodKokkos : public FixNHKokkos<DeviceType> {
|
|||||||
void nh_v_temp();
|
void nh_v_temp();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
typename ArrayTypes<DeviceType>::t_x_array x;
|
typename AT::t_x_array x;
|
||||||
typename ArrayTypes<DeviceType>::t_v_array v;
|
typename AT::t_v_array v;
|
||||||
typename ArrayTypes<DeviceType>::t_v_array vdelu;
|
typename AT::t_v_array vdelu;
|
||||||
typename ArrayTypes<DeviceType>::t_f_array_const f;
|
typename AT::t_f_array_const f;
|
||||||
typename ArrayTypes<DeviceType>::t_float_1d rmass;
|
typename AT::t_float_1d rmass;
|
||||||
typename ArrayTypes<DeviceType>::t_float_1d mass;
|
typename AT::t_float_1d mass;
|
||||||
typename ArrayTypes<DeviceType>::t_int_1d type;
|
typename AT::t_int_1d type;
|
||||||
typename ArrayTypes<DeviceType>::t_int_1d mask;
|
typename AT::t_int_1d mask;
|
||||||
|
|
||||||
Few<double, 6> d_h_two;
|
Few<double, 6> d_h_two;
|
||||||
|
|
||||||
|
|||||||
@ -45,23 +45,23 @@ FixPropertyAtomKokkos::FixPropertyAtomKokkos(LAMMPS *lmp, int narg, char **arg)
|
|||||||
void FixPropertyAtomKokkos::grow_arrays(int nmax)
|
void FixPropertyAtomKokkos::grow_arrays(int nmax)
|
||||||
{
|
{
|
||||||
for (int m = 0; m < nvalue; m++) {
|
for (int m = 0; m < nvalue; m++) {
|
||||||
if (style[m] == MOLECULE) {
|
if (styles[m] == MOLECULE) {
|
||||||
memory->grow(atom->molecule,nmax,"atom:molecule");
|
memory->grow(atom->molecule,nmax,"atom:molecule");
|
||||||
size_t nbytes = (nmax-nmax_old) * sizeof(tagint);
|
size_t nbytes = (nmax-nmax_old) * sizeof(tagint);
|
||||||
memset(&atom->molecule[nmax_old],0,nbytes);
|
memset(&atom->molecule[nmax_old],0,nbytes);
|
||||||
} else if (style[m] == CHARGE) {
|
} else if (styles[m] == CHARGE) {
|
||||||
memory->grow(atom->q,nmax,"atom:q");
|
memory->grow(atom->q,nmax,"atom:q");
|
||||||
size_t nbytes = (nmax-nmax_old) * sizeof(double);
|
size_t nbytes = (nmax-nmax_old) * sizeof(double);
|
||||||
memset(&atom->q[nmax_old],0,nbytes);
|
memset(&atom->q[nmax_old],0,nbytes);
|
||||||
} else if (style[m] == RMASS) {
|
} else if (styles[m] == RMASS) {
|
||||||
memory->grow(atom->rmass,nmax,"atom:rmass");
|
memory->grow(atom->rmass,nmax,"atom:rmass");
|
||||||
size_t nbytes = (nmax-nmax_old) * sizeof(double);
|
size_t nbytes = (nmax-nmax_old) * sizeof(double);
|
||||||
memset(&atom->rmass[nmax_old],0,nbytes);
|
memset(&atom->rmass[nmax_old],0,nbytes);
|
||||||
} else if (style[m] == INTEGER) {
|
} else if (styles[m] == INTEGER) {
|
||||||
memory->grow(atom->ivector[index[m]],nmax,"atom:ivector");
|
memory->grow(atom->ivector[index[m]],nmax,"atom:ivector");
|
||||||
size_t nbytes = (nmax-nmax_old) * sizeof(int);
|
size_t nbytes = (nmax-nmax_old) * sizeof(int);
|
||||||
memset(&atom->ivector[index[m]][nmax_old],0,nbytes);
|
memset(&atom->ivector[index[m]][nmax_old],0,nbytes);
|
||||||
} else if (style[m] == DOUBLE) {
|
} else if (styles[m] == DOUBLE) {
|
||||||
atomKK->sync(Device,DVECTOR_MASK);
|
atomKK->sync(Device,DVECTOR_MASK);
|
||||||
memoryKK->grow_kokkos(atomKK->k_dvector,atomKK->dvector,atomKK->k_dvector.extent(0),nmax,
|
memoryKK->grow_kokkos(atomKK->k_dvector,atomKK->dvector,atomKK->k_dvector.extent(0),nmax,
|
||||||
"atom:dvector");
|
"atom:dvector");
|
||||||
|
|||||||
@ -165,8 +165,11 @@ void PairGranHookeHistoryKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||||||
d_neighbors.extent(1) != d_neighbors_touch.extent(1))
|
d_neighbors.extent(1) != d_neighbors_touch.extent(1))
|
||||||
d_neighbors_touch = typename AT::t_neighbors_2d("pair:neighbors_touch",d_neighbors.extent(0),d_neighbors.extent(1));
|
d_neighbors_touch = typename AT::t_neighbors_2d("pair:neighbors_touch",d_neighbors.extent(0),d_neighbors.extent(1));
|
||||||
|
|
||||||
d_firsttouch = fix_historyKK->d_firstflag;
|
fix_historyKK->k_firstflag.template sync<DeviceType>();
|
||||||
d_firstshear = fix_historyKK->d_firstvalue;
|
fix_historyKK->k_firstvalue.template sync<DeviceType>();
|
||||||
|
|
||||||
|
d_firsttouch = fix_historyKK->k_firstflag.template view<DeviceType>();
|
||||||
|
d_firstshear = fix_historyKK->k_firstvalue.template view<DeviceType>();
|
||||||
|
|
||||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryReduce>(0,inum),*this);
|
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryReduce>(0,inum),*this);
|
||||||
|
|
||||||
@ -258,6 +261,11 @@ void PairGranHookeHistoryKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (eflag_atom) {
|
||||||
|
k_eatom.template modify<DeviceType>();
|
||||||
|
k_eatom.template sync<LMPHostType>();
|
||||||
|
}
|
||||||
|
|
||||||
if (vflag_global) {
|
if (vflag_global) {
|
||||||
virial[0] += ev.v[0];
|
virial[0] += ev.v[0];
|
||||||
virial[1] += ev.v[1];
|
virial[1] += ev.v[1];
|
||||||
|
|||||||
@ -92,8 +92,8 @@ class PairGranHookeHistoryKokkos : public PairGranHookeHistory {
|
|||||||
typename AT::t_int_1d_randomread d_ilist;
|
typename AT::t_int_1d_randomread d_ilist;
|
||||||
typename AT::t_int_1d_randomread d_numneigh;
|
typename AT::t_int_1d_randomread d_numneigh;
|
||||||
|
|
||||||
typename Kokkos::View<int**> d_firsttouch;
|
typename AT::t_int_2d d_firsttouch;
|
||||||
typename Kokkos::View<LMP_FLOAT**> d_firstshear;
|
typename AT::t_float_2d d_firstshear;
|
||||||
|
|
||||||
typename AT::t_neighbors_2d d_neighbors_touch;
|
typename AT::t_neighbors_2d d_neighbors_touch;
|
||||||
typename AT::t_int_1d d_numneigh_touch;
|
typename AT::t_int_1d d_numneigh_touch;
|
||||||
|
|||||||
@ -69,7 +69,7 @@ PairLJCharmmCoulCharmmKokkos<DeviceType>::~PairLJCharmmCoulCharmmKokkos()
|
|||||||
if (allocated) {
|
if (allocated) {
|
||||||
memoryKK->destroy_kokkos(k_eatom,eatom);
|
memoryKK->destroy_kokkos(k_eatom,eatom);
|
||||||
memoryKK->destroy_kokkos(k_vatom,vatom);
|
memoryKK->destroy_kokkos(k_vatom,vatom);
|
||||||
k_cutsq = DAT::tdual_ffloat_2d();
|
memoryKK->destroy_kokkos(k_cutsq,cutsq);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,3 +1,4 @@
|
|||||||
|
// clang-format off
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
https://www.lammps.org/, Sandia National Laboratories
|
https://www.lammps.org/, Sandia National Laboratories
|
||||||
|
|||||||
@ -92,40 +92,22 @@ void VerletKokkos::setup(int flag)
|
|||||||
// acquire ghosts
|
// acquire ghosts
|
||||||
// build neighbor lists
|
// build neighbor lists
|
||||||
|
|
||||||
atomKK->sync(Host,ALL_MASK);
|
lmp->kokkos->auto_sync = 1;
|
||||||
atomKK->modified(Host,ALL_MASK);
|
|
||||||
|
|
||||||
atomKK->setup();
|
atom->setup();
|
||||||
modify->setup_pre_exchange();
|
modify->setup_pre_exchange();
|
||||||
// debug
|
if (triclinic) domain->x2lamda(atom->nlocal);
|
||||||
atomKK->sync(Host,ALL_MASK);
|
|
||||||
atomKK->modified(Host,ALL_MASK);
|
|
||||||
if (triclinic) domain->x2lamda(atomKK->nlocal);
|
|
||||||
domain->pbc();
|
domain->pbc();
|
||||||
|
|
||||||
atomKK->sync(Host,ALL_MASK);
|
|
||||||
|
|
||||||
|
|
||||||
domain->reset_box();
|
domain->reset_box();
|
||||||
comm->setup();
|
comm->setup();
|
||||||
if (neighbor->style) neighbor->setup_bins();
|
if (neighbor->style) neighbor->setup_bins();
|
||||||
|
|
||||||
comm->exchange();
|
comm->exchange();
|
||||||
|
if (atom->sortfreq > 0) atom->sort();
|
||||||
if (atomKK->sortfreq > 0) atomKK->sort();
|
|
||||||
|
|
||||||
comm->borders();
|
comm->borders();
|
||||||
|
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
|
||||||
if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);
|
|
||||||
|
|
||||||
atomKK->sync(Host,ALL_MASK);
|
|
||||||
|
|
||||||
domain->image_check();
|
domain->image_check();
|
||||||
domain->box_too_small_check();
|
domain->box_too_small_check();
|
||||||
modify->setup_pre_neighbor();
|
modify->setup_pre_neighbor();
|
||||||
|
|
||||||
atomKK->modified(Host,ALL_MASK);
|
|
||||||
|
|
||||||
neighbor->build(1);
|
neighbor->build(1);
|
||||||
modify->setup_post_neighbor();
|
modify->setup_post_neighbor();
|
||||||
neighbor->ncalls = 0;
|
neighbor->ncalls = 0;
|
||||||
@ -144,7 +126,7 @@ void VerletKokkos::setup(int flag)
|
|||||||
}
|
}
|
||||||
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
|
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
|
||||||
|
|
||||||
if (atomKK->molecular != Atom::ATOMIC) {
|
if (atom->molecular != Atom::ATOMIC) {
|
||||||
if (force->bond) {
|
if (force->bond) {
|
||||||
atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
|
atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
|
||||||
force->bond->compute(eflag,vflag);
|
force->bond->compute(eflag,vflag);
|
||||||
@ -200,35 +182,21 @@ void VerletKokkos::setup_minimal(int flag)
|
|||||||
// acquire ghosts
|
// acquire ghosts
|
||||||
// build neighbor lists
|
// build neighbor lists
|
||||||
|
|
||||||
|
lmp->kokkos->auto_sync = 1;
|
||||||
|
|
||||||
if (flag) {
|
if (flag) {
|
||||||
atomKK->sync(Host,ALL_MASK);
|
|
||||||
atomKK->modified(Host,ALL_MASK);
|
|
||||||
|
|
||||||
modify->setup_pre_exchange();
|
modify->setup_pre_exchange();
|
||||||
// debug
|
if (triclinic) domain->x2lamda(atom->nlocal);
|
||||||
atomKK->sync(Host,ALL_MASK);
|
|
||||||
atomKK->modified(Host,ALL_MASK);
|
|
||||||
|
|
||||||
if (triclinic) domain->x2lamda(atomKK->nlocal);
|
|
||||||
domain->pbc();
|
domain->pbc();
|
||||||
|
|
||||||
atomKK->sync(Host,ALL_MASK);
|
|
||||||
|
|
||||||
domain->reset_box();
|
domain->reset_box();
|
||||||
comm->setup();
|
comm->setup();
|
||||||
if (neighbor->style) neighbor->setup_bins();
|
if (neighbor->style) neighbor->setup_bins();
|
||||||
comm->exchange();
|
comm->exchange();
|
||||||
comm->borders();
|
comm->borders();
|
||||||
if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);
|
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
|
||||||
|
|
||||||
atomKK->sync(Host,ALL_MASK);
|
|
||||||
|
|
||||||
domain->image_check();
|
domain->image_check();
|
||||||
domain->box_too_small_check();
|
domain->box_too_small_check();
|
||||||
modify->setup_pre_neighbor();
|
modify->setup_pre_neighbor();
|
||||||
|
|
||||||
atomKK->modified(Host,ALL_MASK);
|
|
||||||
|
|
||||||
neighbor->build(1);
|
neighbor->build(1);
|
||||||
modify->setup_post_neighbor();
|
modify->setup_post_neighbor();
|
||||||
neighbor->ncalls = 0;
|
neighbor->ncalls = 0;
|
||||||
@ -247,7 +215,7 @@ void VerletKokkos::setup_minimal(int flag)
|
|||||||
}
|
}
|
||||||
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
|
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
|
||||||
|
|
||||||
if (atomKK->molecular != Atom::ATOMIC) {
|
if (atom->molecular != Atom::ATOMIC) {
|
||||||
if (force->bond) {
|
if (force->bond) {
|
||||||
atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
|
atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
|
||||||
force->bond->compute(eflag,vflag);
|
force->bond->compute(eflag,vflag);
|
||||||
|
|||||||
@ -23,7 +23,7 @@ ARCHIVE = ar
|
|||||||
ARFLAGS = -rc
|
ARFLAGS = -rc
|
||||||
SHLIBFLAGS = -shared
|
SHLIBFLAGS = -shared
|
||||||
KOKKOS_DEVICES = Cuda
|
KOKKOS_DEVICES = Cuda
|
||||||
KOKKOS_ARCH = Kepler35
|
KOKKOS_ARCH = Volta70
|
||||||
|
|
||||||
# ---------------------------------------------------------------------
|
# ---------------------------------------------------------------------
|
||||||
# LAMMPS-specific settings, all OPTIONAL
|
# LAMMPS-specific settings, all OPTIONAL
|
||||||
|
|||||||
@ -1990,7 +1990,10 @@ int FixBondReact::check_constraints()
|
|||||||
*ptr = satisfied[i] ? '1' : '0';
|
*ptr = satisfied[i] ? '1' : '0';
|
||||||
}
|
}
|
||||||
double verdict = input->variable->evaluate_boolean(evalstr);
|
double verdict = input->variable->evaluate_boolean(evalstr);
|
||||||
if (verdict == 0.0) return 0;
|
if (verdict == 0.0) {
|
||||||
|
memory->destroy(satisfied);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// let's also check chirality within 'check_constraint'
|
// let's also check chirality within 'check_constraint'
|
||||||
@ -2012,7 +2015,10 @@ int FixBondReact::check_constraints()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (get_chirality(my4coords) != chiral_atoms[i][1][rxnID]) return 0;
|
if (get_chirality(my4coords) != chiral_atoms[i][1][rxnID]) {
|
||||||
|
memory->destroy(satisfied);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user