Merge branch 'develop' into mdi-tweak
This commit is contained in:
@ -787,8 +787,7 @@ namespace ATC {
|
||||
xtArgs[3] = 1.; xtArgs[4] = 1.; xtArgs[5] = 1.;
|
||||
xtArgs[6] = coulombConstant*chargeDensity;
|
||||
xtArgs[7] = -1.;
|
||||
string radialPower = "radial_power";
|
||||
f = XT_Function_Mgr::instance()->function(radialPower,8,xtArgs);
|
||||
f = XT_Function_Mgr::instance()->function("radial_power",8,xtArgs);
|
||||
|
||||
|
||||
for (iset = faceset->begin(); iset != faceset->end(); iset++) {
|
||||
|
||||
@ -186,7 +186,7 @@ static const double localCoordinatesTolerance = 1.e-09;
|
||||
double c[3]={0,0,0};
|
||||
c[0] = y0*X[0] - y0*X[1] - y0*X[2] + y0*X[3] - x0*Y[0] + (X[1]*Y[0])*0.5 + (X[2]*Y[0])*0.5 + x0*Y[1] - (X[0]*Y[1])*0.5 - (X[3]*Y[1])*0.5 + x0*Y[2] - (X[0]*Y[2])*0.5 - (X[3]*Y[2])*0.5 - x0*Y[3] + (X[1]*Y[3])*0.5 + (X[2]*Y[3])*0.5;
|
||||
c[1] = -(y0*X[0]) + y0*X[1] - y0*X[2] + y0*X[3] + x0*Y[0] - X[1]*Y[0] - x0*Y[1] + X[0]*Y[1] + x0*Y[2] - X[3]*Y[2] - x0*Y[3] + X[2]*Y[3];
|
||||
c[1] = (X[1]*Y[0])*0.5 - (X[2]*Y[0])*0.5 - (X[0]*Y[1])*0.5 + (X[3]*Y[1])*0.5 + (X[0]*Y[2])*0.5 - (X[3]*Y[2])*0.5 - (X[1]*Y[3])*0.5 + (X[2]*Y[3])*0.5;
|
||||
c[2] = (X[1]*Y[0])*0.5 - (X[2]*Y[0])*0.5 - (X[0]*Y[1])*0.5 + (X[3]*Y[1])*0.5 + (X[0]*Y[2])*0.5 - (X[3]*Y[2])*0.5 - (X[1]*Y[3])*0.5 + (X[2]*Y[3])*0.5;
|
||||
double xi2[2]={0,0};
|
||||
int nroots = solve_quadratic(c,xi2);
|
||||
if (nroots == 0) throw ATC_Error("no real roots in 2D analytic projection guess");
|
||||
|
||||
@ -48,7 +48,7 @@ namespace ATC {
|
||||
}
|
||||
|
||||
// add user function into the if statement and assign returnFunction to it
|
||||
UXT_Function* UXT_Function_Mgr::function(string & type, int nargs, double * args)
|
||||
UXT_Function* UXT_Function_Mgr::function(const string & type, int nargs, double * args)
|
||||
{
|
||||
UXT_Function * returnFunction;
|
||||
if (type=="linear") {
|
||||
@ -167,7 +167,7 @@ XT_Function_Mgr * XT_Function_Mgr::myInstance_ = nullptr;
|
||||
}
|
||||
|
||||
// add user function into the if statement and assign returnFunction to it
|
||||
XT_Function* XT_Function_Mgr::function(string & type, int nargs, double * args)
|
||||
XT_Function* XT_Function_Mgr::function(const string & type, int nargs, double * args)
|
||||
{
|
||||
XT_Function * returnFunction;
|
||||
if (type=="constant") {
|
||||
|
||||
@ -110,7 +110,7 @@ namespace ATC {
|
||||
/** Static instance of this class */
|
||||
static UXT_Function_Mgr * instance();
|
||||
|
||||
UXT_Function* function(std::string & type, int nargs, double * arg);
|
||||
UXT_Function* function(const std::string & type, int nargs, double * arg);
|
||||
UXT_Function* function(char ** arg, int nargs);
|
||||
UXT_Function* linear_function(double c0, double c1);
|
||||
UXT_Function* copy_UXT_function(UXT_Function* other);
|
||||
@ -181,7 +181,7 @@ namespace ATC {
|
||||
/** Static instance of this class */
|
||||
static XT_Function_Mgr * instance();
|
||||
|
||||
XT_Function* function(std::string & type, int nargs, double * arg);
|
||||
XT_Function* function(const std::string & type, int nargs, double * arg);
|
||||
XT_Function* function(char ** arg, int nargs);
|
||||
XT_Function* constant_function(double c);
|
||||
XT_Function* copy_XT_function(XT_Function* other);
|
||||
|
||||
@ -1,6 +1,9 @@
|
||||
# /* ----------------------------------------------------------------------
|
||||
# Generic Linux Makefile for HIP
|
||||
# - export HIP_PLATFORM=amd (or nvcc) before execution
|
||||
# - export HIP_PATH=/path/to/HIP/install path to the HIP implementation
|
||||
# such as hipamd or CHIP-SPV.
|
||||
# - export HIP_PLATFORM=<amd/nvcc/spirv> specify the HIP platform to use.
|
||||
# Optional. If not set, will be determined by ${HIP_PATH}/bin/hipconfig.
|
||||
# - change HIP_ARCH for your GPU
|
||||
# ------------------------------------------------------------------------- */
|
||||
|
||||
@ -20,41 +23,62 @@ HIP_OPTS = -O3
|
||||
HIP_HOST_OPTS = -Wno-deprecated-declarations -fopenmp
|
||||
HIP_HOST_INCLUDE =
|
||||
|
||||
ifndef HIP_PATH
|
||||
$(error HIP_PATH is not set)
|
||||
endif
|
||||
|
||||
ifndef HIP_PLATFORM
|
||||
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform)
|
||||
endif
|
||||
HIP_COMPILER=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
|
||||
|
||||
# use device sort
|
||||
# requires linking with hipcc and hipCUB + (rocPRIM or CUB for AMD or Nvidia respectively)
|
||||
ifneq (spirv,$(HIP_PLATFORM))
|
||||
# hipCUB not aviable for CHIP-SPV
|
||||
HIP_HOST_OPTS += -DUSE_HIP_DEVICE_SORT
|
||||
endif
|
||||
# path to cub
|
||||
HIP_HOST_INCLUDE += -I./
|
||||
# path to hipcub
|
||||
HIP_HOST_INCLUDE += -I$(HIP_PATH)/../include
|
||||
|
||||
ifeq (amd,$(HIP_PLATFORM))
|
||||
# newer version of ROCm (5.1+) require c++14 for rocprim
|
||||
HIP_OPTS += -std=c++14
|
||||
# newer version of ROCm (5.1+) require c++14 for rocprim
|
||||
HIP_OPTS += -std=c++14
|
||||
endif
|
||||
|
||||
# use mpi
|
||||
HIP_HOST_OPTS += -DMPI_GERYON -DUCL_NO_EXIT
|
||||
# this settings should match LAMMPS Makefile
|
||||
MPI_COMP_OPTS = $(shell mpicxx --showme:compile)
|
||||
# automatic flag detection for OpenMPI
|
||||
ifeq ($(shell mpicxx --showme:compile >/dev/null 2>&1; echo $$?), 0)
|
||||
MPI_COMP_OPTS = $(shell mpicxx --showme:compile) -DOMPI_SKIP_MPICXX=1
|
||||
MPI_LINK_OPTS = $(shell mpicxx --showme:link)
|
||||
|
||||
HIP_PATH ?= $(wildcard /opt/rocm/hip)
|
||||
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform)
|
||||
HIP_COMPILER=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
|
||||
# automatic flag detection for MPICH
|
||||
else ifeq ($(shell mpicxx -compile_info >/dev/null 2>&1; echo $$?),0)
|
||||
MPI_COMP_OPTS = $(filter -I%,$(shell mpicxx -compile_info)) -DMPICH_IGNORE_CXX_SEEK
|
||||
MPI_LINK_OPTS = $(filter -Wl%,$(shell mpicxx -link_info)) $(filter -L%,$(shell mpicxx -link_info)) $(filter -l%,$(shell mpicxx -link_info))
|
||||
# for other MPI libs: must set flags manually, if needed
|
||||
else
|
||||
MPI_COMP_OPTS =
|
||||
MPI_LINK_OPTS =
|
||||
endif
|
||||
|
||||
ifeq (hcc,$(HIP_PLATFORM))
|
||||
# possible values: gfx803,gfx900,gfx906
|
||||
HIP_ARCH = gfx906
|
||||
# possible values: gfx803,gfx900,gfx906
|
||||
HIP_ARCH = gfx906
|
||||
else ifeq (amd,$(HIP_PLATFORM))
|
||||
# possible values: gfx803,gfx900,gfx906
|
||||
HIP_ARCH = gfx906
|
||||
# possible values: gfx803,gfx900,gfx906
|
||||
HIP_ARCH = gfx906
|
||||
else ifeq (nvcc,$(HIP_PLATFORM))
|
||||
HIP_OPTS += --use_fast_math
|
||||
HIP_ARCH = -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_32,code=[sm_32,compute_32] -gencode arch=compute_35,code=[sm_35,compute_35] \
|
||||
HIP_OPTS += --use_fast_math
|
||||
HIP_ARCH = -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_32,code=[sm_32,compute_32] -gencode arch=compute_35,code=[sm_35,compute_35] \
|
||||
-gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52] -gencode arch=compute_53,code=[sm_53,compute_53]\
|
||||
-gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] -gencode arch=compute_62,code=[sm_62,compute_62]\
|
||||
-gencode arch=compute_70,code=[sm_70,compute_70] -gencode arch=compute_72,code=[sm_72,compute_72] -gencode arch=compute_75,code=[sm_75,compute_75]
|
||||
else ifeq (spirv,$(HIP_PLATFORM))
|
||||
HIP_ARCH = spirv
|
||||
endif
|
||||
|
||||
BIN_DIR = .
|
||||
@ -71,7 +95,15 @@ BSH = /bin/sh
|
||||
HIP_OPTS += -DUSE_HIP $(HIP_PRECISION)
|
||||
HIP_GPU_OPTS += $(HIP_OPTS) -I./
|
||||
|
||||
ifeq (clang,$(HIP_COMPILER))
|
||||
ifeq (spirv,$(HIP_PLATFORM))
|
||||
HIP_HOST_OPTS += -fPIC
|
||||
HIP_GPU_CC = $(HIP_PATH)/bin/hipcc -c
|
||||
HIP_GPU_OPTS_S =
|
||||
HIP_GPU_OPTS_E =
|
||||
HIP_KERNEL_SUFFIX = .cpp
|
||||
HIP_LIBS_TARGET =
|
||||
export HCC_AMDGPU_TARGET := $(HIP_ARCH)
|
||||
else ifeq (clang,$(HIP_COMPILER))
|
||||
HIP_HOST_OPTS += -fPIC
|
||||
HIP_GPU_CC = $(HIP_PATH)/bin/hipcc --genco
|
||||
HIP_GPU_OPTS_S = --offload-arch=$(HIP_ARCH)
|
||||
|
||||
@ -394,7 +394,7 @@ UCL_Device::~UCL_Device() {
|
||||
clear();
|
||||
}
|
||||
|
||||
int UCL_Device::set_platform(const int) {
|
||||
int UCL_Device::set_platform(const int pid) {
|
||||
clear();
|
||||
#ifdef UCL_DEBUG
|
||||
assert(pid<num_platforms());
|
||||
|
||||
@ -57,16 +57,26 @@ _texture( q_tex,int2);
|
||||
#define q_tex q_
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
GPU analogue of Atom::map inline method,
|
||||
but now limited to map_array mapping style.
|
||||
Map global ID to local index of atom.
|
||||
---------------------------------------------------------------------- */
|
||||
ucl_inline int atom_mapping(const __global int *map, tagint glob) {
|
||||
return map[glob];
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
GPU version of Domain::closest_image(int, int) method.
|
||||
Return local index of atom J or any of its images that is closest to atom I
|
||||
if J is not a valid index like -1, just return it.
|
||||
---------------------------------------------------------------------- */
|
||||
ucl_inline int closest_image(int i, int j, const __global int* sametag,
|
||||
const __global numtyp4 *restrict x_)
|
||||
{
|
||||
if (j < 0) return j;
|
||||
|
||||
numtyp4 xi; fetch4(xi,i,pos_tex); // = x[i];
|
||||
numtyp4 xi; fetch4(xi,i,pos_tex);
|
||||
numtyp4 xj; fetch4(xj,j,pos_tex);
|
||||
|
||||
int closest = j;
|
||||
@ -92,6 +102,10 @@ ucl_inline int closest_image(int i, int j, const __global int* sametag,
|
||||
return closest;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
For molecule that consists of atoms O, H1 and H2 compute position
|
||||
of virtual charge site xM (return parameter)
|
||||
---------------------------------------------------------------------- */
|
||||
ucl_inline void compute_newsite(int iO, int iH1, int iH2,
|
||||
__global numtyp4 *xM, numtyp q,
|
||||
numtyp alpha, const __global numtyp4 *restrict x_) {
|
||||
@ -118,23 +132,34 @@ ucl_inline void compute_newsite(int iO, int iH1, int iH2,
|
||||
*xM = M;
|
||||
}
|
||||
|
||||
__kernel void k_lj_tip4p_long_distrib(const __global numtyp4 *restrict x_,
|
||||
/* ----------------------------------------------------------------------
|
||||
Compute resulting forces (ans), energies and virial (engv).
|
||||
An additional term is calculated based on the previously
|
||||
calculated values on the virlual sites (ansO),
|
||||
which should be distributed over the real atoms.
|
||||
For some hydrogens, the corresponding oxygens are
|
||||
not local atoms and the ansO value is not calculated.
|
||||
The required increase is calculated directly in the main function.
|
||||
---------------------------------------------------------------------- */
|
||||
__kernel void k_lj_tip4p_long_distrib(
|
||||
const __global numtyp4 *restrict x_,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom,
|
||||
__global int *restrict hneigh,
|
||||
__global numtyp4 *restrict m,
|
||||
const __global int *restrict hneigh,
|
||||
const __global numtyp4 *restrict m,
|
||||
const int typeO, const int typeH,
|
||||
const numtyp alpha,
|
||||
const __global numtyp *restrict q_, const __global acctyp4 *restrict ansO) {
|
||||
const __global numtyp *restrict q_,
|
||||
const __global acctyp4 *restrict ansO) {
|
||||
|
||||
int i = BLOCK_ID_X*(BLOCK_SIZE_X)+THREAD_ID_X;
|
||||
acctyp4 f;
|
||||
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
|
||||
|
||||
if (i<inum) {
|
||||
numtyp4 ix; fetch4(ix,i,pos_tex);// = x_[i];
|
||||
numtyp4 ix; fetch4(ix,i,pos_tex);
|
||||
int itype = ix.w;
|
||||
acctyp4 fM, vM;
|
||||
acctyp eM;
|
||||
@ -191,21 +216,28 @@ __kernel void k_lj_tip4p_long_distrib(const __global numtyp4 *restrict x_,
|
||||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_lj_tip4p_reneigh(const __global numtyp4 *restrict x_,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
/* ----------------------------------------------------------------------
|
||||
Rebuild hneigh after the neighbor build.
|
||||
hneight stores local IDs of H1 and H2 for each local and ghost O
|
||||
and local ID of O for each local H.
|
||||
---------------------------------------------------------------------- */
|
||||
__kernel void k_lj_tip4p_reneigh(
|
||||
const __global numtyp4 *restrict x_,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
const int nall, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom,
|
||||
__global int *restrict hneigh,
|
||||
__global numtyp4 *restrict m,
|
||||
const int typeO, const int typeH,
|
||||
const __global tagint *restrict tag, const __global int *restrict map,
|
||||
const __global tagint *restrict tag,
|
||||
const __global int *restrict map,
|
||||
const __global int *restrict sametag) {
|
||||
|
||||
int i = BLOCK_ID_X*(BLOCK_SIZE_X)+THREAD_ID_X;
|
||||
|
||||
if (i<nall) {
|
||||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||
numtyp4 ix; fetch4(ix,i,pos_tex);
|
||||
|
||||
int iH1, iH2, iO;
|
||||
int itype = ix.w;
|
||||
@ -217,36 +249,33 @@ __kernel void k_lj_tip4p_reneigh(const __global numtyp4 *restrict x_,
|
||||
// set iH1,iH2 to closest image to O
|
||||
iH1 = closest_image(i, iH1, sametag, x_);
|
||||
iH2 = closest_image(i, iH2, sametag, x_);
|
||||
|
||||
hneigh[i*4 ] = iH1;
|
||||
hneigh[i*4+1] = iH2;
|
||||
hneigh[i*4+2] = -1;
|
||||
}
|
||||
}
|
||||
if (itype == typeH) {
|
||||
if (i<inum && itype == typeH) {
|
||||
if (hneigh[i*4+2] != -1) {
|
||||
int iI, iH;
|
||||
iI = atom_mapping(map,tag[i] - 1);
|
||||
numtyp4 iIx; fetch4(iIx,iI,pos_tex); //x_[iI];
|
||||
iO = closest_image(i,iI,sametag, x_);
|
||||
numtyp4 iIx; fetch4(iIx,iO,pos_tex); //x_[iI];
|
||||
if ((int)iIx.w == typeH) {
|
||||
iO = atom_mapping(map,tag[i] - 2);
|
||||
iO = closest_image(i, iO, sametag, x_);
|
||||
iH1 = closest_image(i, iI, sametag, x_);
|
||||
iH2 = i;
|
||||
} else { //if ((int)iIx.w == typeO)
|
||||
iH = atom_mapping(map, tag[i] + 1);
|
||||
iO = closest_image(i,iI,sametag, x_);
|
||||
iH1 = i;
|
||||
iH2 = closest_image(i,iH,sametag, x_);
|
||||
}
|
||||
hneigh[i*4+0] = iO;
|
||||
hneigh[i*4+1] += -1;
|
||||
hneigh[i*4+1] = -1;
|
||||
hneigh[i*4+2] = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
On each timestep update virual charge coordinates (m output parameter).
|
||||
---------------------------------------------------------------------- */
|
||||
__kernel void k_lj_tip4p_newsite(const __global numtyp4 *restrict x_,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
@ -268,11 +297,27 @@ __kernel void k_lj_tip4p_newsite(const __global numtyp4 *restrict x_,
|
||||
iH2 = hneigh[i*4+1];
|
||||
iO = i;
|
||||
numtyp qO; fetch(qO,iO,q_tex);
|
||||
compute_newsite(iO,iH1,iH2, &m[iO], qO, alpha, x_);
|
||||
if (iH1>=0 && iH2>=0) {
|
||||
compute_newsite(iO,iH1,iH2, &m[iO], qO, alpha, x_);
|
||||
} else {
|
||||
m[iO] = ix;
|
||||
m[iO].w = qO;
|
||||
hneigh[i*4] = iO;
|
||||
hneigh[i*4+1] = iO;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Compute initial value of force, energy and virial for each local particle.
|
||||
The values calculated on oxygens use the virtual charge position (m) and
|
||||
they are stored in a separate array (ansO) for further distribution
|
||||
in a separate kernel. For some hydrogens located on the boundary
|
||||
of the local region, oxygens are non-local and the contribution
|
||||
of oxygen is calculated separately in this kernel for them .
|
||||
---------------------------------------------------------------------- */
|
||||
__kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
@ -331,8 +376,7 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
||||
iH1 = hneigh[i*4 ];
|
||||
iH2 = hneigh[i*4+1];
|
||||
x1 = m[iO];
|
||||
}
|
||||
if (itype == typeH) {
|
||||
} else if (itype == typeH) {
|
||||
iO = hneigh[i *4 ];
|
||||
iH1 = hneigh[iO*4 ];
|
||||
iH2 = hneigh[iO*4+1];
|
||||
@ -415,12 +459,12 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
||||
fO.x += delx * force_coul;
|
||||
fO.y += dely * force_coul;
|
||||
fO.z += delz * force_coul;
|
||||
fO.w += 0;
|
||||
//fO.w += 0;
|
||||
} else {
|
||||
f.x += delx * force_coul;
|
||||
f.y += dely * force_coul;
|
||||
f.z += delz * force_coul;
|
||||
f.w += 0;
|
||||
//f.w += 0;
|
||||
}
|
||||
if (EVFLAG && eflag) {
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
@ -431,7 +475,7 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
||||
fd.y = dely*force_coul;
|
||||
fd.z = delz*force_coul;
|
||||
if (itype == typeO) {
|
||||
numtyp cO = 1 - alpha, cH = 0.5*alpha;
|
||||
numtyp cO = (numtyp)1.0 - alpha, cH = (numtyp)0.5*alpha;
|
||||
numtyp4 vdi, vdj;
|
||||
numtyp4 xH1; fetch4(xH1,iH1,pos_tex);
|
||||
numtyp4 xH2; fetch4(xH2,iH2,pos_tex);
|
||||
@ -449,15 +493,15 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
||||
vdj.z = xjO.z*cO + xjH1.z*cH + xjH2.z*cH;
|
||||
//vdj.w = vdj.w;
|
||||
} else vdj = jx;
|
||||
vO[0] += 0.5*(vdi.x - vdj.x)*fd.x;
|
||||
vO[1] += 0.5*(vdi.y - vdj.y)*fd.y;
|
||||
vO[2] += 0.5*(vdi.z - vdj.z)*fd.z;
|
||||
vO[3] += 0.5*(vdi.x - vdj.x)*fd.y;
|
||||
vO[4] += 0.5*(vdi.x - vdj.x)*fd.z;
|
||||
vO[5] += 0.5*(vdi.y - vdj.y)*fd.z;
|
||||
vO[0] += (numtyp)0.5*(vdi.x - vdj.x)*fd.x;
|
||||
vO[1] += (numtyp)0.5*(vdi.y - vdj.y)*fd.y;
|
||||
vO[2] += (numtyp)0.5*(vdi.z - vdj.z)*fd.z;
|
||||
vO[3] += (numtyp)0.5*(vdi.x - vdj.x)*fd.y;
|
||||
vO[4] += (numtyp)0.5*(vdi.x - vdj.x)*fd.z;
|
||||
vO[5] += (numtyp)0.5*(vdi.y - vdj.y)*fd.z;
|
||||
} else {
|
||||
if (jtype == typeO) {
|
||||
numtyp cO = 1 - alpha, cH = 0.5*alpha;
|
||||
numtyp cO = (numtyp)1.0 - alpha, cH = (numtyp)0.5*alpha;
|
||||
numtyp4 vdj;
|
||||
numtyp4 xjH1; fetch4(xjH1,jH1,pos_tex);
|
||||
numtyp4 xjH2; fetch4(xjH2,jH2,pos_tex);
|
||||
@ -505,7 +549,7 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
||||
prefactor *= qqrd2e*x1m.w/r;
|
||||
numtyp force_coul = r2inv*prefactor * (_erfc + EWALD_F*grij*expm2 - factor_coul);
|
||||
|
||||
numtyp cO = 1 - alpha, cH = 0.5*alpha;
|
||||
numtyp cO = (numtyp)1 - alpha, cH = (numtyp)0.5*alpha;
|
||||
numtyp4 fd;
|
||||
fd.x = delx * force_coul * cH;
|
||||
fd.y = dely * force_coul * cH;
|
||||
@ -516,7 +560,7 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
||||
f.z += fd.z;
|
||||
|
||||
if (EVFLAG && eflag) {
|
||||
e_coul += prefactor*(_erfc-factor_coul) * (acctyp)0.5 * alpha;
|
||||
e_coul += prefactor*(_erfc-factor_coul) * (numtyp)0.5 * alpha;
|
||||
}
|
||||
if (EVFLAG && vflag) {
|
||||
numtyp4 xH1; fetch4(xH1,iH1,pos_tex);
|
||||
@ -746,12 +790,12 @@ __kernel void k_lj_tip4p_long_fast(const __global numtyp4 *restrict x_,
|
||||
fO.x += delx * force_coul;
|
||||
fO.y += dely * force_coul;
|
||||
fO.z += delz * force_coul;
|
||||
fO.w += 0;
|
||||
//fO.w += 0;
|
||||
} else {
|
||||
f.x += delx * force_coul;
|
||||
f.y += dely * force_coul;
|
||||
f.z += delz * force_coul;
|
||||
f.w += 0;
|
||||
//f.w += 0;
|
||||
}
|
||||
if (EVFLAG && eflag) {
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
@ -762,7 +806,7 @@ __kernel void k_lj_tip4p_long_fast(const __global numtyp4 *restrict x_,
|
||||
fd.y = dely*force_coul;
|
||||
fd.z = delz*force_coul;
|
||||
if (itype == typeO) {
|
||||
numtyp cO = 1 - alpha, cH = 0.5*alpha;
|
||||
numtyp cO = (numtyp)1.0 - alpha, cH = (numtyp)0.5*alpha;
|
||||
numtyp4 vdi, vdj;
|
||||
numtyp4 xH1; fetch4(xH1,iH1,pos_tex);
|
||||
numtyp4 xH2; fetch4(xH2,iH2,pos_tex);
|
||||
@ -780,15 +824,15 @@ __kernel void k_lj_tip4p_long_fast(const __global numtyp4 *restrict x_,
|
||||
vdj.z = xjO.z*cO + xjH1.z*cH + xjH2.z*cH;
|
||||
//vdj.w = vdj.w;
|
||||
} else vdj = jx;
|
||||
vO[0] += 0.5*(vdi.x - vdj.x)*fd.x;
|
||||
vO[1] += 0.5*(vdi.y - vdj.y)*fd.y;
|
||||
vO[2] += 0.5*(vdi.z - vdj.z)*fd.z;
|
||||
vO[3] += 0.5*(vdi.x - vdj.x)*fd.y;
|
||||
vO[4] += 0.5*(vdi.x - vdj.x)*fd.z;
|
||||
vO[5] += 0.5*(vdi.y - vdj.y)*fd.z;
|
||||
vO[0] += (numtyp)0.5*(vdi.x - vdj.x)*fd.x;
|
||||
vO[1] += (numtyp)0.5*(vdi.y - vdj.y)*fd.y;
|
||||
vO[2] += (numtyp)0.5*(vdi.z - vdj.z)*fd.z;
|
||||
vO[3] += (numtyp)0.5*(vdi.x - vdj.x)*fd.y;
|
||||
vO[4] += (numtyp)0.5*(vdi.x - vdj.x)*fd.z;
|
||||
vO[5] += (numtyp)0.5*(vdi.y - vdj.y)*fd.z;
|
||||
} else {
|
||||
if (jtype == typeO) {
|
||||
numtyp cO = 1 - alpha, cH = 0.5*alpha;
|
||||
numtyp cO = (numtyp)1.0 - alpha, cH = (numtyp)0.5*alpha;
|
||||
numtyp4 vdj;
|
||||
numtyp4 xjH1; fetch4(xjH1,jH1,pos_tex);
|
||||
numtyp4 xjH2; fetch4(xjH2,jH2,pos_tex);
|
||||
@ -836,7 +880,7 @@ __kernel void k_lj_tip4p_long_fast(const __global numtyp4 *restrict x_,
|
||||
prefactor *= qqrd2e*x1m.w/r;
|
||||
numtyp force_coul = r2inv*prefactor * (_erfc + EWALD_F*grij*expm2 - factor_coul);
|
||||
|
||||
numtyp cO = 1 - alpha, cH = 0.5*alpha;
|
||||
numtyp cO = (numtyp)1.0 - alpha, cH = (numtyp)0.5*alpha;
|
||||
numtyp4 fd;
|
||||
fd.x = delx * force_coul * cH;
|
||||
fd.y = dely * force_coul * cH;
|
||||
|
||||
@ -30,7 +30,7 @@
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
|
||||
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__)
|
||||
#define CONFIG_ID 303
|
||||
#define SIMD_SIZE 64
|
||||
#else
|
||||
@ -112,7 +112,7 @@
|
||||
// KERNEL MACROS - TEXTURES
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__)
|
||||
#define _texture(name, type) __device__ type* name
|
||||
#define _texture_2d(name, type) __device__ type* name
|
||||
#else
|
||||
@ -134,9 +134,12 @@
|
||||
int2 qt = tex1Dfetch(q_tex,i); \
|
||||
ans=__hiloint2double(qt.y, qt.x); \
|
||||
}
|
||||
#elif defined(__HIP_PLATFORM_SPIRV__)
|
||||
#define fetch4(ans,i,pos_tex) tex1Dfetch(&ans, pos_tex, i);
|
||||
#define fetch(ans,i,q_tex) tex1Dfetch(&ans, q_tex,i);
|
||||
#else
|
||||
#define fetch4(ans,i,pos_tex) ans=tex1Dfetch(pos_tex, i);
|
||||
#define fetch(ans,i,q_tex) ans=tex1Dfetch(q_tex,i);
|
||||
#define fetch4(ans,i,pos_tex) ans=tex1Dfetch(pos_tex, i);
|
||||
#define fetch(ans,i,q_tex) ans=tex1Dfetch(q_tex,i);
|
||||
#endif
|
||||
#else
|
||||
#define fetch4(ans,i,x) ans=x[i]
|
||||
@ -152,7 +155,7 @@
|
||||
#define mu_tex mu_
|
||||
#endif
|
||||
|
||||
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__)
|
||||
|
||||
#undef fetch4
|
||||
#undef fetch
|
||||
@ -209,7 +212,7 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(CUDA_PRE_NINE) || defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#if defined(CUDA_PRE_NINE) || defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__)
|
||||
|
||||
#ifdef _SINGLE_SINGLE
|
||||
#define shfl_down __shfl_down
|
||||
|
||||
Reference in New Issue
Block a user