Merge branch 'develop' into mdi-tweak

Axel Kohlmeyer
2022-09-09 00:03:39 -04:00
682 changed files with 255719 additions and 15468 deletions

View File

@ -787,8 +787,7 @@ namespace ATC {
xtArgs[3] = 1.; xtArgs[4] = 1.; xtArgs[5] = 1.;
xtArgs[6] = coulombConstant*chargeDensity;
xtArgs[7] = -1.;
string radialPower = "radial_power";
f = XT_Function_Mgr::instance()->function(radialPower,8,xtArgs);
f = XT_Function_Mgr::instance()->function("radial_power",8,xtArgs);
for (iset = faceset->begin(); iset != faceset->end(); iset++) {

View File

@ -186,7 +186,7 @@ static const double localCoordinatesTolerance = 1.e-09;
double c[3]={0,0,0};
c[0] = y0*X[0] - y0*X[1] - y0*X[2] + y0*X[3] - x0*Y[0] + (X[1]*Y[0])*0.5 + (X[2]*Y[0])*0.5 + x0*Y[1] - (X[0]*Y[1])*0.5 - (X[3]*Y[1])*0.5 + x0*Y[2] - (X[0]*Y[2])*0.5 - (X[3]*Y[2])*0.5 - x0*Y[3] + (X[1]*Y[3])*0.5 + (X[2]*Y[3])*0.5;
c[1] = -(y0*X[0]) + y0*X[1] - y0*X[2] + y0*X[3] + x0*Y[0] - X[1]*Y[0] - x0*Y[1] + X[0]*Y[1] + x0*Y[2] - X[3]*Y[2] - x0*Y[3] + X[2]*Y[3];
c[1] = (X[1]*Y[0])*0.5 - (X[2]*Y[0])*0.5 - (X[0]*Y[1])*0.5 + (X[3]*Y[1])*0.5 + (X[0]*Y[2])*0.5 - (X[3]*Y[2])*0.5 - (X[1]*Y[3])*0.5 + (X[2]*Y[3])*0.5;
c[2] = (X[1]*Y[0])*0.5 - (X[2]*Y[0])*0.5 - (X[0]*Y[1])*0.5 + (X[3]*Y[1])*0.5 + (X[0]*Y[2])*0.5 - (X[3]*Y[2])*0.5 - (X[1]*Y[3])*0.5 + (X[2]*Y[3])*0.5;
double xi2[2]={0,0};
int nroots = solve_quadratic(c,xi2);
if (nroots == 0) throw ATC_Error("no real roots in 2D analytic projection guess");
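For context on the hunk above: the corrected c[1] coefficient feeds a quadratic solve whose real roots seed the 2D analytic projection guess; if no real root exists, the ATC_Error above is thrown. A minimal stand-alone sketch of such a solve, assuming the coefficient ordering c[0] + c[1]*xi + c[2]*xi^2 (an illustration only, not the ATC solve_quadratic):

#include <cmath>

// Hypothetical sketch: solve c[0] + c[1]*x + c[2]*x^2 = 0 and
// return the number of real roots found (0, 1, or 2).
static int solve_quadratic_sketch(const double c[3], double roots[2]) {
  if (c[2] == 0.0) {                        // degenerate, linear case
    if (c[1] == 0.0) return 0;
    roots[0] = -c[0] / c[1];
    return 1;
  }
  const double disc = c[1]*c[1] - 4.0*c[2]*c[0];
  if (disc < 0.0) return 0;                 // caller reports "no real roots"
  const double s = std::sqrt(disc);
  roots[0] = (-c[1] + s) / (2.0*c[2]);
  roots[1] = (-c[1] - s) / (2.0*c[2]);
  return (disc == 0.0) ? 1 : 2;
}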

View File

@ -48,7 +48,7 @@ namespace ATC {
}
// add user function into the if statement and assign returnFunction to it
UXT_Function* UXT_Function_Mgr::function(string & type, int nargs, double * args)
UXT_Function* UXT_Function_Mgr::function(const string & type, int nargs, double * args)
{
UXT_Function * returnFunction;
if (type=="linear") {
@ -167,7 +167,7 @@ XT_Function_Mgr * XT_Function_Mgr::myInstance_ = nullptr;
}
// add user function into the if statement and assign returnFunction to it
XT_Function* XT_Function_Mgr::function(string & type, int nargs, double * args)
XT_Function* XT_Function_Mgr::function(const string & type, int nargs, double * args)
{
XT_Function * returnFunction;
if (type=="constant") {

View File

@ -110,7 +110,7 @@ namespace ATC {
/** Static instance of this class */
static UXT_Function_Mgr * instance();
UXT_Function* function(std::string & type, int nargs, double * arg);
UXT_Function* function(const std::string & type, int nargs, double * arg);
UXT_Function* function(char ** arg, int nargs);
UXT_Function* linear_function(double c0, double c1);
UXT_Function* copy_UXT_function(UXT_Function* other);
@ -181,7 +181,7 @@ namespace ATC {
/** Static instance of this class */
static XT_Function_Mgr * instance();
XT_Function* function(std::string & type, int nargs, double * arg);
XT_Function* function(const std::string & type, int nargs, double * arg);
XT_Function* function(char ** arg, int nargs);
XT_Function* constant_function(double c);
XT_Function* copy_XT_function(XT_Function* other);
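The const-qualified signatures above are what allow the call site in the first hunk to pass the string literal "radial_power" directly instead of going through a named std::string. A minimal stand-alone sketch of the language rule involved (hypothetical names, not the ATC classes):

#include <string>

// Old-style signature: a non-const lvalue reference cannot bind to a
// temporary std::string created from a string literal.
void lookup_old(std::string &type, int nargs, double *args) {}

// New-style signature: a const reference binds to the temporary directly.
void lookup_new(const std::string &type, int nargs, double *args) {}

int main() {
  double args[8] = {0};
  // lookup_old("radial_power", 8, args);  // error: cannot bind non-const ref
  lookup_new("radial_power", 8, args);     // OK with the const-qualified overload
  std::string named = "radial_power";
  lookup_old(named, 8, args);              // old signature needed a named variable
  return 0;
}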

View File

@ -1,6 +1,9 @@
# /* ----------------------------------------------------------------------
# Generic Linux Makefile for HIP
# - export HIP_PLATFORM=amd (or nvcc) before execution
# - export HIP_PATH=/path/to/HIP/install  (path to the HIP implementation,
#   such as hipamd or CHIP-SPV)
# - export HIP_PLATFORM=<amd/nvcc/spirv>  to specify the HIP platform to use.
#   Optional; if not set, it is determined by ${HIP_PATH}/bin/hipconfig.
# - change HIP_ARCH for your GPU
# ------------------------------------------------------------------------- */
@ -20,41 +23,62 @@ HIP_OPTS = -O3
HIP_HOST_OPTS = -Wno-deprecated-declarations -fopenmp
HIP_HOST_INCLUDE =
ifndef HIP_PATH
$(error HIP_PATH is not set)
endif
ifndef HIP_PLATFORM
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform)
endif
HIP_COMPILER=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
# use device sort
# requires linking with hipcc and hipCUB + (rocPRIM or CUB for AMD or Nvidia respectively)
ifneq (spirv,$(HIP_PLATFORM))
# hipCUB is not available for CHIP-SPV
HIP_HOST_OPTS += -DUSE_HIP_DEVICE_SORT
endif
# path to cub
HIP_HOST_INCLUDE += -I./
# path to hipcub
HIP_HOST_INCLUDE += -I$(HIP_PATH)/../include
ifeq (amd,$(HIP_PLATFORM))
# newer versions of ROCm (5.1+) require c++14 for rocprim
HIP_OPTS += -std=c++14
endif
# use mpi
HIP_HOST_OPTS += -DMPI_GERYON -DUCL_NO_EXIT
# these settings should match the LAMMPS Makefile
MPI_COMP_OPTS = $(shell mpicxx --showme:compile)
# automatic flag detection for OpenMPI
ifeq ($(shell mpicxx --showme:compile >/dev/null 2>&1; echo $$?), 0)
MPI_COMP_OPTS = $(shell mpicxx --showme:compile) -DOMPI_SKIP_MPICXX=1
MPI_LINK_OPTS = $(shell mpicxx --showme:link)
HIP_PATH ?= $(wildcard /opt/rocm/hip)
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform)
HIP_COMPILER=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
# automatic flag detection for MPICH
else ifeq ($(shell mpicxx -compile_info >/dev/null 2>&1; echo $$?),0)
MPI_COMP_OPTS = $(filter -I%,$(shell mpicxx -compile_info)) -DMPICH_IGNORE_CXX_SEEK
MPI_LINK_OPTS = $(filter -Wl%,$(shell mpicxx -link_info)) $(filter -L%,$(shell mpicxx -link_info)) $(filter -l%,$(shell mpicxx -link_info))
# for other MPI libs: must set flags manually, if needed
else
MPI_COMP_OPTS =
MPI_LINK_OPTS =
endif
ifeq (hcc,$(HIP_PLATFORM))
# possible values: gfx803,gfx900,gfx906
HIP_ARCH = gfx906
else ifeq (amd,$(HIP_PLATFORM))
# possible values: gfx803,gfx900,gfx906
HIP_ARCH = gfx906
else ifeq (nvcc,$(HIP_PLATFORM))
HIP_OPTS += --use_fast_math
HIP_ARCH = -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_32,code=[sm_32,compute_32] -gencode arch=compute_35,code=[sm_35,compute_35] \
-gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52] -gencode arch=compute_53,code=[sm_53,compute_53]\
-gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] -gencode arch=compute_62,code=[sm_62,compute_62]\
-gencode arch=compute_70,code=[sm_70,compute_70] -gencode arch=compute_72,code=[sm_72,compute_72] -gencode arch=compute_75,code=[sm_75,compute_75]
else ifeq (spirv,$(HIP_PLATFORM))
HIP_ARCH = spirv
endif
BIN_DIR = .
@ -71,7 +95,15 @@ BSH = /bin/sh
HIP_OPTS += -DUSE_HIP $(HIP_PRECISION)
HIP_GPU_OPTS += $(HIP_OPTS) -I./
ifeq (clang,$(HIP_COMPILER))
ifeq (spirv,$(HIP_PLATFORM))
HIP_HOST_OPTS += -fPIC
HIP_GPU_CC = $(HIP_PATH)/bin/hipcc -c
HIP_GPU_OPTS_S =
HIP_GPU_OPTS_E =
HIP_KERNEL_SUFFIX = .cpp
HIP_LIBS_TARGET =
export HCC_AMDGPU_TARGET := $(HIP_ARCH)
else ifeq (clang,$(HIP_COMPILER))
HIP_HOST_OPTS += -fPIC
HIP_GPU_CC = $(HIP_PATH)/bin/hipcc --genco
HIP_GPU_OPTS_S = --offload-arch=$(HIP_ARCH)

View File

@ -394,7 +394,7 @@ UCL_Device::~UCL_Device() {
clear();
}
int UCL_Device::set_platform(const int) {
int UCL_Device::set_platform(const int pid) {
clear();
#ifdef UCL_DEBUG
assert(pid<num_platforms());

View File

@ -57,16 +57,26 @@ _texture( q_tex,int2);
#define q_tex q_
#endif
/* ----------------------------------------------------------------------
GPU analogue of the Atom::map() inline method, limited to the
map_array mapping style: map a global atom ID to the local atom index.
---------------------------------------------------------------------- */
ucl_inline int atom_mapping(const __global int *map, tagint glob) {
return map[glob];
}
/* ----------------------------------------------------------------------
GPU version of Domain::closest_image(int, int) method.
Returns the local index of atom J, or of whichever image of J is closest to atom I.
If J is not a valid index (e.g. -1), it is returned unchanged.
---------------------------------------------------------------------- */
ucl_inline int closest_image(int i, int j, const __global int* sametag,
const __global numtyp4 *restrict x_)
{
if (j < 0) return j;
numtyp4 xi; fetch4(xi,i,pos_tex); // = x[i];
numtyp4 xi; fetch4(xi,i,pos_tex);
numtyp4 xj; fetch4(xj,j,pos_tex);
int closest = j;
@ -92,6 +102,10 @@ ucl_inline int closest_image(int i, int j, const __global int* sametag,
return closest;
}
/* ----------------------------------------------------------------------
For a molecule consisting of atoms O, H1, and H2, compute the position
of the virtual charge site xM (return parameter).
---------------------------------------------------------------------- */
ucl_inline void compute_newsite(int iO, int iH1, int iH2,
__global numtyp4 *xM, numtyp q,
numtyp alpha, const __global numtyp4 *restrict x_) {
@ -118,23 +132,34 @@ ucl_inline void compute_newsite(int iO, int iH1, int iH2,
*xM = M;
}
__kernel void k_lj_tip4p_long_distrib(const __global numtyp4 *restrict x_,
/* ----------------------------------------------------------------------
Compute the resulting forces (ans), energies, and virial (engv).
An additional term is calculated from the values previously
accumulated on the virtual sites (ansO), which must be
distributed over the real atoms.
For some hydrogens the corresponding oxygen is not a local atom
and no ansO value is available; the required contribution is
then calculated directly in the main kernel.
---------------------------------------------------------------------- */
__kernel void k_lj_tip4p_long_distrib(
const __global numtyp4 *restrict x_,
__global acctyp4 *restrict ans,
__global acctyp *restrict engv,
const int eflag, const int vflag, const int inum,
const int nbor_pitch, const int t_per_atom,
__global int *restrict hneigh,
__global numtyp4 *restrict m,
const __global int *restrict hneigh,
const __global numtyp4 *restrict m,
const int typeO, const int typeH,
const numtyp alpha,
const __global numtyp *restrict q_, const __global acctyp4 *restrict ansO) {
const __global numtyp *restrict q_,
const __global acctyp4 *restrict ansO) {
int i = BLOCK_ID_X*(BLOCK_SIZE_X)+THREAD_ID_X;
acctyp4 f;
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
if (i<inum) {
numtyp4 ix; fetch4(ix,i,pos_tex);// = x_[i];
numtyp4 ix; fetch4(ix,i,pos_tex);
int itype = ix.w;
acctyp4 fM, vM;
acctyp eM;
@ -191,21 +216,28 @@ __kernel void k_lj_tip4p_long_distrib(const __global numtyp4 *restrict x_,
} // if ii
}
__kernel void k_lj_tip4p_reneigh(const __global numtyp4 *restrict x_,
const __global int * dev_nbor,
const __global int * dev_packed,
/* ----------------------------------------------------------------------
Rebuild hneigh after a neighbor list build.
hneigh stores the local IDs of H1 and H2 for each local and ghost O,
and the local ID of O for each local H.
---------------------------------------------------------------------- */
__kernel void k_lj_tip4p_reneigh(
const __global numtyp4 *restrict x_,
const __global int *dev_nbor,
const __global int *dev_packed,
const int nall, const int inum,
const int nbor_pitch, const int t_per_atom,
__global int *restrict hneigh,
__global numtyp4 *restrict m,
const int typeO, const int typeH,
const __global tagint *restrict tag, const __global int *restrict map,
const __global tagint *restrict tag,
const __global int *restrict map,
const __global int *restrict sametag) {
int i = BLOCK_ID_X*(BLOCK_SIZE_X)+THREAD_ID_X;
if (i<nall) {
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
numtyp4 ix; fetch4(ix,i,pos_tex);
int iH1, iH2, iO;
int itype = ix.w;
@ -217,36 +249,33 @@ __kernel void k_lj_tip4p_reneigh(const __global numtyp4 *restrict x_,
// set iH1,iH2 to closest image to O
iH1 = closest_image(i, iH1, sametag, x_);
iH2 = closest_image(i, iH2, sametag, x_);
hneigh[i*4 ] = iH1;
hneigh[i*4+1] = iH2;
hneigh[i*4+2] = -1;
}
}
if (itype == typeH) {
if (i<inum && itype == typeH) {
if (hneigh[i*4+2] != -1) {
int iI, iH;
iI = atom_mapping(map,tag[i] - 1);
numtyp4 iIx; fetch4(iIx,iI,pos_tex); //x_[iI];
iO = closest_image(i,iI,sametag, x_);
numtyp4 iIx; fetch4(iIx,iO,pos_tex); // = x_[iO];
if ((int)iIx.w == typeH) {
iO = atom_mapping(map,tag[i] - 2);
iO = closest_image(i, iO, sametag, x_);
iH1 = closest_image(i, iI, sametag, x_);
iH2 = i;
} else { //if ((int)iIx.w == typeO)
iH = atom_mapping(map, tag[i] + 1);
iO = closest_image(i,iI,sametag, x_);
iH1 = i;
iH2 = closest_image(i,iH,sametag, x_);
}
hneigh[i*4+0] = iO;
hneigh[i*4+1] += -1;
hneigh[i*4+1] = -1;
hneigh[i*4+2] = -1;
}
}
}
}
/* ----------------------------------------------------------------------
On each timestep, update the virtual charge site coordinates (output parameter m).
---------------------------------------------------------------------- */
__kernel void k_lj_tip4p_newsite(const __global numtyp4 *restrict x_,
const __global int * dev_nbor,
const __global int * dev_packed,
@ -268,11 +297,27 @@ __kernel void k_lj_tip4p_newsite(const __global numtyp4 *restrict x_,
iH2 = hneigh[i*4+1];
iO = i;
numtyp qO; fetch(qO,iO,q_tex);
compute_newsite(iO,iH1,iH2, &m[iO], qO, alpha, x_);
if (iH1>=0 && iH2>=0) {
compute_newsite(iO,iH1,iH2, &m[iO], qO, alpha, x_);
} else {
m[iO] = ix;
m[iO].w = qO;
hneigh[i*4] = iO;
hneigh[i*4+1] = iO;
}
}
}
}
/* ----------------------------------------------------------------------
Compute the initial force, energy, and virial for each local particle.
The values calculated on oxygens use the virtual charge position (m)
and are stored in a separate array (ansO) for later distribution
in a separate kernel. For some hydrogens located at the boundary
of the local region the oxygen is non-local, and its contribution
is then calculated directly in this kernel.
---------------------------------------------------------------------- */
__kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
const __global numtyp4 *restrict lj1,
const __global numtyp4 *restrict lj3,
@ -331,8 +376,7 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
iH1 = hneigh[i*4 ];
iH2 = hneigh[i*4+1];
x1 = m[iO];
}
if (itype == typeH) {
} else if (itype == typeH) {
iO = hneigh[i *4 ];
iH1 = hneigh[iO*4 ];
iH2 = hneigh[iO*4+1];
@ -415,12 +459,12 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
fO.x += delx * force_coul;
fO.y += dely * force_coul;
fO.z += delz * force_coul;
fO.w += 0;
//fO.w += 0;
} else {
f.x += delx * force_coul;
f.y += dely * force_coul;
f.z += delz * force_coul;
f.w += 0;
//f.w += 0;
}
if (EVFLAG && eflag) {
e_coul += prefactor*(_erfc-factor_coul);
@ -431,7 +475,7 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
fd.y = dely*force_coul;
fd.z = delz*force_coul;
if (itype == typeO) {
numtyp cO = 1 - alpha, cH = 0.5*alpha;
numtyp cO = (numtyp)1.0 - alpha, cH = (numtyp)0.5*alpha;
numtyp4 vdi, vdj;
numtyp4 xH1; fetch4(xH1,iH1,pos_tex);
numtyp4 xH2; fetch4(xH2,iH2,pos_tex);
@ -449,15 +493,15 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
vdj.z = xjO.z*cO + xjH1.z*cH + xjH2.z*cH;
//vdj.w = vdj.w;
} else vdj = jx;
vO[0] += 0.5*(vdi.x - vdj.x)*fd.x;
vO[1] += 0.5*(vdi.y - vdj.y)*fd.y;
vO[2] += 0.5*(vdi.z - vdj.z)*fd.z;
vO[3] += 0.5*(vdi.x - vdj.x)*fd.y;
vO[4] += 0.5*(vdi.x - vdj.x)*fd.z;
vO[5] += 0.5*(vdi.y - vdj.y)*fd.z;
vO[0] += (numtyp)0.5*(vdi.x - vdj.x)*fd.x;
vO[1] += (numtyp)0.5*(vdi.y - vdj.y)*fd.y;
vO[2] += (numtyp)0.5*(vdi.z - vdj.z)*fd.z;
vO[3] += (numtyp)0.5*(vdi.x - vdj.x)*fd.y;
vO[4] += (numtyp)0.5*(vdi.x - vdj.x)*fd.z;
vO[5] += (numtyp)0.5*(vdi.y - vdj.y)*fd.z;
} else {
if (jtype == typeO) {
numtyp cO = 1 - alpha, cH = 0.5*alpha;
numtyp cO = (numtyp)1.0 - alpha, cH = (numtyp)0.5*alpha;
numtyp4 vdj;
numtyp4 xjH1; fetch4(xjH1,jH1,pos_tex);
numtyp4 xjH2; fetch4(xjH2,jH2,pos_tex);
@ -505,7 +549,7 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
prefactor *= qqrd2e*x1m.w/r;
numtyp force_coul = r2inv*prefactor * (_erfc + EWALD_F*grij*expm2 - factor_coul);
numtyp cO = 1 - alpha, cH = 0.5*alpha;
numtyp cO = (numtyp)1 - alpha, cH = (numtyp)0.5*alpha;
numtyp4 fd;
fd.x = delx * force_coul * cH;
fd.y = dely * force_coul * cH;
@ -516,7 +560,7 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
f.z += fd.z;
if (EVFLAG && eflag) {
e_coul += prefactor*(_erfc-factor_coul) * (acctyp)0.5 * alpha;
e_coul += prefactor*(_erfc-factor_coul) * (numtyp)0.5 * alpha;
}
if (EVFLAG && vflag) {
numtyp4 xH1; fetch4(xH1,iH1,pos_tex);
@ -746,12 +790,12 @@ __kernel void k_lj_tip4p_long_fast(const __global numtyp4 *restrict x_,
fO.x += delx * force_coul;
fO.y += dely * force_coul;
fO.z += delz * force_coul;
fO.w += 0;
//fO.w += 0;
} else {
f.x += delx * force_coul;
f.y += dely * force_coul;
f.z += delz * force_coul;
f.w += 0;
//f.w += 0;
}
if (EVFLAG && eflag) {
e_coul += prefactor*(_erfc-factor_coul);
@ -762,7 +806,7 @@ __kernel void k_lj_tip4p_long_fast(const __global numtyp4 *restrict x_,
fd.y = dely*force_coul;
fd.z = delz*force_coul;
if (itype == typeO) {
numtyp cO = 1 - alpha, cH = 0.5*alpha;
numtyp cO = (numtyp)1.0 - alpha, cH = (numtyp)0.5*alpha;
numtyp4 vdi, vdj;
numtyp4 xH1; fetch4(xH1,iH1,pos_tex);
numtyp4 xH2; fetch4(xH2,iH2,pos_tex);
@ -780,15 +824,15 @@ __kernel void k_lj_tip4p_long_fast(const __global numtyp4 *restrict x_,
vdj.z = xjO.z*cO + xjH1.z*cH + xjH2.z*cH;
//vdj.w = vdj.w;
} else vdj = jx;
vO[0] += 0.5*(vdi.x - vdj.x)*fd.x;
vO[1] += 0.5*(vdi.y - vdj.y)*fd.y;
vO[2] += 0.5*(vdi.z - vdj.z)*fd.z;
vO[3] += 0.5*(vdi.x - vdj.x)*fd.y;
vO[4] += 0.5*(vdi.x - vdj.x)*fd.z;
vO[5] += 0.5*(vdi.y - vdj.y)*fd.z;
vO[0] += (numtyp)0.5*(vdi.x - vdj.x)*fd.x;
vO[1] += (numtyp)0.5*(vdi.y - vdj.y)*fd.y;
vO[2] += (numtyp)0.5*(vdi.z - vdj.z)*fd.z;
vO[3] += (numtyp)0.5*(vdi.x - vdj.x)*fd.y;
vO[4] += (numtyp)0.5*(vdi.x - vdj.x)*fd.z;
vO[5] += (numtyp)0.5*(vdi.y - vdj.y)*fd.z;
} else {
if (jtype == typeO) {
numtyp cO = 1 - alpha, cH = 0.5*alpha;
numtyp cO = (numtyp)1.0 - alpha, cH = (numtyp)0.5*alpha;
numtyp4 vdj;
numtyp4 xjH1; fetch4(xjH1,jH1,pos_tex);
numtyp4 xjH2; fetch4(xjH2,jH2,pos_tex);
@ -836,7 +880,7 @@ __kernel void k_lj_tip4p_long_fast(const __global numtyp4 *restrict x_,
prefactor *= qqrd2e*x1m.w/r;
numtyp force_coul = r2inv*prefactor * (_erfc + EWALD_F*grij*expm2 - factor_coul);
numtyp cO = 1 - alpha, cH = 0.5*alpha;
numtyp cO = (numtyp)1.0 - alpha, cH = (numtyp)0.5*alpha;
numtyp4 fd;
fd.x = delx * force_coul * cH;
fd.y = dely * force_coul * cH;
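The weights that appear repeatedly above, cO = 1 - alpha and cH = 0.5*alpha, encode how the TIP4P virtual charge site relates to the real atoms: the vdi/vdj expressions build the M-site position as a linear combination of the O and H positions, and because that combination is linear, a force evaluated at the M site is redistributed to the real atoms with the same weights (chain rule). A minimal stand-alone sketch of this convention (plain C++, an illustration rather than the GPU kernel code):

#include <array>

using vec3 = std::array<double,3>;

// M-site position as implied by the cO/cH weights above:
// xM = (1 - alpha)*xO + 0.5*alpha*(xH1 + xH2)
vec3 msite_position(const vec3 &xO, const vec3 &xH1, const vec3 &xH2,
                    double alpha) {
  const double cO = 1.0 - alpha, cH = 0.5*alpha;
  vec3 xM;
  for (int d = 0; d < 3; ++d)
    xM[d] = cO*xO[d] + cH*(xH1[d] + xH2[d]);
  return xM;
}

// Since xM depends linearly on xO, xH1, and xH2, a force fM acting on the
// M site contributes cO*fM to the oxygen and cH*fM to each hydrogen.
void distribute_msite_force(const vec3 &fM, vec3 &fO, vec3 &fH1, vec3 &fH2,
                            double alpha) {
  const double cO = 1.0 - alpha, cH = 0.5*alpha;
  for (int d = 0; d < 3; ++d) {
    fO[d]  += cO*fM[d];
    fH1[d] += cH*fM[d];
    fH2[d] += cH*fM[d];
  }
}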

View File

@ -30,7 +30,7 @@
// -------------------------------------------------------------------------
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__)
#define CONFIG_ID 303
#define SIMD_SIZE 64
#else
@ -112,7 +112,7 @@
// KERNEL MACROS - TEXTURES
// -------------------------------------------------------------------------
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__)
#define _texture(name, type) __device__ type* name
#define _texture_2d(name, type) __device__ type* name
#else
@ -134,9 +134,12 @@
int2 qt = tex1Dfetch(q_tex,i); \
ans=__hiloint2double(qt.y, qt.x); \
}
#elif defined(__HIP_PLATFORM_SPIRV__)
#define fetch4(ans,i,pos_tex) tex1Dfetch(&ans, pos_tex, i);
#define fetch(ans,i,q_tex) tex1Dfetch(&ans, q_tex,i);
#else
#define fetch4(ans,i,pos_tex) ans=tex1Dfetch(pos_tex, i);
#define fetch(ans,i,q_tex) ans=tex1Dfetch(q_tex,i);
#endif
#else
#define fetch4(ans,i,x) ans=x[i]
@ -152,7 +155,7 @@
#define mu_tex mu_
#endif
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__)
#undef fetch4
#undef fetch
@ -209,7 +212,7 @@
#endif
#endif
#if defined(CUDA_PRE_NINE) || defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
#if defined(CUDA_PRE_NINE) || defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__)
#ifdef _SINGLE_SINGLE
#define shfl_down __shfl_down