Merge remote-tracking branch 'github/develop' into lammps-gui

This commit is contained in:
Axel Kohlmeyer
2023-08-02 04:10:19 -04:00
3 changed files with 13 additions and 6 deletions

View File

@ -29,7 +29,7 @@
const char *ocl_prefetch_test =
" #if (NBOR_PREFETCH == 1) \n"\
" inline void ucl_prefetch(const __global int *p) { prefetch(p, 1); } \n"\
" #else \n"\
" #elif (NBOR_PREFETCH == 2) \n"\
" enum LSC_LDCC {LSC_LDCC_DEFAULT, LSC_LDCC_L1UC_L3UC, LSC_LDCC_L1UC_L3C, \n"\
" LSC_LDCC_L1C_L3UC, LSC_LDCC_L1C_L3C, LSC_LDCC_L1S_L3UC, \n"\
" LSC_LDCC_L1S_L3C, LSC_LDCC_L1IAR_L3C, }; \n"\
@ -745,7 +745,14 @@ void DeviceT::estimate_gpu_overhead(const int kernel_calls,
gpu_overhead=0.0;
gpu_driver_overhead=0.0;
for (int z=0; z<11; z++) {
// TODO: XXX
// The following estimation currently fails on Intel GPUs
// that do not support double precision with OpenCL error code -5.
// Until we have a better solution, we just skip this test in this case.
int zloops = 11;
if (!gpu->double_precision()) zloops = 0;
for (int z=0; z < zloops; z++) {
gpu->sync();
gpu_barrier();
over_timer.start();

View File

@ -43,7 +43,7 @@ const char * ocl_config_strings[] =
"NVIDIA_GPU,203,32,32,1,1,4,8,2,256,256,128,64,128,8,128,11,128,8,0",
"AMD_GPU,403,64,64,0,1,4,8,2,256,256,128,64,128,8,128,11,128,8,0",
#ifdef _SINGLE_SINGLE
"INTEL_GPU,500,8,32,1,1,4,8,2,128,128,128,128,64,8,128,8,128,8,2",
"INTEL_GPU,500,8,32,1,1,4,8,2,128,128,128,128,64,8,128,8,128,8,0",
"APPLE_GPU,600,16,16,0,1,4,8,1,64,64,64,64,64,8,128,8,128,8,0",
#else
"INTEL_GPU,500,8,32,1,1,2,8,2,128,128,128,128,64,8,128,8,128,8,2",

View File

@ -333,12 +333,12 @@ inline void ucl_prefetch(const __global int *p) {
// Plain aggregate of three float components named x, y and z.
// Referenced by the acctyp3 macro when _SINGLE_SINGLE is defined.
struct _lgpu_float3 {
float x; float y; float z;
};
// Plain aggregate of three double components named x, y and z.
// NOTE(review): an identical definition is listed again inside the #else branch
// of the _SINGLE_SINGLE conditional that follows. This listing looks like a diff
// hunk with its +/- markers stripped, so this copy is presumably the old
// (unconditional) position of the struct - confirm against the real file.
struct _lgpu_double3 {
double x; double y; double z;
};
// Choose the struct that the acctyp3 macro expands to:
//   _SINGLE_SINGLE defined     -> struct _lgpu_float3
//   _SINGLE_SINGLE not defined -> struct _lgpu_double3
#ifdef _SINGLE_SINGLE
#define acctyp3 struct _lgpu_float3
#else
// Defined inside the #else branch, i.e. only compiled when _SINGLE_SINGLE is
// not defined.
// NOTE(review): presumably this keeps double members out of single-precision
// builds for devices without double support - confirm.
struct _lgpu_double3 {
double x; double y; double z;
};
#define acctyp3 struct _lgpu_double3
#endif