Merge remote-tracking branch 'github/develop' into lammps-gui
This commit is contained in:
@ -29,7 +29,7 @@
|
||||
const char *ocl_prefetch_test =
|
||||
" #if (NBOR_PREFETCH == 1) \n"\
|
||||
" inline void ucl_prefetch(const __global int *p) { prefetch(p, 1); } \n"\
|
||||
" #else \n"\
|
||||
" #elif (NBOR_PREFETCH == 2) \n"\
|
||||
" enum LSC_LDCC {LSC_LDCC_DEFAULT, LSC_LDCC_L1UC_L3UC, LSC_LDCC_L1UC_L3C, \n"\
|
||||
" LSC_LDCC_L1C_L3UC, LSC_LDCC_L1C_L3C, LSC_LDCC_L1S_L3UC, \n"\
|
||||
" LSC_LDCC_L1S_L3C, LSC_LDCC_L1IAR_L3C, }; \n"\
|
||||
@ -745,7 +745,14 @@ void DeviceT::estimate_gpu_overhead(const int kernel_calls,
|
||||
gpu_overhead=0.0;
|
||||
gpu_driver_overhead=0.0;
|
||||
|
||||
for (int z=0; z<11; z++) {
|
||||
// TODO: XXX
|
||||
// The following estimation currently fails on Intel GPUs
|
||||
// that do not support double precision with OpenCL error code -5.
|
||||
// Until we have a better solution, we just skip this test in this case.
|
||||
int zloops = 11;
|
||||
if (!gpu->double_precision()) zloops = 0;
|
||||
|
||||
for (int z=0; z < zloops; z++) {
|
||||
gpu->sync();
|
||||
gpu_barrier();
|
||||
over_timer.start();
|
||||
|
||||
@ -43,7 +43,7 @@ const char * ocl_config_strings[] =
|
||||
"NVIDIA_GPU,203,32,32,1,1,4,8,2,256,256,128,64,128,8,128,11,128,8,0",
|
||||
"AMD_GPU,403,64,64,0,1,4,8,2,256,256,128,64,128,8,128,11,128,8,0",
|
||||
#ifdef _SINGLE_SINGLE
|
||||
"INTEL_GPU,500,8,32,1,1,4,8,2,128,128,128,128,64,8,128,8,128,8,2",
|
||||
"INTEL_GPU,500,8,32,1,1,4,8,2,128,128,128,128,64,8,128,8,128,8,0",
|
||||
"APPLE_GPU,600,16,16,0,1,4,8,1,64,64,64,64,64,8,128,8,128,8,0",
|
||||
#else
|
||||
"INTEL_GPU,500,8,32,1,1,2,8,2,128,128,128,128,64,8,128,8,128,8,2",
|
||||
|
||||
@ -333,12 +333,12 @@ inline void ucl_prefetch(const __global int *p) {
|
||||
struct _lgpu_float3 {
|
||||
float x; float y; float z;
|
||||
};
|
||||
struct _lgpu_double3 {
|
||||
double x; double y; double z;
|
||||
};
|
||||
#ifdef _SINGLE_SINGLE
|
||||
#define acctyp3 struct _lgpu_float3
|
||||
#else
|
||||
struct _lgpu_double3 {
|
||||
double x; double y; double z;
|
||||
};
|
||||
#define acctyp3 struct _lgpu_double3
|
||||
#endif
|
||||
|
||||
|
||||
Reference in New Issue
Block a user