GPU Package: Adding JIT test for OpenCL prefetch support.
This commit is contained in:
@ -26,6 +26,22 @@
|
||||
#if defined(USE_OPENCL)
|
||||
#include "device_cl.h"
|
||||
|
||||
const char *ocl_prefetch_test =
|
||||
" #if (NBOR_PREFETCH == 1) \n"\
|
||||
" inline void ucl_prefetch(const __global int *p) { prefetch(p, 1); } \n"\
|
||||
" #else \n"\
|
||||
" enum LSC_LDCC {LSC_LDCC_DEFAULT, LSC_LDCC_L1UC_L3UC, LSC_LDCC_L1UC_L3C, \n"\
|
||||
" LSC_LDCC_L1C_L3UC, LSC_LDCC_L1C_L3C, LSC_LDCC_L1S_L3UC, \n"\
|
||||
" LSC_LDCC_L1S_L3C, LSC_LDCC_L1IAR_L3C, }; \n"\
|
||||
" void __builtin_IB_lsc_prefetch_global_uint(const __global uint *, int, \n"\
|
||||
" enum LSC_LDCC); \n"\
|
||||
" inline void ucl_prefetch(const __global int *p) { \n"\
|
||||
" __builtin_IB_lsc_prefetch_global_uint((const __global uint *)p, 0, \n"\
|
||||
" LSC_LDCC_L1C_L3UC); \n"\
|
||||
" } \n"\
|
||||
" #endif \n"\
|
||||
" __kernel void ptest(__global int *i) { ucl_prefetch(i); i[0]++; } \n";
|
||||
|
||||
#ifdef LAL_OCL_EXTRA_ARGS
|
||||
#define LAL_DM_STRINGIFY(x) #x
|
||||
#define LAL_PRE_STRINGIFY(x) LAL_DM_STRINGIFY(x)
|
||||
@ -396,9 +412,31 @@ int DeviceT::set_ocl_params(std::string s_config, const std::string &extra_args)
|
||||
params[4]="0";
|
||||
#endif
|
||||
}
|
||||
|
||||
// Test OCL JIT to make sure any prefetch options are supported
|
||||
#ifdef LAL_DISABLE_PREFETCH
|
||||
params[18]="0";
|
||||
#endif
|
||||
_nbor_prefetch=-1;
|
||||
if (params[18]=="2") {
|
||||
_nbor_prefetch=2;
|
||||
UCL_Program ptest(*gpu);
|
||||
std::string ptest_args=_ocl_compile_string+" -DNBOR_PREFETCH="+params[18];
|
||||
int success=ptest.load_string(ocl_prefetch_test,ptest_args.c_str(),
|
||||
nullptr,nullptr,1);
|
||||
if (success!=UCL_SUCCESS) params[18]="1";
|
||||
}
|
||||
if (params[18]=="1") {
|
||||
_nbor_prefetch=1;
|
||||
UCL_Program ptest(*gpu);
|
||||
std::string ptest_args=_ocl_compile_string+" -DNBOR_PREFETCH="+params[18];
|
||||
int success=ptest.load_string(ocl_prefetch_test,ptest_args.c_str(),
|
||||
nullptr,nullptr,1);
|
||||
if (success!=UCL_SUCCESS) params[18]="0";
|
||||
}
|
||||
if (_nbor_prefetch<0) params[18]="0";
|
||||
if (params[18]=="0") _nbor_prefetch=0;
|
||||
|
||||
if (params[4]!="0") _ocl_compile_string+="-cl-fast-relaxed-math ";
|
||||
_ocl_compile_string+=std::string(OCL_INT_TYPE)+" "+
|
||||
std::string(OCL_PRECISION_COMPILE);
|
||||
@ -844,6 +882,10 @@ void DeviceT::output_times(UCL_Timer &time_pair, Answer<numtyp,acctyp> &ans,
|
||||
fprintf(screen,"Average split: %.4f.\n",avg_split);
|
||||
fprintf(screen,"Lanes / atom: %d.\n",threads_per_atom);
|
||||
fprintf(screen,"Vector width: %d.\n", simd_size());
|
||||
fprintf(screen,"Prefetch mode: ");
|
||||
if (_nbor_prefetch==2) fprintf(screen,"Intrinsics.\n");
|
||||
else if (_nbor_prefetch==1) fprintf(screen,"API.\n");
|
||||
else fprintf(screen,"None.\n");
|
||||
fprintf(screen,"Max Mem / Proc: %.2f MB.\n",max_mb);
|
||||
if (nbor.gpu_nbor()==2)
|
||||
fprintf(screen,"CPU Neighbor: %.4f s.\n",times[8]/_replica_size);
|
||||
|
||||
Reference in New Issue
Block a user