GPU Package: Adding JIT test for OpenCL prefetch support.
This commit is contained in:
@ -95,7 +95,8 @@ class UCL_Program {
|
||||
|
||||
/// Load a program from a string and compile with flags
|
||||
inline int load_string(const void *program, const char *flags="",
|
||||
std::string *log=nullptr, FILE* foutput=nullptr) {
|
||||
std::string *log=nullptr, FILE* foutput=nullptr,
|
||||
const int compile_test=0) {
|
||||
cl_int error_flag;
|
||||
const char *prog=(const char *)program;
|
||||
_program=clCreateProgramWithSource(_context,1,&prog,nullptr,&error_flag);
|
||||
@ -131,6 +132,8 @@ class UCL_Program {
|
||||
}
|
||||
#endif
|
||||
|
||||
if (build_status != CL_SUCCESS && compile_test) return UCL_COMPILE_ERROR;
|
||||
|
||||
if (build_status != CL_SUCCESS || log!=NULL) {
|
||||
size_t ms;
|
||||
CL_SAFE_CALL(clGetProgramBuildInfo(_program,_device,CL_PROGRAM_BUILD_LOG,
|
||||
|
||||
@ -26,6 +26,22 @@
|
||||
#if defined(USE_OPENCL)
|
||||
#include "device_cl.h"
|
||||
|
||||
const char *ocl_prefetch_test =
|
||||
" #if (NBOR_PREFETCH == 1) \n"\
|
||||
" inline void ucl_prefetch(const __global int *p) { prefetch(p, 1); } \n"\
|
||||
" #else \n"\
|
||||
" enum LSC_LDCC {LSC_LDCC_DEFAULT, LSC_LDCC_L1UC_L3UC, LSC_LDCC_L1UC_L3C, \n"\
|
||||
" LSC_LDCC_L1C_L3UC, LSC_LDCC_L1C_L3C, LSC_LDCC_L1S_L3UC, \n"\
|
||||
" LSC_LDCC_L1S_L3C, LSC_LDCC_L1IAR_L3C, }; \n"\
|
||||
" void __builtin_IB_lsc_prefetch_global_uint(const __global uint *, int, \n"\
|
||||
" enum LSC_LDCC); \n"\
|
||||
" inline void ucl_prefetch(const __global int *p) { \n"\
|
||||
" __builtin_IB_lsc_prefetch_global_uint((const __global uint *)p, 0, \n"\
|
||||
" LSC_LDCC_L1C_L3UC); \n"\
|
||||
" } \n"\
|
||||
" #endif \n"\
|
||||
" __kernel void ptest(__global int *i) { ucl_prefetch(i); i[0]++; } \n";
|
||||
|
||||
#ifdef LAL_OCL_EXTRA_ARGS
|
||||
#define LAL_DM_STRINGIFY(x) #x
|
||||
#define LAL_PRE_STRINGIFY(x) LAL_DM_STRINGIFY(x)
|
||||
@ -396,9 +412,31 @@ int DeviceT::set_ocl_params(std::string s_config, const std::string &extra_args)
|
||||
params[4]="0";
|
||||
#endif
|
||||
}
|
||||
|
||||
// Test OCL JIT to make sure any prefetch options are supported
|
||||
#ifdef LAL_DISABLE_PREFETCH
|
||||
params[18]="0";
|
||||
#endif
|
||||
_nbor_prefetch=-1;
|
||||
if (params[18]=="2") {
|
||||
_nbor_prefetch=2;
|
||||
UCL_Program ptest(*gpu);
|
||||
std::string ptest_args=_ocl_compile_string+" -DNBOR_PREFETCH="+params[18];
|
||||
int success=ptest.load_string(ocl_prefetch_test,ptest_args.c_str(),
|
||||
nullptr,nullptr,1);
|
||||
if (success!=UCL_SUCCESS) params[18]="1";
|
||||
}
|
||||
if (params[18]=="1") {
|
||||
_nbor_prefetch=1;
|
||||
UCL_Program ptest(*gpu);
|
||||
std::string ptest_args=_ocl_compile_string+" -DNBOR_PREFETCH="+params[18];
|
||||
int success=ptest.load_string(ocl_prefetch_test,ptest_args.c_str(),
|
||||
nullptr,nullptr,1);
|
||||
if (success!=UCL_SUCCESS) params[18]="0";
|
||||
}
|
||||
if (_nbor_prefetch<0) params[18]="0";
|
||||
if (params[18]=="0") _nbor_prefetch=0;
|
||||
|
||||
if (params[4]!="0") _ocl_compile_string+="-cl-fast-relaxed-math ";
|
||||
_ocl_compile_string+=std::string(OCL_INT_TYPE)+" "+
|
||||
std::string(OCL_PRECISION_COMPILE);
|
||||
@ -844,6 +882,10 @@ void DeviceT::output_times(UCL_Timer &time_pair, Answer<numtyp,acctyp> &ans,
|
||||
fprintf(screen,"Average split: %.4f.\n",avg_split);
|
||||
fprintf(screen,"Lanes / atom: %d.\n",threads_per_atom);
|
||||
fprintf(screen,"Vector width: %d.\n", simd_size());
|
||||
fprintf(screen,"Prefetch mode: ");
|
||||
if (_nbor_prefetch==2) fprintf(screen,"Intrinsics.\n");
|
||||
else if (_nbor_prefetch==1) fprintf(screen,"API.\n");
|
||||
else fprintf(screen,"None.\n");
|
||||
fprintf(screen,"Max Mem / Proc: %.2f MB.\n",max_mb);
|
||||
if (nbor.gpu_nbor()==2)
|
||||
fprintf(screen,"CPU Neighbor: %.4f s.\n",times[8]/_replica_size);
|
||||
|
||||
@ -346,6 +346,7 @@ class Device {
|
||||
int _block_pair, _block_bio_pair, _block_ellipse;
|
||||
int _pppm_block, _block_nbor_build, _block_cell_2d, _block_cell_id;
|
||||
int _max_shared_types, _max_bio_shared_types, _pppm_max_spline;
|
||||
int _nbor_prefetch;
|
||||
|
||||
UCL_Program *dev_program;
|
||||
UCL_Kernel k_zero, k_info;
|
||||
|
||||
Reference in New Issue
Block a user