GPU Package: Adding JIT test for OpenCL prefetch support.
This commit is contained in:
@ -95,7 +95,8 @@ class UCL_Program {
|
|||||||
|
|
||||||
/// Load a program from a string and compile with flags
|
/// Load a program from a string and compile with flags
|
||||||
inline int load_string(const void *program, const char *flags="",
|
inline int load_string(const void *program, const char *flags="",
|
||||||
std::string *log=nullptr, FILE* foutput=nullptr) {
|
std::string *log=nullptr, FILE* foutput=nullptr,
|
||||||
|
const int compile_test=0) {
|
||||||
cl_int error_flag;
|
cl_int error_flag;
|
||||||
const char *prog=(const char *)program;
|
const char *prog=(const char *)program;
|
||||||
_program=clCreateProgramWithSource(_context,1,&prog,nullptr,&error_flag);
|
_program=clCreateProgramWithSource(_context,1,&prog,nullptr,&error_flag);
|
||||||
@ -131,6 +132,8 @@ class UCL_Program {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if (build_status != CL_SUCCESS && compile_test) return UCL_COMPILE_ERROR;
|
||||||
|
|
||||||
if (build_status != CL_SUCCESS || log!=NULL) {
|
if (build_status != CL_SUCCESS || log!=NULL) {
|
||||||
size_t ms;
|
size_t ms;
|
||||||
CL_SAFE_CALL(clGetProgramBuildInfo(_program,_device,CL_PROGRAM_BUILD_LOG,
|
CL_SAFE_CALL(clGetProgramBuildInfo(_program,_device,CL_PROGRAM_BUILD_LOG,
|
||||||
|
|||||||
@ -26,6 +26,22 @@
|
|||||||
#if defined(USE_OPENCL)
|
#if defined(USE_OPENCL)
|
||||||
#include "device_cl.h"
|
#include "device_cl.h"
|
||||||
|
|
||||||
|
const char *ocl_prefetch_test =
|
||||||
|
" #if (NBOR_PREFETCH == 1) \n"\
|
||||||
|
" inline void ucl_prefetch(const __global int *p) { prefetch(p, 1); } \n"\
|
||||||
|
" #else \n"\
|
||||||
|
" enum LSC_LDCC {LSC_LDCC_DEFAULT, LSC_LDCC_L1UC_L3UC, LSC_LDCC_L1UC_L3C, \n"\
|
||||||
|
" LSC_LDCC_L1C_L3UC, LSC_LDCC_L1C_L3C, LSC_LDCC_L1S_L3UC, \n"\
|
||||||
|
" LSC_LDCC_L1S_L3C, LSC_LDCC_L1IAR_L3C, }; \n"\
|
||||||
|
" void __builtin_IB_lsc_prefetch_global_uint(const __global uint *, int, \n"\
|
||||||
|
" enum LSC_LDCC); \n"\
|
||||||
|
" inline void ucl_prefetch(const __global int *p) { \n"\
|
||||||
|
" __builtin_IB_lsc_prefetch_global_uint((const __global uint *)p, 0, \n"\
|
||||||
|
" LSC_LDCC_L1C_L3UC); \n"\
|
||||||
|
" } \n"\
|
||||||
|
" #endif \n"\
|
||||||
|
" __kernel void ptest(__global int *i) { ucl_prefetch(i); i[0]++; } \n";
|
||||||
|
|
||||||
#ifdef LAL_OCL_EXTRA_ARGS
|
#ifdef LAL_OCL_EXTRA_ARGS
|
||||||
#define LAL_DM_STRINGIFY(x) #x
|
#define LAL_DM_STRINGIFY(x) #x
|
||||||
#define LAL_PRE_STRINGIFY(x) LAL_DM_STRINGIFY(x)
|
#define LAL_PRE_STRINGIFY(x) LAL_DM_STRINGIFY(x)
|
||||||
@ -396,9 +412,31 @@ int DeviceT::set_ocl_params(std::string s_config, const std::string &extra_args)
|
|||||||
params[4]="0";
|
params[4]="0";
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test OCL JIT to make sure any prefetch options are supported
|
||||||
#ifdef LAL_DISABLE_PREFETCH
|
#ifdef LAL_DISABLE_PREFETCH
|
||||||
params[18]="0";
|
params[18]="0";
|
||||||
#endif
|
#endif
|
||||||
|
_nbor_prefetch=-1;
|
||||||
|
if (params[18]=="2") {
|
||||||
|
_nbor_prefetch=2;
|
||||||
|
UCL_Program ptest(*gpu);
|
||||||
|
std::string ptest_args=_ocl_compile_string+" -DNBOR_PREFETCH="+params[18];
|
||||||
|
int success=ptest.load_string(ocl_prefetch_test,ptest_args.c_str(),
|
||||||
|
nullptr,nullptr,1);
|
||||||
|
if (success!=UCL_SUCCESS) params[18]="1";
|
||||||
|
}
|
||||||
|
if (params[18]=="1") {
|
||||||
|
_nbor_prefetch=1;
|
||||||
|
UCL_Program ptest(*gpu);
|
||||||
|
std::string ptest_args=_ocl_compile_string+" -DNBOR_PREFETCH="+params[18];
|
||||||
|
int success=ptest.load_string(ocl_prefetch_test,ptest_args.c_str(),
|
||||||
|
nullptr,nullptr,1);
|
||||||
|
if (success!=UCL_SUCCESS) params[18]="0";
|
||||||
|
}
|
||||||
|
if (_nbor_prefetch<0) params[18]="0";
|
||||||
|
if (params[18]=="0") _nbor_prefetch=0;
|
||||||
|
|
||||||
if (params[4]!="0") _ocl_compile_string+="-cl-fast-relaxed-math ";
|
if (params[4]!="0") _ocl_compile_string+="-cl-fast-relaxed-math ";
|
||||||
_ocl_compile_string+=std::string(OCL_INT_TYPE)+" "+
|
_ocl_compile_string+=std::string(OCL_INT_TYPE)+" "+
|
||||||
std::string(OCL_PRECISION_COMPILE);
|
std::string(OCL_PRECISION_COMPILE);
|
||||||
@ -844,6 +882,10 @@ void DeviceT::output_times(UCL_Timer &time_pair, Answer<numtyp,acctyp> &ans,
|
|||||||
fprintf(screen,"Average split: %.4f.\n",avg_split);
|
fprintf(screen,"Average split: %.4f.\n",avg_split);
|
||||||
fprintf(screen,"Lanes / atom: %d.\n",threads_per_atom);
|
fprintf(screen,"Lanes / atom: %d.\n",threads_per_atom);
|
||||||
fprintf(screen,"Vector width: %d.\n", simd_size());
|
fprintf(screen,"Vector width: %d.\n", simd_size());
|
||||||
|
fprintf(screen,"Prefetch mode: ");
|
||||||
|
if (_nbor_prefetch==2) fprintf(screen,"Intrinsics.\n");
|
||||||
|
else if (_nbor_prefetch==1) fprintf(screen,"API.\n");
|
||||||
|
else fprintf(screen,"None.\n");
|
||||||
fprintf(screen,"Max Mem / Proc: %.2f MB.\n",max_mb);
|
fprintf(screen,"Max Mem / Proc: %.2f MB.\n",max_mb);
|
||||||
if (nbor.gpu_nbor()==2)
|
if (nbor.gpu_nbor()==2)
|
||||||
fprintf(screen,"CPU Neighbor: %.4f s.\n",times[8]/_replica_size);
|
fprintf(screen,"CPU Neighbor: %.4f s.\n",times[8]/_replica_size);
|
||||||
|
|||||||
@ -346,6 +346,7 @@ class Device {
|
|||||||
int _block_pair, _block_bio_pair, _block_ellipse;
|
int _block_pair, _block_bio_pair, _block_ellipse;
|
||||||
int _pppm_block, _block_nbor_build, _block_cell_2d, _block_cell_id;
|
int _pppm_block, _block_nbor_build, _block_cell_2d, _block_cell_id;
|
||||||
int _max_shared_types, _max_bio_shared_types, _pppm_max_spline;
|
int _max_shared_types, _max_bio_shared_types, _pppm_max_spline;
|
||||||
|
int _nbor_prefetch;
|
||||||
|
|
||||||
UCL_Program *dev_program;
|
UCL_Program *dev_program;
|
||||||
UCL_Kernel k_zero, k_info;
|
UCL_Kernel k_zero, k_info;
|
||||||
|
|||||||
Reference in New Issue
Block a user