git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@15248 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2016-07-01 23:27:26 +00:00
parent 8366b35459
commit 9656958169
245 changed files with 4890 additions and 4832 deletions

View File

@ -17,7 +17,7 @@
/* -----------------------------------------------------------------------
Copyright (2010) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
certain rights in this software. This software is distributed under
the Simplified BSD License.
----------------------------------------------------------------------- */
@ -36,10 +36,10 @@ namespace ucl_opencl {
// --------------------------------------------------------------------------
struct ocl_kernel_dim {
size_t x,y,z;
ocl_kernel_dim(size_t _x = 1, size_t _y = 1, size_t _z = 1) :
ocl_kernel_dim(size_t _x = 1, size_t _y = 1, size_t _z = 1) :
x(_x), y(_y), z(_z) {}
operator size_t * () { return (size_t *)this; }
operator const size_t * () const { return (const size_t *)this; }
operator const size_t * () const { return (const size_t *)this; }
};
typedef ocl_kernel_dim ucl_kernel_dim;
@ -53,13 +53,13 @@ typedef cl_mem device_ptr;
// --------------------------------------------------------------------------
template <class mat_type, class copy_type>
inline int _host_alloc(mat_type &mat, copy_type &cm, const size_t n,
inline int _host_alloc(mat_type &mat, copy_type &cm, const size_t n,
const enum UCL_MEMOPT kind, const enum UCL_MEMOPT kind2){
cl_int error_flag;
cl_context context;
CL_SAFE_CALL(clGetMemObjectInfo(cm.cbegin(),CL_MEM_CONTEXT,sizeof(context),
&context,NULL));
cl_mem_flags buffer_perm;
cl_map_flags map_perm;
if (kind2==UCL_NOT_SPECIFIED) {
@ -88,7 +88,7 @@ inline int _host_alloc(mat_type &mat, copy_type &cm, const size_t n,
buffer_perm=CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR;
else
buffer_perm=CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR;
if (kind==UCL_READ_ONLY) {
#ifdef CL_VERSION_1_2
buffer_perm=buffer_perm | CL_MEM_HOST_READ_ONLY;
@ -102,9 +102,9 @@ inline int _host_alloc(mat_type &mat, copy_type &cm, const size_t n,
} else
map_perm=CL_MAP_READ | CL_MAP_WRITE;
}
mat.cbegin()=clCreateBuffer(context,buffer_perm,n,NULL,&error_flag);
if (error_flag != CL_SUCCESS)
if (error_flag != CL_SUCCESS)
return UCL_MEMORY_ERROR;
*mat.host_ptr() = (typename mat_type::data_type*)
clEnqueueMapBuffer(cm.cq(),mat.cbegin(),CL_TRUE,
@ -125,7 +125,7 @@ inline int _host_view(mat_type &mat, copy_type &cm, const size_t n) {
CL_SAFE_CALL(clGetMemObjectInfo(cm.cbegin(),CL_MEM_FLAGS,sizeof(orig_flags),
&orig_flags,NULL));
orig_flags=orig_flags & ~CL_MEM_ALLOC_HOST_PTR;
mat.cbegin()=clCreateBuffer(context, CL_MEM_USE_HOST_PTR | orig_flags, n,
*mat.host_ptr(), &error_flag);
@ -135,7 +135,7 @@ inline int _host_view(mat_type &mat, copy_type &cm, const size_t n) {
}
template <class mat_type>
inline int _host_alloc(mat_type &mat, UCL_Device &dev, const size_t n,
inline int _host_alloc(mat_type &mat, UCL_Device &dev, const size_t n,
const enum UCL_MEMOPT kind, const enum UCL_MEMOPT kind2){
cl_mem_flags buffer_perm;
cl_map_flags map_perm;
@ -160,7 +160,7 @@ inline int _host_alloc(mat_type &mat, UCL_Device &dev, const size_t n,
cl_int error_flag;
mat.cbegin()=clCreateBuffer(dev.context(),buffer_perm,n,NULL,&error_flag);
if (error_flag != CL_SUCCESS)
if (error_flag != CL_SUCCESS)
return UCL_MEMORY_ERROR;
*mat.host_ptr() = (typename mat_type::data_type*)
@ -210,7 +210,7 @@ inline int _host_resize(mat_type &mat, const size_t n) {
map_perm=CL_MAP_READ | CL_MAP_WRITE;
mat.cbegin()=clCreateBuffer(context,buffer_perm,n,NULL,&error_flag);
if (error_flag != CL_SUCCESS)
if (error_flag != CL_SUCCESS)
return UCL_MEMORY_ERROR;
*mat.host_ptr() = (typename mat_type::data_type*)
clEnqueueMapBuffer(mat.cq(),mat.cbegin(),CL_TRUE,
@ -248,7 +248,7 @@ inline int _device_alloc(mat_type &mat, copy_type &cm, const size_t n,
else
assert(0==1);
mat.cbegin()=clCreateBuffer(context,flag,n,NULL,&error_flag);
if (error_flag != CL_SUCCESS)
if (error_flag != CL_SUCCESS)
return UCL_MEMORY_ERROR;
mat.cq()=cm.cq();
CL_SAFE_CALL(clRetainCommandQueue(mat.cq()));
@ -278,7 +278,7 @@ inline int _device_alloc(mat_type &mat, UCL_Device &dev, const size_t n,
assert(0==1);
mat.cbegin()=clCreateBuffer(dev.context(),flag,n,NULL,
&error_flag);
if (error_flag != CL_SUCCESS)
if (error_flag != CL_SUCCESS)
return UCL_MEMORY_ERROR;
mat.cq()=dev.cq();
CL_SAFE_CALL(clRetainCommandQueue(mat.cq()));
@ -304,7 +304,7 @@ inline int _device_alloc(mat_type &mat, UCL_Device &dev, const size_t rows,
if (dev.device_type()!=UCL_CPU && cols%256!=0)
padded_cols+=256-cols%256;
pitch=padded_cols*sizeof(typename mat_type::data_type);
return _device_alloc(mat,dev,pitch*rows,kind);
return _device_alloc(mat,dev,pitch*rows,kind);
}
template <class mat_type>
@ -342,7 +342,7 @@ inline int _device_resize(mat_type &mat, const size_t n) {
else
assert(0==1);
mat.cbegin()=clCreateBuffer(context,flag,n,NULL,&error_flag);
if (error_flag != CL_SUCCESS)
if (error_flag != CL_SUCCESS)
return UCL_MEMORY_ERROR;
return UCL_SUCCESS;
}
@ -380,7 +380,7 @@ inline int _device_resize(mat_type &mat, const size_t rows,
else
assert(0==1);
mat.cbegin()=clCreateBuffer(context,flag,pitch*rows,NULL,&error_flag);
if (error_flag != CL_SUCCESS)
if (error_flag != CL_SUCCESS)
return UCL_MEMORY_ERROR;
return UCL_SUCCESS;
}
@ -396,21 +396,21 @@ inline void _host_zero(void *ptr, const size_t n) {
inline void _ocl_build(cl_program &program, cl_device_id &device,
const char* options = "") {
clBuildProgram(program,1,&device,options,NULL,NULL);
cl_build_status build_status;
CL_SAFE_CALL(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS,
CL_SAFE_CALL(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS,
sizeof(cl_build_status),&build_status,
NULL));
if (build_status == CL_SUCCESS)
return;
size_t ms;
CL_SAFE_CALL(clGetProgramBuildInfo(program, device,CL_PROGRAM_BUILD_LOG, 0,
CL_SAFE_CALL(clGetProgramBuildInfo(program, device,CL_PROGRAM_BUILD_LOG, 0,
NULL, &ms));
char build_log[ms];
char build_log[ms];
CL_SAFE_CALL(clGetProgramBuildInfo(program,device,CL_PROGRAM_BUILD_LOG,ms,
build_log, NULL));
std::cerr << std::endl
<< "----------------------------------------------------------\n"
<< " Error compiling OpenCL Program...\n"
@ -423,13 +423,13 @@ inline void _ocl_kernel_from_source(cl_context &context, cl_device_id &device,
cl_kernel &kernel, const char *function,
const char *options="") {
cl_int error_flag;
cl_program program=clCreateProgramWithSource(context,lines,source,
NULL,&error_flag);
CL_CHECK_ERR(error_flag);
CL_CHECK_ERR(error_flag);
_ocl_build(program,device,options);
kernel=clCreateKernel(program,function,&error_flag);
CL_CHECK_ERR(error_flag);
CL_CHECK_ERR(error_flag);
}
template <class mat_type>
@ -452,17 +452,17 @@ inline void _device_zero(mat_type &mat, const size_t n, command_queue &cq) {
cl_device_id device;
CL_SAFE_CALL(clGetContextInfo(context,CL_CONTEXT_DEVICES,
sizeof(cl_device_id),&device,NULL));
const char * szero[3]={
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
"__kernel void _device_zero(__global NUMTYP *a, const int offset)",
" { int gid=get_global_id(0)+offset; a[gid]=(NUMTYP)0; }"
};
cl_kernel kzero;
_ocl_kernel_from_source(context,device,szero,3,kzero,"_device_zero",
_UCL_DATA_ID<typename mat_type::data_type>::numtyp_flag());
cl_int offset=mat.offset();
CL_SAFE_CALL(clSetKernelArg(kzero,0,sizeof(cl_mem),(void *)&mat.begin()));
CL_SAFE_CALL(clSetKernelArg(kzero,1,sizeof(cl_int),(void *)&offset));
@ -486,7 +486,7 @@ template<> struct _ucl_memcpy<2,2> {
assert(0==1);
}
template <class p1, class p2>
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
const size_t spitch, const size_t cols,
const size_t rows, cl_command_queue &cq,
const cl_bool block,
@ -504,7 +504,7 @@ template<> struct _ucl_memcpy<2,0> {
assert(0==1);
}
template <class p1, class p2>
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
const size_t spitch, const size_t cols,
const size_t rows, cl_command_queue &cq,
const cl_bool block,
@ -522,7 +522,7 @@ template<> struct _ucl_memcpy<2,1> {
assert(0==1);
}
template <class p1, class p2>
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
const size_t spitch, const size_t cols,
const size_t rows, cl_command_queue &cq,
const cl_bool block,
@ -540,7 +540,7 @@ template<> struct _ucl_memcpy<0,2> {
assert(0==1);
}
template <class p1, class p2>
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
const size_t spitch, const size_t cols,
const size_t rows, cl_command_queue &cq,
const cl_bool block,
@ -558,7 +558,7 @@ template<> struct _ucl_memcpy<1,2> {
assert(0==1);
}
template <class p1, class p2>
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
const size_t spitch, const size_t cols,
const size_t rows, cl_command_queue &cq,
const cl_bool block,
@ -587,9 +587,9 @@ template <> struct _ucl_memcpy<1,0> {
dst.begin(),0,NULL,NULL));
}
template <class p1, class p2>
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
const size_t spitch, const size_t cols,
const size_t rows, cl_command_queue &cq,
const size_t rows, cl_command_queue &cq,
const cl_bool block,
size_t dst_offset, size_t src_offset) {
if (src.cbegin()==dst.cbegin()) {
@ -602,20 +602,20 @@ template <> struct _ucl_memcpy<1,0> {
#ifdef UCL_DBG_MEM_TRACE
std::cerr << "UCL_COPY 2NS\n";
#endif
if (spitch==dpitch && dst.cols()==src.cols() &&
if (spitch==dpitch && dst.cols()==src.cols() &&
src.cols()==cols/src.element_size())
CL_SAFE_CALL(clEnqueueReadBuffer(cq,src.cbegin(),block,src_offset,
spitch*rows,
(char *)dst.begin()+dst_offset,0,NULL,
NULL));
else
for (size_t i=0; i<rows; i++) {
for (size_t i=0; i<rows; i++) {
CL_SAFE_CALL(clEnqueueReadBuffer(cq,src.cbegin(),block,src_offset,cols,
(char *)dst.begin()+dst_offset,0,NULL,
NULL));
src_offset+=spitch;
dst_offset+=dpitch;
}
}
}
};
@ -630,7 +630,7 @@ template <> struct _ucl_memcpy<0,1> {
#ifdef UCL_DBG_MEM_TRACE
std::cerr << "UCL_COPY 3S\n";
#endif
return;
return;
}
#ifdef UCL_DBG_MEM_TRACE
std::cerr << "UCL_COPY 3NS\n";
@ -639,9 +639,9 @@ template <> struct _ucl_memcpy<0,1> {
src.begin(),0,NULL,NULL));
}
template <class p1, class p2>
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
const size_t spitch, const size_t cols,
const size_t rows, cl_command_queue &cq,
const size_t rows, cl_command_queue &cq,
const cl_bool block,
size_t dst_offset, size_t src_offset) {
if (src.cbegin()==dst.cbegin()) {
@ -649,12 +649,12 @@ template <> struct _ucl_memcpy<0,1> {
#ifdef UCL_DBG_MEM_TRACE
std::cerr << "UCL_COPY 4S\n";
#endif
return;
return;
}
#ifdef UCL_DBG_MEM_TRACE
std::cerr << "UCL_COPY 4NS\n";
#endif
if (spitch==dpitch && dst.cols()==src.cols() &&
if (spitch==dpitch && dst.cols()==src.cols() &&
src.cols()==cols/src.element_size())
CL_SAFE_CALL(clEnqueueWriteBuffer(cq,dst.cbegin(),block,dst_offset,
spitch*rows,
@ -667,7 +667,7 @@ template <> struct _ucl_memcpy<0,1> {
NULL));
src_offset+=spitch;
dst_offset+=dpitch;
}
}
}
};
@ -687,33 +687,33 @@ template <int mem1, int mem2> struct _ucl_memcpy {
#ifdef UCL_DBG_MEM_TRACE
else std::cerr << "UCL_COPY 6S\n";
#endif
if (block==CL_TRUE) ucl_sync(cq);
}
template <class p1, class p2>
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
const size_t spitch, const size_t cols,
const size_t rows, cl_command_queue &cq,
const cl_bool block,
size_t dst_offset, size_t src_offset) {
if (src.cbegin()!=dst.cbegin() || src_offset!=dst_offset) {
if (src.cbegin()!=dst.cbegin() || src_offset!=dst_offset) {
#ifdef UCL_DBG_MEM_TRACE
std::cerr << "UCL_COPY 7NS\n";
#endif
if (spitch==dpitch && dst.cols()==src.cols() &&
if (spitch==dpitch && dst.cols()==src.cols() &&
src.cols()==cols/src.element_size())
CL_SAFE_CALL(clEnqueueCopyBuffer(cq,src.cbegin(),dst.cbegin(),src_offset,
dst_offset,spitch*rows,0,NULL,NULL));
else
for (size_t i=0; i<rows; i++) {
for (size_t i=0; i<rows; i++) {
CL_SAFE_CALL(clEnqueueCopyBuffer(cq,src.cbegin(),dst.cbegin(),
src_offset,dst_offset,cols,0,
NULL,NULL));
src_offset+=spitch;
dst_offset+=dpitch;
}
}
}
}
#ifdef UCL_DBG_MEM_TRACE
else std::cerr << "UCL_COPY 7S\n";
#endif
@ -736,8 +736,8 @@ inline void ucl_mv_cpy(mat1 &dst, const mat2 &src, const size_t n,
}
template<class mat1, class mat2>
inline void ucl_mv_cpy(mat1 &dst, const size_t dpitch, const mat2 &src,
const size_t spitch, const size_t cols,
inline void ucl_mv_cpy(mat1 &dst, const size_t dpitch, const mat2 &src,
const size_t spitch, const size_t cols,
const size_t rows) {
_ucl_memcpy<mat1::MEM_TYPE,mat2::MEM_TYPE>::mc(dst,dpitch,src,spitch,cols,
rows,dst.cq(),CL_TRUE,
@ -745,15 +745,15 @@ inline void ucl_mv_cpy(mat1 &dst, const size_t dpitch, const mat2 &src,
}
template<class mat1, class mat2>
inline void ucl_mv_cpy(mat1 &dst, const size_t dpitch, const mat2 &src,
const size_t spitch, const size_t cols,
inline void ucl_mv_cpy(mat1 &dst, const size_t dpitch, const mat2 &src,
const size_t spitch, const size_t cols,
const size_t rows,cl_command_queue &cq) {
_ucl_memcpy<mat1::MEM_TYPE,mat2::MEM_TYPE>::mc(dst,dpitch,src,spitch,cols,
rows,cq,CL_FALSE,
dst.byteoff(),src.byteoff());
}
} // namespace ucl_cudart
} // namespace ucl_cudart
#endif