git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@15248 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -17,7 +17,7 @@
|
||||
/* -----------------------------------------------------------------------
|
||||
Copyright (2010) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
certain rights in this software. This software is distributed under
|
||||
the Simplified BSD License.
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
@ -36,10 +36,10 @@ namespace ucl_opencl {
|
||||
// --------------------------------------------------------------------------
|
||||
struct ocl_kernel_dim {
|
||||
size_t x,y,z;
|
||||
ocl_kernel_dim(size_t _x = 1, size_t _y = 1, size_t _z = 1) :
|
||||
ocl_kernel_dim(size_t _x = 1, size_t _y = 1, size_t _z = 1) :
|
||||
x(_x), y(_y), z(_z) {}
|
||||
operator size_t * () { return (size_t *)this; }
|
||||
operator const size_t * () const { return (const size_t *)this; }
|
||||
operator const size_t * () const { return (const size_t *)this; }
|
||||
};
|
||||
typedef ocl_kernel_dim ucl_kernel_dim;
|
||||
|
||||
@ -53,13 +53,13 @@ typedef cl_mem device_ptr;
|
||||
// --------------------------------------------------------------------------
|
||||
|
||||
template <class mat_type, class copy_type>
|
||||
inline int _host_alloc(mat_type &mat, copy_type &cm, const size_t n,
|
||||
inline int _host_alloc(mat_type &mat, copy_type &cm, const size_t n,
|
||||
const enum UCL_MEMOPT kind, const enum UCL_MEMOPT kind2){
|
||||
cl_int error_flag;
|
||||
cl_context context;
|
||||
CL_SAFE_CALL(clGetMemObjectInfo(cm.cbegin(),CL_MEM_CONTEXT,sizeof(context),
|
||||
&context,NULL));
|
||||
|
||||
|
||||
cl_mem_flags buffer_perm;
|
||||
cl_map_flags map_perm;
|
||||
if (kind2==UCL_NOT_SPECIFIED) {
|
||||
@ -88,7 +88,7 @@ inline int _host_alloc(mat_type &mat, copy_type &cm, const size_t n,
|
||||
buffer_perm=CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR;
|
||||
else
|
||||
buffer_perm=CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR;
|
||||
|
||||
|
||||
if (kind==UCL_READ_ONLY) {
|
||||
#ifdef CL_VERSION_1_2
|
||||
buffer_perm=buffer_perm | CL_MEM_HOST_READ_ONLY;
|
||||
@ -102,9 +102,9 @@ inline int _host_alloc(mat_type &mat, copy_type &cm, const size_t n,
|
||||
} else
|
||||
map_perm=CL_MAP_READ | CL_MAP_WRITE;
|
||||
}
|
||||
|
||||
|
||||
mat.cbegin()=clCreateBuffer(context,buffer_perm,n,NULL,&error_flag);
|
||||
if (error_flag != CL_SUCCESS)
|
||||
if (error_flag != CL_SUCCESS)
|
||||
return UCL_MEMORY_ERROR;
|
||||
*mat.host_ptr() = (typename mat_type::data_type*)
|
||||
clEnqueueMapBuffer(cm.cq(),mat.cbegin(),CL_TRUE,
|
||||
@ -125,7 +125,7 @@ inline int _host_view(mat_type &mat, copy_type &cm, const size_t n) {
|
||||
CL_SAFE_CALL(clGetMemObjectInfo(cm.cbegin(),CL_MEM_FLAGS,sizeof(orig_flags),
|
||||
&orig_flags,NULL));
|
||||
orig_flags=orig_flags & ~CL_MEM_ALLOC_HOST_PTR;
|
||||
|
||||
|
||||
mat.cbegin()=clCreateBuffer(context, CL_MEM_USE_HOST_PTR | orig_flags, n,
|
||||
*mat.host_ptr(), &error_flag);
|
||||
|
||||
@ -135,7 +135,7 @@ inline int _host_view(mat_type &mat, copy_type &cm, const size_t n) {
|
||||
}
|
||||
|
||||
template <class mat_type>
|
||||
inline int _host_alloc(mat_type &mat, UCL_Device &dev, const size_t n,
|
||||
inline int _host_alloc(mat_type &mat, UCL_Device &dev, const size_t n,
|
||||
const enum UCL_MEMOPT kind, const enum UCL_MEMOPT kind2){
|
||||
cl_mem_flags buffer_perm;
|
||||
cl_map_flags map_perm;
|
||||
@ -160,7 +160,7 @@ inline int _host_alloc(mat_type &mat, UCL_Device &dev, const size_t n,
|
||||
|
||||
cl_int error_flag;
|
||||
mat.cbegin()=clCreateBuffer(dev.context(),buffer_perm,n,NULL,&error_flag);
|
||||
if (error_flag != CL_SUCCESS)
|
||||
if (error_flag != CL_SUCCESS)
|
||||
return UCL_MEMORY_ERROR;
|
||||
|
||||
*mat.host_ptr() = (typename mat_type::data_type*)
|
||||
@ -210,7 +210,7 @@ inline int _host_resize(mat_type &mat, const size_t n) {
|
||||
map_perm=CL_MAP_READ | CL_MAP_WRITE;
|
||||
|
||||
mat.cbegin()=clCreateBuffer(context,buffer_perm,n,NULL,&error_flag);
|
||||
if (error_flag != CL_SUCCESS)
|
||||
if (error_flag != CL_SUCCESS)
|
||||
return UCL_MEMORY_ERROR;
|
||||
*mat.host_ptr() = (typename mat_type::data_type*)
|
||||
clEnqueueMapBuffer(mat.cq(),mat.cbegin(),CL_TRUE,
|
||||
@ -248,7 +248,7 @@ inline int _device_alloc(mat_type &mat, copy_type &cm, const size_t n,
|
||||
else
|
||||
assert(0==1);
|
||||
mat.cbegin()=clCreateBuffer(context,flag,n,NULL,&error_flag);
|
||||
if (error_flag != CL_SUCCESS)
|
||||
if (error_flag != CL_SUCCESS)
|
||||
return UCL_MEMORY_ERROR;
|
||||
mat.cq()=cm.cq();
|
||||
CL_SAFE_CALL(clRetainCommandQueue(mat.cq()));
|
||||
@ -278,7 +278,7 @@ inline int _device_alloc(mat_type &mat, UCL_Device &dev, const size_t n,
|
||||
assert(0==1);
|
||||
mat.cbegin()=clCreateBuffer(dev.context(),flag,n,NULL,
|
||||
&error_flag);
|
||||
if (error_flag != CL_SUCCESS)
|
||||
if (error_flag != CL_SUCCESS)
|
||||
return UCL_MEMORY_ERROR;
|
||||
mat.cq()=dev.cq();
|
||||
CL_SAFE_CALL(clRetainCommandQueue(mat.cq()));
|
||||
@ -304,7 +304,7 @@ inline int _device_alloc(mat_type &mat, UCL_Device &dev, const size_t rows,
|
||||
if (dev.device_type()!=UCL_CPU && cols%256!=0)
|
||||
padded_cols+=256-cols%256;
|
||||
pitch=padded_cols*sizeof(typename mat_type::data_type);
|
||||
return _device_alloc(mat,dev,pitch*rows,kind);
|
||||
return _device_alloc(mat,dev,pitch*rows,kind);
|
||||
}
|
||||
|
||||
template <class mat_type>
|
||||
@ -342,7 +342,7 @@ inline int _device_resize(mat_type &mat, const size_t n) {
|
||||
else
|
||||
assert(0==1);
|
||||
mat.cbegin()=clCreateBuffer(context,flag,n,NULL,&error_flag);
|
||||
if (error_flag != CL_SUCCESS)
|
||||
if (error_flag != CL_SUCCESS)
|
||||
return UCL_MEMORY_ERROR;
|
||||
return UCL_SUCCESS;
|
||||
}
|
||||
@ -380,7 +380,7 @@ inline int _device_resize(mat_type &mat, const size_t rows,
|
||||
else
|
||||
assert(0==1);
|
||||
mat.cbegin()=clCreateBuffer(context,flag,pitch*rows,NULL,&error_flag);
|
||||
if (error_flag != CL_SUCCESS)
|
||||
if (error_flag != CL_SUCCESS)
|
||||
return UCL_MEMORY_ERROR;
|
||||
return UCL_SUCCESS;
|
||||
}
|
||||
@ -396,21 +396,21 @@ inline void _host_zero(void *ptr, const size_t n) {
|
||||
inline void _ocl_build(cl_program &program, cl_device_id &device,
|
||||
const char* options = "") {
|
||||
clBuildProgram(program,1,&device,options,NULL,NULL);
|
||||
|
||||
|
||||
cl_build_status build_status;
|
||||
CL_SAFE_CALL(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS,
|
||||
CL_SAFE_CALL(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS,
|
||||
sizeof(cl_build_status),&build_status,
|
||||
NULL));
|
||||
if (build_status == CL_SUCCESS)
|
||||
return;
|
||||
|
||||
|
||||
size_t ms;
|
||||
CL_SAFE_CALL(clGetProgramBuildInfo(program, device,CL_PROGRAM_BUILD_LOG, 0,
|
||||
CL_SAFE_CALL(clGetProgramBuildInfo(program, device,CL_PROGRAM_BUILD_LOG, 0,
|
||||
NULL, &ms));
|
||||
char build_log[ms];
|
||||
char build_log[ms];
|
||||
CL_SAFE_CALL(clGetProgramBuildInfo(program,device,CL_PROGRAM_BUILD_LOG,ms,
|
||||
build_log, NULL));
|
||||
|
||||
|
||||
std::cerr << std::endl
|
||||
<< "----------------------------------------------------------\n"
|
||||
<< " Error compiling OpenCL Program...\n"
|
||||
@ -423,13 +423,13 @@ inline void _ocl_kernel_from_source(cl_context &context, cl_device_id &device,
|
||||
cl_kernel &kernel, const char *function,
|
||||
const char *options="") {
|
||||
cl_int error_flag;
|
||||
|
||||
|
||||
cl_program program=clCreateProgramWithSource(context,lines,source,
|
||||
NULL,&error_flag);
|
||||
CL_CHECK_ERR(error_flag);
|
||||
CL_CHECK_ERR(error_flag);
|
||||
_ocl_build(program,device,options);
|
||||
kernel=clCreateKernel(program,function,&error_flag);
|
||||
CL_CHECK_ERR(error_flag);
|
||||
CL_CHECK_ERR(error_flag);
|
||||
}
|
||||
|
||||
template <class mat_type>
|
||||
@ -452,17 +452,17 @@ inline void _device_zero(mat_type &mat, const size_t n, command_queue &cq) {
|
||||
cl_device_id device;
|
||||
CL_SAFE_CALL(clGetContextInfo(context,CL_CONTEXT_DEVICES,
|
||||
sizeof(cl_device_id),&device,NULL));
|
||||
|
||||
|
||||
const char * szero[3]={
|
||||
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void _device_zero(__global NUMTYP *a, const int offset)",
|
||||
" { int gid=get_global_id(0)+offset; a[gid]=(NUMTYP)0; }"
|
||||
};
|
||||
|
||||
|
||||
cl_kernel kzero;
|
||||
_ocl_kernel_from_source(context,device,szero,3,kzero,"_device_zero",
|
||||
_UCL_DATA_ID<typename mat_type::data_type>::numtyp_flag());
|
||||
|
||||
|
||||
cl_int offset=mat.offset();
|
||||
CL_SAFE_CALL(clSetKernelArg(kzero,0,sizeof(cl_mem),(void *)&mat.begin()));
|
||||
CL_SAFE_CALL(clSetKernelArg(kzero,1,sizeof(cl_int),(void *)&offset));
|
||||
@ -486,7 +486,7 @@ template<> struct _ucl_memcpy<2,2> {
|
||||
assert(0==1);
|
||||
}
|
||||
template <class p1, class p2>
|
||||
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
|
||||
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
|
||||
const size_t spitch, const size_t cols,
|
||||
const size_t rows, cl_command_queue &cq,
|
||||
const cl_bool block,
|
||||
@ -504,7 +504,7 @@ template<> struct _ucl_memcpy<2,0> {
|
||||
assert(0==1);
|
||||
}
|
||||
template <class p1, class p2>
|
||||
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
|
||||
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
|
||||
const size_t spitch, const size_t cols,
|
||||
const size_t rows, cl_command_queue &cq,
|
||||
const cl_bool block,
|
||||
@ -522,7 +522,7 @@ template<> struct _ucl_memcpy<2,1> {
|
||||
assert(0==1);
|
||||
}
|
||||
template <class p1, class p2>
|
||||
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
|
||||
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
|
||||
const size_t spitch, const size_t cols,
|
||||
const size_t rows, cl_command_queue &cq,
|
||||
const cl_bool block,
|
||||
@ -540,7 +540,7 @@ template<> struct _ucl_memcpy<0,2> {
|
||||
assert(0==1);
|
||||
}
|
||||
template <class p1, class p2>
|
||||
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
|
||||
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
|
||||
const size_t spitch, const size_t cols,
|
||||
const size_t rows, cl_command_queue &cq,
|
||||
const cl_bool block,
|
||||
@ -558,7 +558,7 @@ template<> struct _ucl_memcpy<1,2> {
|
||||
assert(0==1);
|
||||
}
|
||||
template <class p1, class p2>
|
||||
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
|
||||
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
|
||||
const size_t spitch, const size_t cols,
|
||||
const size_t rows, cl_command_queue &cq,
|
||||
const cl_bool block,
|
||||
@ -587,9 +587,9 @@ template <> struct _ucl_memcpy<1,0> {
|
||||
dst.begin(),0,NULL,NULL));
|
||||
}
|
||||
template <class p1, class p2>
|
||||
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
|
||||
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
|
||||
const size_t spitch, const size_t cols,
|
||||
const size_t rows, cl_command_queue &cq,
|
||||
const size_t rows, cl_command_queue &cq,
|
||||
const cl_bool block,
|
||||
size_t dst_offset, size_t src_offset) {
|
||||
if (src.cbegin()==dst.cbegin()) {
|
||||
@ -602,20 +602,20 @@ template <> struct _ucl_memcpy<1,0> {
|
||||
#ifdef UCL_DBG_MEM_TRACE
|
||||
std::cerr << "UCL_COPY 2NS\n";
|
||||
#endif
|
||||
if (spitch==dpitch && dst.cols()==src.cols() &&
|
||||
if (spitch==dpitch && dst.cols()==src.cols() &&
|
||||
src.cols()==cols/src.element_size())
|
||||
CL_SAFE_CALL(clEnqueueReadBuffer(cq,src.cbegin(),block,src_offset,
|
||||
spitch*rows,
|
||||
(char *)dst.begin()+dst_offset,0,NULL,
|
||||
NULL));
|
||||
else
|
||||
for (size_t i=0; i<rows; i++) {
|
||||
for (size_t i=0; i<rows; i++) {
|
||||
CL_SAFE_CALL(clEnqueueReadBuffer(cq,src.cbegin(),block,src_offset,cols,
|
||||
(char *)dst.begin()+dst_offset,0,NULL,
|
||||
NULL));
|
||||
src_offset+=spitch;
|
||||
dst_offset+=dpitch;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -630,7 +630,7 @@ template <> struct _ucl_memcpy<0,1> {
|
||||
#ifdef UCL_DBG_MEM_TRACE
|
||||
std::cerr << "UCL_COPY 3S\n";
|
||||
#endif
|
||||
return;
|
||||
return;
|
||||
}
|
||||
#ifdef UCL_DBG_MEM_TRACE
|
||||
std::cerr << "UCL_COPY 3NS\n";
|
||||
@ -639,9 +639,9 @@ template <> struct _ucl_memcpy<0,1> {
|
||||
src.begin(),0,NULL,NULL));
|
||||
}
|
||||
template <class p1, class p2>
|
||||
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
|
||||
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
|
||||
const size_t spitch, const size_t cols,
|
||||
const size_t rows, cl_command_queue &cq,
|
||||
const size_t rows, cl_command_queue &cq,
|
||||
const cl_bool block,
|
||||
size_t dst_offset, size_t src_offset) {
|
||||
if (src.cbegin()==dst.cbegin()) {
|
||||
@ -649,12 +649,12 @@ template <> struct _ucl_memcpy<0,1> {
|
||||
#ifdef UCL_DBG_MEM_TRACE
|
||||
std::cerr << "UCL_COPY 4S\n";
|
||||
#endif
|
||||
return;
|
||||
return;
|
||||
}
|
||||
#ifdef UCL_DBG_MEM_TRACE
|
||||
std::cerr << "UCL_COPY 4NS\n";
|
||||
#endif
|
||||
if (spitch==dpitch && dst.cols()==src.cols() &&
|
||||
if (spitch==dpitch && dst.cols()==src.cols() &&
|
||||
src.cols()==cols/src.element_size())
|
||||
CL_SAFE_CALL(clEnqueueWriteBuffer(cq,dst.cbegin(),block,dst_offset,
|
||||
spitch*rows,
|
||||
@ -667,7 +667,7 @@ template <> struct _ucl_memcpy<0,1> {
|
||||
NULL));
|
||||
src_offset+=spitch;
|
||||
dst_offset+=dpitch;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -687,33 +687,33 @@ template <int mem1, int mem2> struct _ucl_memcpy {
|
||||
#ifdef UCL_DBG_MEM_TRACE
|
||||
else std::cerr << "UCL_COPY 6S\n";
|
||||
#endif
|
||||
|
||||
|
||||
if (block==CL_TRUE) ucl_sync(cq);
|
||||
}
|
||||
template <class p1, class p2>
|
||||
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
|
||||
static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
|
||||
const size_t spitch, const size_t cols,
|
||||
const size_t rows, cl_command_queue &cq,
|
||||
const cl_bool block,
|
||||
size_t dst_offset, size_t src_offset) {
|
||||
if (src.cbegin()!=dst.cbegin() || src_offset!=dst_offset) {
|
||||
if (src.cbegin()!=dst.cbegin() || src_offset!=dst_offset) {
|
||||
#ifdef UCL_DBG_MEM_TRACE
|
||||
std::cerr << "UCL_COPY 7NS\n";
|
||||
#endif
|
||||
if (spitch==dpitch && dst.cols()==src.cols() &&
|
||||
if (spitch==dpitch && dst.cols()==src.cols() &&
|
||||
src.cols()==cols/src.element_size())
|
||||
CL_SAFE_CALL(clEnqueueCopyBuffer(cq,src.cbegin(),dst.cbegin(),src_offset,
|
||||
dst_offset,spitch*rows,0,NULL,NULL));
|
||||
|
||||
|
||||
else
|
||||
for (size_t i=0; i<rows; i++) {
|
||||
for (size_t i=0; i<rows; i++) {
|
||||
CL_SAFE_CALL(clEnqueueCopyBuffer(cq,src.cbegin(),dst.cbegin(),
|
||||
src_offset,dst_offset,cols,0,
|
||||
NULL,NULL));
|
||||
src_offset+=spitch;
|
||||
dst_offset+=dpitch;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef UCL_DBG_MEM_TRACE
|
||||
else std::cerr << "UCL_COPY 7S\n";
|
||||
#endif
|
||||
@ -736,8 +736,8 @@ inline void ucl_mv_cpy(mat1 &dst, const mat2 &src, const size_t n,
|
||||
}
|
||||
|
||||
template<class mat1, class mat2>
|
||||
inline void ucl_mv_cpy(mat1 &dst, const size_t dpitch, const mat2 &src,
|
||||
const size_t spitch, const size_t cols,
|
||||
inline void ucl_mv_cpy(mat1 &dst, const size_t dpitch, const mat2 &src,
|
||||
const size_t spitch, const size_t cols,
|
||||
const size_t rows) {
|
||||
_ucl_memcpy<mat1::MEM_TYPE,mat2::MEM_TYPE>::mc(dst,dpitch,src,spitch,cols,
|
||||
rows,dst.cq(),CL_TRUE,
|
||||
@ -745,15 +745,15 @@ inline void ucl_mv_cpy(mat1 &dst, const size_t dpitch, const mat2 &src,
|
||||
}
|
||||
|
||||
template<class mat1, class mat2>
|
||||
inline void ucl_mv_cpy(mat1 &dst, const size_t dpitch, const mat2 &src,
|
||||
const size_t spitch, const size_t cols,
|
||||
inline void ucl_mv_cpy(mat1 &dst, const size_t dpitch, const mat2 &src,
|
||||
const size_t spitch, const size_t cols,
|
||||
const size_t rows,cl_command_queue &cq) {
|
||||
_ucl_memcpy<mat1::MEM_TYPE,mat2::MEM_TYPE>::mc(dst,dpitch,src,spitch,cols,
|
||||
rows,cq,CL_FALSE,
|
||||
dst.byteoff(),src.byteoff());
|
||||
}
|
||||
|
||||
} // namespace ucl_cudart
|
||||
} // namespace ucl_cudart
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
Reference in New Issue
Block a user