silence compiler warnings about unused parameters and variables
This commit is contained in:
@ -26,6 +26,9 @@
|
|||||||
#ifdef UCL_DEBUG
|
#ifdef UCL_DEBUG
|
||||||
#define UCL_SYNC_DEBUG
|
#define UCL_SYNC_DEBUG
|
||||||
#define UCL_DESTRUCT_CHECK
|
#define UCL_DESTRUCT_CHECK
|
||||||
|
#define UCL_DEBUG_ARG(arg) arg
|
||||||
|
#else
|
||||||
|
#define UCL_DEBUG_ARG(arg)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef UCL_NO_API_CHECK
|
#ifndef UCL_NO_API_CHECK
|
||||||
|
|||||||
@ -33,6 +33,9 @@
|
|||||||
#ifdef UCL_DEBUG
|
#ifdef UCL_DEBUG
|
||||||
#define UCL_SYNC_DEBUG
|
#define UCL_SYNC_DEBUG
|
||||||
#define UCL_DESTRUCT_CHECK
|
#define UCL_DESTRUCT_CHECK
|
||||||
|
#define UCL_DEBUG_ARG(arg) arg
|
||||||
|
#else
|
||||||
|
#define UCL_DEBUG_ARG(arg)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef UCL_NO_API_CHECK
|
#ifndef UCL_NO_API_CHECK
|
||||||
|
|||||||
@ -309,15 +309,14 @@ class UCL_Device {
|
|||||||
/// Return the maximum memory pitch in bytes for current device
|
/// Return the maximum memory pitch in bytes for current device
|
||||||
inline size_t max_pitch() { return max_pitch(_device); }
|
inline size_t max_pitch() { return max_pitch(_device); }
|
||||||
/// Return the maximum memory pitch in bytes
|
/// Return the maximum memory pitch in bytes
|
||||||
inline size_t max_pitch(const int i) { return 0; }
|
inline size_t max_pitch(const int) { return 0; }
|
||||||
|
|
||||||
/// Returns false if accelerator cannot be shared by multiple processes
|
/// Returns false if accelerator cannot be shared by multiple processes
|
||||||
/** If it cannot be determined, true is returned **/
|
/** If it cannot be determined, true is returned **/
|
||||||
inline bool sharing_supported() { return sharing_supported(_device); }
|
inline bool sharing_supported() { return sharing_supported(_device); }
|
||||||
/// Returns false if accelerator cannot be shared by multiple processes
|
/// Returns false if accelerator cannot be shared by multiple processes
|
||||||
/** If it cannot be determined, true is returned **/
|
/** If it cannot be determined, true is returned **/
|
||||||
inline bool sharing_supported(const int i)
|
inline bool sharing_supported(const int) { return true; }
|
||||||
{ return true; }
|
|
||||||
|
|
||||||
/// True if the device is a sub-device
|
/// True if the device is a sub-device
|
||||||
inline bool is_subdevice()
|
inline bool is_subdevice()
|
||||||
|
|||||||
@ -33,6 +33,9 @@
|
|||||||
#ifdef UCL_DEBUG
|
#ifdef UCL_DEBUG
|
||||||
#define UCL_SYNC_DEBUG
|
#define UCL_SYNC_DEBUG
|
||||||
#define UCL_DESTRUCT_CHECK
|
#define UCL_DESTRUCT_CHECK
|
||||||
|
#define UCL_DEBUG_ARG(arg) arg
|
||||||
|
#else
|
||||||
|
#define UCL_DEBUG_ARG(arg)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef UCL_NO_API_CHECK
|
#ifndef UCL_NO_API_CHECK
|
||||||
|
|||||||
@ -137,7 +137,7 @@ inline int _host_view(mat_type &mat, copy_type &cm, const size_t o,
|
|||||||
|
|
||||||
template <class mat_type>
|
template <class mat_type>
|
||||||
inline int _host_alloc(mat_type &mat, UCL_Device &dev, const size_t n,
|
inline int _host_alloc(mat_type &mat, UCL_Device &dev, const size_t n,
|
||||||
const enum UCL_MEMOPT kind, const enum UCL_MEMOPT kind2){
|
const enum UCL_MEMOPT kind, const enum UCL_MEMOPT /*kind2*/){
|
||||||
cl_mem_flags buffer_perm;
|
cl_mem_flags buffer_perm;
|
||||||
cl_map_flags map_perm;
|
cl_map_flags map_perm;
|
||||||
if (kind==UCL_READ_ONLY) {
|
if (kind==UCL_READ_ONLY) {
|
||||||
@ -583,7 +583,7 @@ template <> struct _ucl_memcpy<1,0> {
|
|||||||
template <class p1, class p2>
|
template <class p1, class p2>
|
||||||
static inline void mc(p1 &dst, const p2 &src, const size_t n,
|
static inline void mc(p1 &dst, const p2 &src, const size_t n,
|
||||||
cl_command_queue &cq, const cl_bool block,
|
cl_command_queue &cq, const cl_bool block,
|
||||||
const size_t dst_offset, const size_t src_offset) {
|
const size_t /*dst_offset*/, const size_t src_offset) {
|
||||||
if (src.cbegin()==dst.cbegin()) {
|
if (src.cbegin()==dst.cbegin()) {
|
||||||
#ifdef UCL_DBG_MEM_TRACE
|
#ifdef UCL_DBG_MEM_TRACE
|
||||||
std::cerr << "UCL_COPY 1S\n";
|
std::cerr << "UCL_COPY 1S\n";
|
||||||
@ -641,7 +641,7 @@ template <> struct _ucl_memcpy<0,1> {
|
|||||||
template <class p1, class p2>
|
template <class p1, class p2>
|
||||||
static inline void mc(p1 &dst, const p2 &src, const size_t n,
|
static inline void mc(p1 &dst, const p2 &src, const size_t n,
|
||||||
cl_command_queue &cq, const cl_bool block,
|
cl_command_queue &cq, const cl_bool block,
|
||||||
const size_t dst_offset, const size_t src_offset) {
|
const size_t dst_offset, const size_t /*src_offset*/) {
|
||||||
if (src.cbegin()==dst.cbegin()) {
|
if (src.cbegin()==dst.cbegin()) {
|
||||||
if (block) ucl_sync(cq);
|
if (block) ucl_sync(cq);
|
||||||
#ifdef UCL_DBG_MEM_TRACE
|
#ifdef UCL_DBG_MEM_TRACE
|
||||||
|
|||||||
@ -35,19 +35,19 @@ class UCL_Texture {
|
|||||||
UCL_Texture() {}
|
UCL_Texture() {}
|
||||||
~UCL_Texture() {}
|
~UCL_Texture() {}
|
||||||
/// Construct with a specified texture reference
|
/// Construct with a specified texture reference
|
||||||
inline UCL_Texture(UCL_Program &prog, const char *texture_name) { }
|
inline UCL_Texture(UCL_Program & /*prog*/, const char * /*texture_name*/) { }
|
||||||
/// Set the texture reference for this object
|
/// Set the texture reference for this object
|
||||||
inline void get_texture(UCL_Program &prog, const char *texture_name) { }
|
inline void get_texture(UCL_Program & /*prog*/, const char * /*texture_name*/) { }
|
||||||
|
|
||||||
/// Bind a float array where each fetch grabs a vector of length numel
|
/// Bind a float array where each fetch grabs a vector of length numel
|
||||||
template<class mat_typ>
|
template<class mat_typ>
|
||||||
inline void bind_float(mat_typ &vec, const unsigned numel) { }
|
inline void bind_float(mat_typ & /*vec*/, const unsigned /*numel*/) { }
|
||||||
|
|
||||||
/// Unbind the texture reference from the memory allocation
|
/// Unbind the texture reference from the memory allocation
|
||||||
inline void unbind() { }
|
inline void unbind() { }
|
||||||
|
|
||||||
/// Make a texture reference available to kernel
|
/// Make a texture reference available to kernel
|
||||||
inline void allow(UCL_Kernel &kernel) { }
|
inline void allow(UCL_Kernel & /*kernel*/) { }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
friend class UCL_Kernel;
|
friend class UCL_Kernel;
|
||||||
@ -62,7 +62,7 @@ class UCL_Const {
|
|||||||
inline UCL_Const(UCL_Program &prog, const char *global_name)
|
inline UCL_Const(UCL_Program &prog, const char *global_name)
|
||||||
{ get_global(prog,global_name); }
|
{ get_global(prog,global_name); }
|
||||||
/// Set the global reference for this object
|
/// Set the global reference for this object
|
||||||
inline void get_global(UCL_Program &prog, const char *global_name) {
|
inline void get_global(UCL_Program &prog, const char * /*global_name*/) {
|
||||||
if (_active) {
|
if (_active) {
|
||||||
CL_DESTRUCT_CALL(clReleaseContext(_context));
|
CL_DESTRUCT_CALL(clReleaseContext(_context));
|
||||||
CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq));
|
CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq));
|
||||||
|
|||||||
@ -71,7 +71,7 @@ class UCL_Timer {
|
|||||||
inline void init(UCL_Device &dev) { init(dev,dev.cq()); }
|
inline void init(UCL_Device &dev) { init(dev,dev.cq()); }
|
||||||
|
|
||||||
/// Initialize command queue for timing
|
/// Initialize command queue for timing
|
||||||
inline void init(UCL_Device &dev, command_queue &cq) {
|
inline void init(UCL_Device & /*dev*/, command_queue &cq) {
|
||||||
clear();
|
clear();
|
||||||
_cq=cq;
|
_cq=cq;
|
||||||
clRetainCommandQueue(_cq);
|
clRetainCommandQueue(_cq);
|
||||||
|
|||||||
@ -205,12 +205,11 @@ template <> struct _host_host_copy<1,1> {
|
|||||||
// Should never be here
|
// Should never be here
|
||||||
template <int host_t1, int host_t2> struct _host_host_copy {
|
template <int host_t1, int host_t2> struct _host_host_copy {
|
||||||
template <class mat1, class mat2>
|
template <class mat1, class mat2>
|
||||||
static inline void hhc(mat1 &dst, const mat2 &src, const size_t numel) {
|
static inline void hhc(mat1 & /*dst*/, const mat2 & /*src*/, const size_t /*numel*/) {
|
||||||
assert(0==1);
|
assert(0==1);
|
||||||
}
|
}
|
||||||
template <class mat1, class mat2>
|
template <class mat1, class mat2>
|
||||||
static inline void hhc(mat1 &dst, const mat2 &src, const size_t rows,
|
static inline void hhc(mat1 & /*dst*/, const mat2 & /*src*/, const size_t /*rows*/, const size_t /*cols*/) {
|
||||||
const size_t cols) {
|
|
||||||
assert(0==1);
|
assert(0==1);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -470,24 +469,22 @@ template <int host_type1> struct _ucl_cast_copy<host_type1,1> {
|
|||||||
// Neither on host or both on host
|
// Neither on host or both on host
|
||||||
template <> struct _ucl_cast_copy<1,1> {
|
template <> struct _ucl_cast_copy<1,1> {
|
||||||
template <class mat1, class mat2, class mat3>
|
template <class mat1, class mat2, class mat3>
|
||||||
static inline void cc(mat1 &dst, const mat2 &src, const size_t numel,
|
static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/, const size_t /*numel*/,
|
||||||
mat3 &cast_buffer, command_queue &cq) {
|
mat3 & /*cast_buffer*/, command_queue & /*cq*/) {
|
||||||
assert(0==1);
|
assert(0==1);
|
||||||
}
|
}
|
||||||
template <class mat1, class mat2, class mat3>
|
template <class mat1, class mat2, class mat3>
|
||||||
static inline void cc(mat1 &dst, const mat2 &src, const size_t numel,
|
static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/, const size_t /*numel*/, mat3 & /*cast_buffer*/) {
|
||||||
mat3 &cast_buffer) {
|
|
||||||
assert(0==1);
|
assert(0==1);
|
||||||
}
|
}
|
||||||
template <class mat1, class mat2, class mat3>
|
template <class mat1, class mat2, class mat3>
|
||||||
static inline void cc(mat1 &dst, const mat2 &src, const size_t rows,
|
static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/, const size_t /*rows*/,
|
||||||
const size_t cols, mat3 &cast_buffer) {
|
const size_t /*cols*/, mat3 & /*cast_buffer*/) {
|
||||||
assert(0==1);
|
assert(0==1);
|
||||||
}
|
}
|
||||||
template <class mat1, class mat2, class mat3>
|
template <class mat1, class mat2, class mat3>
|
||||||
static inline void cc(mat1 &dst, const mat2 &src, const size_t rows,
|
static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/, const size_t /*rows*/,
|
||||||
const size_t cols, mat3 &cast_buffer,
|
const size_t /*cols*/, mat3 & /*cast_buffer*/, command_queue & /*cq*/) {
|
||||||
command_queue &cq) {
|
|
||||||
assert(0==1);
|
assert(0==1);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -495,24 +492,22 @@ template <> struct _ucl_cast_copy<1,1> {
|
|||||||
// Neither on host or both on host
|
// Neither on host or both on host
|
||||||
template <> struct _ucl_cast_copy<0,0> {
|
template <> struct _ucl_cast_copy<0,0> {
|
||||||
template <class mat1, class mat2, class mat3>
|
template <class mat1, class mat2, class mat3>
|
||||||
static inline void cc(mat1 &dst, const mat2 &src, const size_t numel,
|
static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/, const size_t /*numel*/,
|
||||||
mat3 &cast_buffer, command_queue &cq) {
|
mat3 & /*cast_buffer*/, command_queue & /*cq*/) {
|
||||||
assert(0==1);
|
assert(0==1);
|
||||||
}
|
}
|
||||||
template <class mat1, class mat2, class mat3>
|
template <class mat1, class mat2, class mat3>
|
||||||
static inline void cc(mat1 &dst, const mat2 &src, const size_t numel,
|
static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/, const size_t /*numel*/, mat3 & /*cast_buffer*/) {
|
||||||
mat3 &cast_buffer) {
|
|
||||||
assert(0==1);
|
assert(0==1);
|
||||||
}
|
}
|
||||||
template <class mat1, class mat2, class mat3>
|
template <class mat1, class mat2, class mat3>
|
||||||
static inline void cc(mat1 &dst, const mat2 &src, const size_t rows,
|
static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/, const size_t /*rows*/,
|
||||||
const size_t cols, mat3 &cast_buffer) {
|
const size_t /*cols*/, mat3 & /*cast_buffer*/) {
|
||||||
assert(0==1);
|
assert(0==1);
|
||||||
}
|
}
|
||||||
template <class mat1, class mat2, class mat3>
|
template <class mat1, class mat2, class mat3>
|
||||||
static inline void cc(mat1 &dst, const mat2 &src, const size_t rows,
|
static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/, const size_t /*rows*/,
|
||||||
const size_t cols, mat3 &cast_buffer,
|
const size_t cols, mat3 & /*cast_buffer*/, command_queue & /*cq*/) {
|
||||||
command_queue &cq) {
|
|
||||||
assert(0==1);
|
assert(0==1);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
@ -125,7 +125,7 @@ class UCL_D_Vec : public UCL_BaseMat {
|
|||||||
* - The view does not prevent the memory from being freed by the
|
* - The view does not prevent the memory from being freed by the
|
||||||
* allocating container when using CUDA APIs **/
|
* allocating container when using CUDA APIs **/
|
||||||
template <class ucl_type>
|
template <class ucl_type>
|
||||||
inline void view(ucl_type &input, const size_t rows, const size_t cols) {
|
inline void view(ucl_type &input, const size_t UCL_DEBUG_ARG(rows), const size_t cols) {
|
||||||
#ifdef UCL_DEBUG
|
#ifdef UCL_DEBUG
|
||||||
assert(rows==1);
|
assert(rows==1);
|
||||||
#endif
|
#endif
|
||||||
@ -230,8 +230,8 @@ class UCL_D_Vec : public UCL_BaseMat {
|
|||||||
* - The view does not prevent the memory from being freed by the
|
* - The view does not prevent the memory from being freed by the
|
||||||
* allocating container when using CUDA APIs **/
|
* allocating container when using CUDA APIs **/
|
||||||
template <class ucl_type>
|
template <class ucl_type>
|
||||||
inline void view_offset(const size_t offset,ucl_type &input,const size_t rows,
|
inline void view_offset(const size_t offset,ucl_type &input,
|
||||||
const size_t cols) {
|
const size_t UCL_DEBUG_ARG(rows), const size_t cols) {
|
||||||
#ifdef UCL_DEBUG
|
#ifdef UCL_DEBUG
|
||||||
assert(rows==1);
|
assert(rows==1);
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -126,7 +126,7 @@ class UCL_H_Vec : public UCL_BaseMat {
|
|||||||
* allocating container when using CUDA APIs
|
* allocating container when using CUDA APIs
|
||||||
* - Viewing a device container on the host is not supported **/
|
* - Viewing a device container on the host is not supported **/
|
||||||
template <class ucl_type>
|
template <class ucl_type>
|
||||||
inline void view(ucl_type &input, const size_t rows, const size_t cols) {
|
inline void view(ucl_type &input, const size_t UCL_DEBUG_ARG(rows), const size_t cols) {
|
||||||
#ifdef UCL_DEBUG
|
#ifdef UCL_DEBUG
|
||||||
assert(rows==1);
|
assert(rows==1);
|
||||||
#endif
|
#endif
|
||||||
@ -188,7 +188,7 @@ class UCL_H_Vec : public UCL_BaseMat {
|
|||||||
* allocating container when using CUDA APIs
|
* allocating container when using CUDA APIs
|
||||||
* - Viewing a device pointer on the host is not supported **/
|
* - Viewing a device pointer on the host is not supported **/
|
||||||
template <class ptr_type>
|
template <class ptr_type>
|
||||||
inline void view(ptr_type *input, const size_t rows, const size_t cols,
|
inline void view(ptr_type *input, const size_t UCL_DEBUG_ARG(rows), const size_t cols,
|
||||||
UCL_Device &dev) {
|
UCL_Device &dev) {
|
||||||
#ifdef UCL_DEBUG
|
#ifdef UCL_DEBUG
|
||||||
assert(rows==1);
|
assert(rows==1);
|
||||||
@ -233,7 +233,7 @@ class UCL_H_Vec : public UCL_BaseMat {
|
|||||||
* allocating container when using CUDA APIs
|
* allocating container when using CUDA APIs
|
||||||
* - Viewing a device container on the host is not supported **/
|
* - Viewing a device container on the host is not supported **/
|
||||||
template <class ucl_type>
|
template <class ucl_type>
|
||||||
inline void view_offset(const size_t offset,ucl_type &input,const size_t rows,
|
inline void view_offset(const size_t offset,ucl_type &input,const size_t UCL_DEBUG_ARG(rows),
|
||||||
const size_t cols) {
|
const size_t cols) {
|
||||||
#ifdef UCL_DEBUG
|
#ifdef UCL_DEBUG
|
||||||
assert(rows==1);
|
assert(rows==1);
|
||||||
|
|||||||
@ -27,7 +27,7 @@ template <int st> struct _ucl_s_obj_help;
|
|||||||
// -- Can potentially use same memory if shared by accelerator
|
// -- Can potentially use same memory if shared by accelerator
|
||||||
template <> struct _ucl_s_obj_help<1> {
|
template <> struct _ucl_s_obj_help<1> {
|
||||||
template <class t1, class t2, class t3>
|
template <class t1, class t2, class t3>
|
||||||
static inline int alloc(t1 &host, t2 &device, t3 &_buffer,
|
static inline int alloc(t1 &host, t2 &device, t3 & /*_buffer*/,
|
||||||
const int cols, UCL_Device &acc,
|
const int cols, UCL_Device &acc,
|
||||||
const enum UCL_MEMOPT kind1,
|
const enum UCL_MEMOPT kind1,
|
||||||
const enum UCL_MEMOPT kind2) {
|
const enum UCL_MEMOPT kind2) {
|
||||||
@ -131,41 +131,37 @@ template <> struct _ucl_s_obj_help<1> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class t1, class t2, class t3>
|
template <class t1, class t2, class t3>
|
||||||
static inline void copy(t1 &dst, t2 &src, t3 &buffer, const bool async) {
|
static inline void copy(t1 &dst, t2 &src, t3 & /*buffer*/, const bool async) {
|
||||||
ucl_copy(dst,src,async);
|
ucl_copy(dst,src,async);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class t1, class t2, class t3>
|
template <class t1, class t2, class t3>
|
||||||
static inline void copy(t1 &dst, t2 &src, t3 &buffer, command_queue &cq) {
|
static inline void copy(t1 &dst, t2 &src, t3 & /*buffer*/, command_queue &cq) {
|
||||||
ucl_copy(dst,src,cq);
|
ucl_copy(dst,src,cq);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class t1, class t2, class t3>
|
template <class t1, class t2, class t3>
|
||||||
static inline void copy(t1 &dst, t2 &src, const int cols, t3 &buffer,
|
static inline void copy(t1 &dst, t2 &src, const int cols, t3 & /*buffer*/, const bool async) {
|
||||||
const bool async) {
|
|
||||||
ucl_copy(dst,src,cols,async);
|
ucl_copy(dst,src,cols,async);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class t1, class t2, class t3>
|
template <class t1, class t2, class t3>
|
||||||
static inline void copy(t1 &dst, t2 &src, const int cols, t3 &buffer,
|
static inline void copy(t1 &dst, t2 &src, const int cols, t3 & /*buffer*/, command_queue &cq) {
|
||||||
command_queue &cq) {
|
|
||||||
ucl_copy(dst,src,cols,cq);
|
ucl_copy(dst,src,cols,cq);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class t1, class t2, class t3>
|
template <class t1, class t2, class t3>
|
||||||
static inline void copy(t1 &dst, t2 &src, const int rows, const int cols,
|
static inline void copy(t1 &dst, t2 &src, const int rows, const int cols, t3 & /*buffer*/, const bool async) {
|
||||||
t3 &buffer, const bool async) {
|
|
||||||
ucl_copy(dst,src,rows,cols,async);
|
ucl_copy(dst,src,rows,cols,async);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class t1, class t2, class t3>
|
template <class t1, class t2, class t3>
|
||||||
static inline void copy(t1 &dst, t2 &src, const int rows, const int cols,
|
static inline void copy(t1 &dst, t2 &src, const int rows, const int cols, t3 & /*buffer*/, command_queue &cq) {
|
||||||
t3 &buffer, command_queue &cq) {
|
|
||||||
ucl_copy(dst,src,rows,cols,cq);
|
ucl_copy(dst,src,rows,cols,cq);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class t1, class t2, class t3>
|
template <class t1, class t2, class t3>
|
||||||
static inline int dev_resize(t1 &device, t2 &host, t3 &buff,const int cols) {
|
static inline int dev_resize(t1 &device, t2 &host, t3 & /*buff*/,const int cols) {
|
||||||
if (device.kind()==UCL_VIEW) {
|
if (device.kind()==UCL_VIEW) {
|
||||||
device.view(host);
|
device.view(host);
|
||||||
return UCL_SUCCESS;
|
return UCL_SUCCESS;
|
||||||
@ -353,7 +349,7 @@ template <int st> struct _ucl_s_obj_help {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class t1, class t2, class t3>
|
template <class t1, class t2, class t3>
|
||||||
static inline int dev_resize(t1 &device, t2 &host, t3 &buff,const int cols) {
|
static inline int dev_resize(t1 &device, t2 & /*host*/, t3 &buff,const int cols) {
|
||||||
int err=buff.resize(cols);
|
int err=buff.resize(cols);
|
||||||
if (err!=UCL_SUCCESS)
|
if (err!=UCL_SUCCESS)
|
||||||
return err;
|
return err;
|
||||||
|
|||||||
@ -48,10 +48,10 @@ int AmoebaT::init(const int ntypes, const int max_amtype, const int max_amclass,
|
|||||||
const double *host_pdamp, const double *host_thole,
|
const double *host_pdamp, const double *host_thole,
|
||||||
const double *host_dirdamp, const int *host_amtype2class,
|
const double *host_dirdamp, const int *host_amtype2class,
|
||||||
const double *host_special_hal,
|
const double *host_special_hal,
|
||||||
const double *host_special_repel,
|
const double * /*host_special_repel*/,
|
||||||
const double *host_special_disp,
|
const double * /*host_special_disp*/,
|
||||||
const double *host_special_mpole,
|
const double *host_special_mpole,
|
||||||
const double *host_special_polar_wscale,
|
const double * /*host_special_polar_wscale*/,
|
||||||
const double *host_special_polar_piscale,
|
const double *host_special_polar_piscale,
|
||||||
const double *host_special_polar_pscale,
|
const double *host_special_polar_pscale,
|
||||||
const double *host_csix, const double *host_adisp,
|
const double *host_csix, const double *host_adisp,
|
||||||
@ -188,7 +188,7 @@ int AmoebaT::multipole_real(const int eflag, const int vflag) {
|
|||||||
// Launch the real-space permanent field kernel
|
// Launch the real-space permanent field kernel
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int AmoebaT::udirect2b(const int eflag, const int vflag) {
|
int AmoebaT::udirect2b(const int /*eflag*/, const int /*vflag*/) {
|
||||||
int ainum=this->ans->inum();
|
int ainum=this->ans->inum();
|
||||||
if (ainum == 0)
|
if (ainum == 0)
|
||||||
return 0;
|
return 0;
|
||||||
@ -230,7 +230,7 @@ int AmoebaT::udirect2b(const int eflag, const int vflag) {
|
|||||||
// Launch the real-space induced field kernel, returning field and fieldp
|
// Launch the real-space induced field kernel, returning field and fieldp
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int AmoebaT::umutual2b(const int eflag, const int vflag) {
|
int AmoebaT::umutual2b(const int /*eflag*/, const int /*vflag*/) {
|
||||||
int ainum=this->ans->inum();
|
int ainum=this->ans->inum();
|
||||||
if (ainum == 0)
|
if (ainum == 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
|||||||
@ -119,8 +119,8 @@ void amoeba_gpu_clear() {
|
|||||||
|
|
||||||
int** amoeba_gpu_precompute(const int ago, const int inum_full, const int nall,
|
int** amoeba_gpu_precompute(const int ago, const int inum_full, const int nall,
|
||||||
double **host_x, int *host_type, int *host_amtype,
|
double **host_x, int *host_type, int *host_amtype,
|
||||||
int *host_amgroup, double **host_rpole,
|
int *host_amgroup, double **host_rpole, double ** /*host_uind*/,
|
||||||
double **host_uind, double **host_uinp, double *host_pval,
|
double ** /*host_uinp*/, double * /*host_pval*/,
|
||||||
double *sublo, double *subhi, tagint *tag,
|
double *sublo, double *subhi, tagint *tag,
|
||||||
int **nspecial, tagint **special,
|
int **nspecial, tagint **special,
|
||||||
int *nspecial15, tagint **special15,
|
int *nspecial15, tagint **special15,
|
||||||
|
|||||||
@ -403,9 +403,14 @@ double AtomT::host_memory_usage() const {
|
|||||||
return _max_atoms*atom_bytes*sizeof(numtyp)+sizeof(Atom<numtyp,acctyp>);
|
return _max_atoms*atom_bytes*sizeof(numtyp)+sizeof(Atom<numtyp,acctyp>);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef USE_CUDPP
|
||||||
|
#define USE_CUDPP_ARG(arg) arg
|
||||||
|
#else
|
||||||
|
#define USE_CUDPP_ARG(arg)
|
||||||
|
#endif
|
||||||
// Sort arrays for neighbor list calculation
|
// Sort arrays for neighbor list calculation
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void AtomT::sort_neighbor(const int num_atoms) {
|
void AtomT::sort_neighbor(const int USE_CUDPP_ARG(num_atoms)) {
|
||||||
#ifdef USE_CUDPP
|
#ifdef USE_CUDPP
|
||||||
CUDPPResult result = cudppSort(sort_plan, (unsigned *)dev_cell_id.begin(),
|
CUDPPResult result = cudppSort(sort_plan, (unsigned *)dev_cell_id.begin(),
|
||||||
(int *)dev_particle_id.begin(),
|
(int *)dev_particle_id.begin(),
|
||||||
|
|||||||
@ -327,7 +327,7 @@ class Atom {
|
|||||||
|
|
||||||
/// Copy positions and types to device asynchronously
|
/// Copy positions and types to device asynchronously
|
||||||
/** Copies nall() elements **/
|
/** Copies nall() elements **/
|
||||||
inline void add_x_data(double **host_ptr, int *host_type) {
|
inline void add_x_data(double ** /*host_ptr*/, int * /*host_type*/) {
|
||||||
time_pos.start();
|
time_pos.start();
|
||||||
if (_x_avail==false) {
|
if (_x_avail==false) {
|
||||||
#ifdef GPU_CAST
|
#ifdef GPU_CAST
|
||||||
@ -441,7 +441,7 @@ class Atom {
|
|||||||
|
|
||||||
/// Copy velocities and tags to device asynchronously
|
/// Copy velocities and tags to device asynchronously
|
||||||
/** Copies nall() elements **/
|
/** Copies nall() elements **/
|
||||||
inline void add_v_data(double **host_ptr, tagint *host_tag) {
|
inline void add_v_data(double ** /*host_ptr*/, tagint * /*host_tag*/) {
|
||||||
time_vel.start();
|
time_vel.start();
|
||||||
if (_v_avail==false) {
|
if (_v_avail==false) {
|
||||||
#ifdef GPU_CAST
|
#ifdef GPU_CAST
|
||||||
|
|||||||
@ -288,7 +288,7 @@ int** BaseAmoebaT::precompute(const int ago, const int inum_full, const int nall
|
|||||||
const bool eflag_in, const bool vflag_in,
|
const bool eflag_in, const bool vflag_in,
|
||||||
const bool eatom, const bool vatom, int &host_start,
|
const bool eatom, const bool vatom, int &host_start,
|
||||||
int **&ilist, int **&jnum, const double cpu_time,
|
int **&ilist, int **&jnum, const double cpu_time,
|
||||||
bool &success, double *host_q, double *boxlo, double *prd) {
|
bool &success, double *host_q, double * /*boxlo*/, double * /*prd*/) {
|
||||||
acc_timers();
|
acc_timers();
|
||||||
if (eatom) _eflag=2;
|
if (eatom) _eflag=2;
|
||||||
else if (eflag_in) _eflag=1;
|
else if (eflag_in) _eflag=1;
|
||||||
@ -368,20 +368,21 @@ int** BaseAmoebaT::precompute(const int ago, const int inum_full, const int nall
|
|||||||
// this is the first part in a time step done on the GPU for AMOEBA for now
|
// this is the first part in a time step done on the GPU for AMOEBA for now
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full,
|
void BaseAmoebaT::compute_multipole_real(const int /*ago*/, const int inum_full,
|
||||||
const int nall, double **host_x,
|
const int /*nall*/, double ** /*host_x*/,
|
||||||
int *host_type, int *host_amtype,
|
int * /*host_type*/, int * /*host_amtype*/,
|
||||||
int *host_amgroup, double **host_rpole, double *host_pval,
|
int * /*host_amgroup*/, double ** /*host_rpole*/,
|
||||||
double *sublo, double *subhi, tagint *tag,
|
double */*host_pval*/, double * /*sublo*/,
|
||||||
int **nspecial, tagint **special,
|
double * /*subhi*/, tagint * /*tag*/,
|
||||||
int *nspecial15, tagint **special15,
|
int ** /*nspecial*/, tagint ** /*special*/,
|
||||||
const bool eflag_in, const bool vflag_in,
|
int * /*nspecial15*/, tagint ** /*special15*/,
|
||||||
const bool eatom, const bool vatom,
|
const bool /*eflag_in*/, const bool /*vflag_in*/,
|
||||||
int &host_start, int **ilist, int **jnum,
|
const bool /*eatom*/, const bool /*vatom*/,
|
||||||
const double cpu_time, bool &success,
|
int & /*host_start*/, int ** /*ilist*/, int ** /*jnum*/,
|
||||||
const double aewald, const double felec,
|
const double /*cpu_time*/, bool & /*success*/,
|
||||||
const double off2_mpole, double *host_q,
|
const double aewald, const double felec,
|
||||||
double *boxlo, double *prd, void **tep_ptr) {
|
const double off2_mpole, double * /*host_q*/,
|
||||||
|
double * /*boxlo*/, double * /*prd*/, void **tep_ptr) {
|
||||||
// ------------------- Resize _tep array ------------------------
|
// ------------------- Resize _tep array ------------------------
|
||||||
|
|
||||||
if (inum_full>_max_tep_size) {
|
if (inum_full>_max_tep_size) {
|
||||||
@ -393,7 +394,7 @@ void BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full,
|
|||||||
_off2_mpole = off2_mpole;
|
_off2_mpole = off2_mpole;
|
||||||
_felec = felec;
|
_felec = felec;
|
||||||
_aewald = aewald;
|
_aewald = aewald;
|
||||||
const int red_blocks=multipole_real(_eflag,_vflag);
|
multipole_real(_eflag,_vflag);
|
||||||
|
|
||||||
// leave the answers (forces, energies and virial) on the device,
|
// leave the answers (forces, energies and virial) on the device,
|
||||||
// only copy them back in the last kernel (polar_real)
|
// only copy them back in the last kernel (polar_real)
|
||||||
@ -424,7 +425,7 @@ void BaseAmoebaT::compute_udirect2b(int *host_amtype, int *host_amgroup, double
|
|||||||
// specify the correct cutoff and alpha values
|
// specify the correct cutoff and alpha values
|
||||||
_off2_polar = off2_polar;
|
_off2_polar = off2_polar;
|
||||||
_aewald = aewald;
|
_aewald = aewald;
|
||||||
const int red_blocks=udirect2b(_eflag,_vflag);
|
udirect2b(_eflag,_vflag);
|
||||||
|
|
||||||
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
|
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
|
||||||
|
|
||||||
@ -436,10 +437,10 @@ void BaseAmoebaT::compute_udirect2b(int *host_amtype, int *host_amgroup, double
|
|||||||
// of the induced field
|
// of the induced field
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void BaseAmoebaT::compute_umutual2b(int *host_amtype, int *host_amgroup, double **host_rpole,
|
void BaseAmoebaT::compute_umutual2b(int *host_amtype, int *host_amgroup, double ** /*host_rpole*/,
|
||||||
double **host_uind, double **host_uinp, double *host_pval,
|
double **host_uind, double **host_uinp, double * /*host_pval*/,
|
||||||
const double aewald, const double off2_polar,
|
const double aewald, const double off2_polar,
|
||||||
void** fieldp_ptr) {
|
void** /*fieldp_ptr*/) {
|
||||||
// only copy the necessary data arrays that are updated over the iterations
|
// only copy the necessary data arrays that are updated over the iterations
|
||||||
// use nullptr for the other arrays that are already copied from host to device
|
// use nullptr for the other arrays that are already copied from host to device
|
||||||
cast_extra_data(host_amtype, host_amgroup, nullptr, host_uind, host_uinp, nullptr);
|
cast_extra_data(host_amtype, host_amgroup, nullptr, host_uind, host_uinp, nullptr);
|
||||||
@ -449,7 +450,7 @@ void BaseAmoebaT::compute_umutual2b(int *host_amtype, int *host_amgroup, double
|
|||||||
_off2_polar = off2_polar;
|
_off2_polar = off2_polar;
|
||||||
_aewald = aewald;
|
_aewald = aewald;
|
||||||
// launch the kernel
|
// launch the kernel
|
||||||
const int red_blocks=umutual2b(_eflag,_vflag);
|
umutual2b(_eflag,_vflag);
|
||||||
|
|
||||||
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
|
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
|
||||||
// NOTE: move this step to update_fieldp() to delay device-host transfer
|
// NOTE: move this step to update_fieldp() to delay device-host transfer
|
||||||
@ -492,7 +493,7 @@ void BaseAmoebaT::precompute_kspace(const int inum_full, const int bsorder,
|
|||||||
_fdip_phi2.alloc(_max_thetai_size*10,*(this->ucl_device),UCL_READ_WRITE);
|
_fdip_phi2.alloc(_max_thetai_size*10,*(this->ucl_device),UCL_READ_WRITE);
|
||||||
_fdip_sum_phi.alloc(_max_thetai_size*20,*(this->ucl_device),UCL_READ_WRITE);
|
_fdip_sum_phi.alloc(_max_thetai_size*20,*(this->ucl_device),UCL_READ_WRITE);
|
||||||
} else {
|
} else {
|
||||||
if (_thetai1.cols()<_max_thetai_size*bsorder) {
|
if ((int)_thetai1.cols()<_max_thetai_size*bsorder) {
|
||||||
_max_thetai_size=static_cast<int>(static_cast<double>(inum_full)*1.10);
|
_max_thetai_size=static_cast<int>(static_cast<double>(inum_full)*1.10);
|
||||||
_thetai1.resize(_max_thetai_size*bsorder);
|
_thetai1.resize(_max_thetai_size*bsorder);
|
||||||
_thetai2.resize(_max_thetai_size*bsorder);
|
_thetai2.resize(_max_thetai_size*bsorder);
|
||||||
@ -573,7 +574,7 @@ void BaseAmoebaT::precompute_kspace(const int inum_full, const int bsorder,
|
|||||||
int numel = _num_grid_points;
|
int numel = _num_grid_points;
|
||||||
if (_cgrid_brick.cols() == 0) {
|
if (_cgrid_brick.cols() == 0) {
|
||||||
_cgrid_brick.alloc(numel, *(this->ucl_device), UCL_READ_WRITE, UCL_READ_ONLY);
|
_cgrid_brick.alloc(numel, *(this->ucl_device), UCL_READ_WRITE, UCL_READ_ONLY);
|
||||||
} else if (numel > _cgrid_brick.cols()) {
|
} else if (numel > (int)_cgrid_brick.cols()) {
|
||||||
_cgrid_brick.resize(numel);
|
_cgrid_brick.resize(numel);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -611,7 +612,7 @@ void BaseAmoebaT::compute_fphi_uind(double ****host_grid_brick,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// launch the kernel with its execution configuration (see below)
|
// launch the kernel with its execution configuration (see below)
|
||||||
const int red_blocks = fphi_uind();
|
fphi_uind();
|
||||||
|
|
||||||
// copy data from device to host asynchronously
|
// copy data from device to host asynchronously
|
||||||
_fdip_phi1.update_host(_max_thetai_size*10, true);
|
_fdip_phi1.update_host(_max_thetai_size*10, true);
|
||||||
@ -682,7 +683,7 @@ void BaseAmoebaT::compute_fphi_mpole(double ***host_grid_brick, void **host_fphi
|
|||||||
_cgrid_brick.update_device(_num_grid_points, false);
|
_cgrid_brick.update_device(_num_grid_points, false);
|
||||||
|
|
||||||
_felec = felec;
|
_felec = felec;
|
||||||
const int red_blocks = fphi_mpole();
|
fphi_mpole();
|
||||||
|
|
||||||
_fdip_sum_phi.update_host(_max_thetai_size*20);
|
_fdip_sum_phi.update_host(_max_thetai_size*20);
|
||||||
|
|
||||||
@ -698,9 +699,6 @@ int BaseAmoebaT::fphi_mpole() {
|
|||||||
if (ainum == 0)
|
if (ainum == 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
int _nall=atom->nall();
|
|
||||||
int nbor_pitch=nbor->nbor_pitch();
|
|
||||||
|
|
||||||
// Compute the block size and grid size to keep all cores busy
|
// Compute the block size and grid size to keep all cores busy
|
||||||
|
|
||||||
const int BX=block_size();
|
const int BX=block_size();
|
||||||
@ -771,7 +769,7 @@ double BaseAmoebaT::host_memory_usage_atomic() const {
|
|||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void BaseAmoebaT::setup_fft(const int numel, const int element_type)
|
void BaseAmoebaT::setup_fft(const int /*numel*/, const int /*element_type*/)
|
||||||
{
|
{
|
||||||
// TODO: setting up FFT plan based on the backend (cuFFT or hipFFT)
|
// TODO: setting up FFT plan based on the backend (cuFFT or hipFFT)
|
||||||
}
|
}
|
||||||
@ -781,7 +779,8 @@ void BaseAmoebaT::setup_fft(const int numel, const int element_type)
|
|||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void BaseAmoebaT::compute_fft1d(void* in, void* out, const int numel, const int mode)
|
void BaseAmoebaT::compute_fft1d(void * /*in*/, void * /*out*/,
|
||||||
|
const int /*numel*/, const int /*mode*/)
|
||||||
{
|
{
|
||||||
// TODO: setting up FFT plan based on the backend (cuFFT or hipFFT)
|
// TODO: setting up FFT plan based on the backend (cuFFT or hipFFT)
|
||||||
#if 0 // !defined(USE_OPENCL) && !defined(USE_HIP)
|
#if 0 // !defined(USE_OPENCL) && !defined(USE_HIP)
|
||||||
@ -940,7 +939,7 @@ void BaseAmoebaT::compile_kernels(UCL_Device &dev, const void *pair_str,
|
|||||||
|
|
||||||
#if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
|
#if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
|
||||||
if (dev.has_subgroup_support()) {
|
if (dev.has_subgroup_support()) {
|
||||||
size_t mx_subgroup_sz = k_polar.max_subgroup_size(_block_size);
|
int mx_subgroup_sz = k_polar.max_subgroup_size(_block_size);
|
||||||
if (_threads_per_atom > mx_subgroup_sz)
|
if (_threads_per_atom > mx_subgroup_sz)
|
||||||
_threads_per_atom = mx_subgroup_sz;
|
_threads_per_atom = mx_subgroup_sz;
|
||||||
device->set_simd_size(mx_subgroup_sz);
|
device->set_simd_size(mx_subgroup_sz);
|
||||||
|
|||||||
@ -280,7 +280,7 @@ class BaseAmoeba {
|
|||||||
UCL_Kernel k_fphi_uind, k_fphi_mpole;
|
UCL_Kernel k_fphi_uind, k_fphi_mpole;
|
||||||
UCL_Kernel k_special15, k_short_nbor;
|
UCL_Kernel k_special15, k_short_nbor;
|
||||||
inline int block_size() { return _block_size; }
|
inline int block_size() { return _block_size; }
|
||||||
inline void set_kernel(const int eflag, const int vflag) {}
|
inline void set_kernel(const int /*eflag*/, const int /*vflag*/) {}
|
||||||
|
|
||||||
// --------------------------- TEXTURES -----------------------------
|
// --------------------------- TEXTURES -----------------------------
|
||||||
UCL_Texture pos_tex;
|
UCL_Texture pos_tex;
|
||||||
|
|||||||
@ -196,7 +196,7 @@ void BaseDPDT::compute(const int f_ago, const int inum_full, const int nall,
|
|||||||
const double cpu_time, bool &success, tagint *tag,
|
const double cpu_time, bool &success, tagint *tag,
|
||||||
double **host_v, const double dtinvsqrt,
|
double **host_v, const double dtinvsqrt,
|
||||||
const int seed, const int timestep,
|
const int seed, const int timestep,
|
||||||
const int nlocal, double *boxlo, double *prd) {
|
const int /*nlocal*/, double * /*boxlo*/, double * /*prd*/) {
|
||||||
acc_timers();
|
acc_timers();
|
||||||
int eflag, vflag;
|
int eflag, vflag;
|
||||||
if (eatom) eflag=2;
|
if (eatom) eflag=2;
|
||||||
@ -261,7 +261,7 @@ int** BaseDPDT::compute(const int ago, const int inum_full,
|
|||||||
const double cpu_time, bool &success,
|
const double cpu_time, bool &success,
|
||||||
double **host_v, const double dtinvsqrt,
|
double **host_v, const double dtinvsqrt,
|
||||||
const int seed, const int timestep,
|
const int seed, const int timestep,
|
||||||
double *boxlo, double *prd) {
|
double * /*boxlo*/, double * /*prd*/) {
|
||||||
acc_timers();
|
acc_timers();
|
||||||
int eflag, vflag;
|
int eflag, vflag;
|
||||||
if (eatom) eflag=2;
|
if (eatom) eflag=2;
|
||||||
|
|||||||
@ -44,19 +44,15 @@ int CHARMMLongT::bytes_per_atom(const int max_nbors) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int CHARMMLongT::init(const int ntypes,
|
int CHARMMLongT::init(const int ntypes, double host_cut_bothsq, double **host_lj1,
|
||||||
double host_cut_bothsq, double **host_lj1,
|
double **host_lj2, double **host_lj3, double **host_lj4,
|
||||||
double **host_lj2, double **host_lj3,
|
double ** /*host_offset*/, double *host_special_lj, const int nlocal,
|
||||||
double **host_lj4, double **host_offset,
|
const int nall, const int max_nbors, const int maxspecial,
|
||||||
double *host_special_lj, const int nlocal,
|
const double cell_size, const double gpu_split, FILE *_screen,
|
||||||
const int nall, const int max_nbors,
|
double host_cut_ljsq, const double host_cut_coulsq,
|
||||||
const int maxspecial, const double cell_size,
|
double *host_special_coul, const double qqrd2e, const double g_ewald,
|
||||||
const double gpu_split, FILE *_screen,
|
const double cut_lj_innersq, const double denom_lj, double **epsilon,
|
||||||
double host_cut_ljsq, const double host_cut_coulsq,
|
double **sigma, const bool mix_arithmetic) {
|
||||||
double *host_special_coul, const double qqrd2e,
|
|
||||||
const double g_ewald, const double cut_lj_innersq,
|
|
||||||
const double denom_lj, double **epsilon,
|
|
||||||
double **sigma, const bool mix_arithmetic) {
|
|
||||||
int success;
|
int success;
|
||||||
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
|
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
|
||||||
_screen,charmm_long,"k_charmm_long");
|
_screen,charmm_long,"k_charmm_long");
|
||||||
|
|||||||
@ -52,7 +52,7 @@ DeviceT::~Device() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
|
int DeviceT::init_device(MPI_Comm /*world*/, MPI_Comm replica, const int ngpu,
|
||||||
const int first_gpu_id, const int gpu_mode,
|
const int first_gpu_id, const int gpu_mode,
|
||||||
const double p_split, const int t_per_atom,
|
const double p_split, const int t_per_atom,
|
||||||
const double user_cell_size, char *ocl_args,
|
const double user_cell_size, char *ocl_args,
|
||||||
@ -528,7 +528,7 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const int nlocal,
|
|||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int DeviceT::init_nbor(Neighbor *nbor, const int nlocal,
|
int DeviceT::init_nbor(Neighbor *nbor, const int nlocal,
|
||||||
const int host_nlocal, const int nall,
|
const int host_nlocal, const int /*nall*/,
|
||||||
const int maxspecial, const int gpu_host,
|
const int maxspecial, const int gpu_host,
|
||||||
const int max_nbors, const double cutoff,
|
const int max_nbors, const double cutoff,
|
||||||
const bool pre_cut, const int threads_per_atom,
|
const bool pre_cut, const int threads_per_atom,
|
||||||
|
|||||||
@ -28,10 +28,10 @@ static DPD<PRECISION,ACC_PRECISION> DPDTMF;
|
|||||||
// Allocate memory on host and device and copy constants to device
|
// Allocate memory on host and device and copy constants to device
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
int dpd_tstat_gpu_init(const int ntypes, double **cutsq, double **host_a0,
|
int dpd_tstat_gpu_init(const int ntypes, double **cutsq, double **host_a0,
|
||||||
double **host_gamma, double **host_sigma, double **host_cut,
|
double **host_gamma, double **host_sigma, double **host_cut,
|
||||||
double *special_lj, const int inum,
|
double *special_lj, const int inum,
|
||||||
const int nall, const int max_nbors, const int maxspecial,
|
const int nall, const int /*max_nbors*/, const int maxspecial,
|
||||||
const double cell_size, int &gpu_mode, FILE *screen) {
|
const double cell_size, int &gpu_mode, FILE *screen) {
|
||||||
DPDTMF.clear();
|
DPDTMF.clear();
|
||||||
gpu_mode=DPDTMF.device->gpu_mode();
|
gpu_mode=DPDTMF.device->gpu_mode();
|
||||||
double gpu_split=DPDTMF.device->particle_split();
|
double gpu_split=DPDTMF.device->particle_split();
|
||||||
|
|||||||
@ -310,7 +310,7 @@ void EAMT::compute(const int f_ago, const int inum_full, const int nlocal,
|
|||||||
const int nall, double **host_x, int *host_type,
|
const int nall, double **host_x, int *host_type,
|
||||||
int *ilist, int *numj, int **firstneigh,
|
int *ilist, int *numj, int **firstneigh,
|
||||||
const bool eflag_in, const bool vflag_in,
|
const bool eflag_in, const bool vflag_in,
|
||||||
const bool eatom, const bool vatom,
|
const bool /*eatom*/, const bool /*vatom*/,
|
||||||
int &host_start, const double cpu_time,
|
int &host_start, const double cpu_time,
|
||||||
bool &success, void **fp_ptr) {
|
bool &success, void **fp_ptr) {
|
||||||
this->acc_timers();
|
this->acc_timers();
|
||||||
@ -386,8 +386,8 @@ int** EAMT::compute(const int ago, const int inum_full, const int nall,
|
|||||||
double **host_x, int *host_type, double *sublo,
|
double **host_x, int *host_type, double *sublo,
|
||||||
double *subhi, tagint *tag, int **nspecial,
|
double *subhi, tagint *tag, int **nspecial,
|
||||||
tagint **special, const bool eflag_in,
|
tagint **special, const bool eflag_in,
|
||||||
const bool vflag_in, const bool eatom,
|
const bool vflag_in, const bool /*eatom*/,
|
||||||
const bool vatom, int &host_start, int **ilist, int **jnum,
|
const bool /*vatom*/, int &host_start, int **ilist, int **jnum,
|
||||||
const double cpu_time, bool &success, int &inum,
|
const double cpu_time, bool &success, int &inum,
|
||||||
void **fp_ptr) {
|
void **fp_ptr) {
|
||||||
this->acc_timers();
|
this->acc_timers();
|
||||||
|
|||||||
@ -176,19 +176,19 @@ double HippoT::host_memory_usage() const {
|
|||||||
// Compute the repulsion term, returning tep
|
// Compute the repulsion term, returning tep
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void HippoT::compute_repulsion(const int ago, const int inum_full,
|
void HippoT::compute_repulsion(const int /*ago*/, const int inum_full,
|
||||||
const int nall, double **host_x,
|
const int /*nall*/, double ** /*host_x*/,
|
||||||
int *host_type, int *host_amtype,
|
int * /*host_type*/, int * /*host_amtype*/,
|
||||||
int *host_amgroup, double **host_rpole,
|
int * /*host_amgroup*/, double ** /*host_rpole*/,
|
||||||
double *sublo, double *subhi, tagint *tag,
|
double * /*sublo*/, double * /*subhi*/, tagint * /*tag*/,
|
||||||
int **nspecial, tagint **special,
|
int ** /*nspecial*/, tagint ** /*special*/,
|
||||||
int *nspecial15, tagint **special15,
|
int * /*nspecial15*/, tagint ** /*special15*/,
|
||||||
const bool eflag_in, const bool vflag_in,
|
const bool eflag_in, const bool vflag_in,
|
||||||
const bool eatom, const bool vatom,
|
const bool eatom, const bool vatom,
|
||||||
int &host_start, int **ilist, int **jnum,
|
int & /*host_start*/, int ** /*ilist*/, int ** /*jnum*/,
|
||||||
const double cpu_time, bool &success,
|
const double /*cpu_time*/, bool & /*success*/,
|
||||||
const double aewald, const double off2_repulse,
|
const double /*aewald*/, const double off2_repulse,
|
||||||
double *host_q, double *boxlo, double *prd,
|
double * /*host_q*/, double * /*boxlo*/, double * /*prd*/,
|
||||||
double cut2, double c0, double c1, double c2,
|
double cut2, double c0, double c1, double c2,
|
||||||
double c3, double c4, double c5, void **tep_ptr) {
|
double c3, double c4, double c5, void **tep_ptr) {
|
||||||
this->acc_timers();
|
this->acc_timers();
|
||||||
@ -223,7 +223,7 @@ void HippoT::compute_repulsion(const int ago, const int inum_full,
|
|||||||
_c3 = c3;
|
_c3 = c3;
|
||||||
_c4 = c4;
|
_c4 = c4;
|
||||||
_c5 = c5;
|
_c5 = c5;
|
||||||
const int red_blocks=repulsion(this->_eflag,this->_vflag);
|
repulsion(this->_eflag,this->_vflag);
|
||||||
|
|
||||||
// copy tep from device to host
|
// copy tep from device to host
|
||||||
this->_tep.update_host(this->_max_tep_size*4,false);
|
this->_tep.update_host(this->_max_tep_size*4,false);
|
||||||
@ -287,7 +287,7 @@ void HippoT::compute_dispersion_real(int *host_amtype, int *host_amgroup,
|
|||||||
|
|
||||||
this->_off2_disp = off2_disp;
|
this->_off2_disp = off2_disp;
|
||||||
this->_aewald = aewald;
|
this->_aewald = aewald;
|
||||||
const int red_blocks=dispersion_real(this->_eflag,this->_vflag);
|
dispersion_real(this->_eflag,this->_vflag);
|
||||||
|
|
||||||
// only copy them back if this is the last kernel
|
// only copy them back if this is the last kernel
|
||||||
// otherwise, commenting out these two lines to leave the answers
|
// otherwise, commenting out these two lines to leave the answers
|
||||||
@ -341,21 +341,21 @@ int HippoT::dispersion_real(const int eflag, const int vflag) {
|
|||||||
// Compute the multipole real-space term, returning tep
|
// Compute the multipole real-space term, returning tep
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void HippoT::compute_multipole_real(const int ago, const int inum_full,
|
void HippoT::compute_multipole_real(const int /*ago*/, const int inum_full,
|
||||||
const int nall, double **host_x,
|
const int /*nall*/, double ** /*host_x*/,
|
||||||
int *host_type, int *host_amtype,
|
int * /*host_type*/, int * /*host_amtype*/,
|
||||||
int *host_amgroup, double **host_rpole,
|
int * /*host_amgroup*/, double ** /*host_rpole*/,
|
||||||
double* host_pval, double *sublo,
|
double* host_pval, double * /*sublo*/,
|
||||||
double *subhi, tagint *tag,
|
double * /*subhi*/, tagint * /*tag*/,
|
||||||
int **nspecial, tagint **special,
|
int ** /*nspecial*/, tagint ** /*special*/,
|
||||||
int *nspecial15, tagint **special15,
|
int * /*nspecial15*/, tagint ** /*special15*/,
|
||||||
const bool eflag_in, const bool vflag_in,
|
const bool /*eflag_in*/, const bool /*vflag_in*/,
|
||||||
const bool eatom, const bool vatom,
|
const bool /*eatom*/, const bool /*vatom*/,
|
||||||
int &host_start, int **ilist, int **jnum,
|
int & /*host_start*/, int ** /*ilist*/, int ** /*jnum*/,
|
||||||
const double cpu_time, bool &success,
|
const double /*cpu_time*/, bool & /*success*/,
|
||||||
const double aewald, const double felec,
|
const double aewald, const double felec,
|
||||||
const double off2_mpole, double *host_q,
|
const double off2_mpole, double * /*host_q*/,
|
||||||
double *boxlo, double *prd, void **tep_ptr) {
|
double * /*boxlo*/, double * /*prd*/, void **tep_ptr) {
|
||||||
|
|
||||||
// cast necessary data arrays from host to device
|
// cast necessary data arrays from host to device
|
||||||
|
|
||||||
@ -373,7 +373,7 @@ void HippoT::compute_multipole_real(const int ago, const int inum_full,
|
|||||||
this->_off2_mpole = off2_mpole;
|
this->_off2_mpole = off2_mpole;
|
||||||
this->_felec = felec;
|
this->_felec = felec;
|
||||||
this->_aewald = aewald;
|
this->_aewald = aewald;
|
||||||
const int red_blocks=multipole_real(this->_eflag,this->_vflag);
|
multipole_real(this->_eflag,this->_vflag);
|
||||||
|
|
||||||
// copy tep from device to host
|
// copy tep from device to host
|
||||||
this->_tep.update_host(this->_max_tep_size*4,false);
|
this->_tep.update_host(this->_max_tep_size*4,false);
|
||||||
@ -424,7 +424,7 @@ int HippoT::multipole_real(const int eflag, const int vflag) {
|
|||||||
// returning field and fieldp
|
// returning field and fieldp
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void HippoT::compute_udirect2b(int *host_amtype, int *host_amgroup, double **host_rpole,
|
void HippoT::compute_udirect2b(int * /*host_amtype*/, int * /*host_amgroup*/, double ** /*host_rpole*/,
|
||||||
double **host_uind, double **host_uinp, double* host_pval,
|
double **host_uind, double **host_uinp, double* host_pval,
|
||||||
const double aewald, const double off2_polar,
|
const double aewald, const double off2_polar,
|
||||||
void** fieldp_ptr) {
|
void** fieldp_ptr) {
|
||||||
@ -438,7 +438,7 @@ void HippoT::compute_udirect2b(int *host_amtype, int *host_amgroup, double **hos
|
|||||||
|
|
||||||
this->_off2_polar = off2_polar;
|
this->_off2_polar = off2_polar;
|
||||||
this->_aewald = aewald;
|
this->_aewald = aewald;
|
||||||
const int red_blocks=udirect2b(this->_eflag,this->_vflag);
|
udirect2b(this->_eflag,this->_vflag);
|
||||||
|
|
||||||
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
|
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
|
||||||
|
|
||||||
@ -449,7 +449,7 @@ void HippoT::compute_udirect2b(int *host_amtype, int *host_amgroup, double **hos
|
|||||||
// Launch the real-space permanent field kernel
|
// Launch the real-space permanent field kernel
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int HippoT::udirect2b(const int eflag, const int vflag) {
|
int HippoT::udirect2b(const int /*eflag*/, const int /*vflag*/) {
|
||||||
int ainum=this->ans->inum();
|
int ainum=this->ans->inum();
|
||||||
if (ainum == 0)
|
if (ainum == 0)
|
||||||
return 0;
|
return 0;
|
||||||
@ -493,10 +493,9 @@ int HippoT::udirect2b(const int eflag, const int vflag) {
|
|||||||
// returning field and fieldp
|
// returning field and fieldp
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void HippoT::compute_umutual2b(int *host_amtype, int *host_amgroup, double **host_rpole,
|
void HippoT::compute_umutual2b(int * /*host_amtype*/, int * /*host_amgroup*/, double ** /*host_rpole*/,
|
||||||
double **host_uind, double **host_uinp, double *host_pval,
|
double **host_uind, double **host_uinp, double * /*host_pval*/,
|
||||||
const double aewald, const double off2_polar,
|
const double aewald, const double off2_polar, void ** /*fieldp_ptr*/) {
|
||||||
void** fieldp_ptr) {
|
|
||||||
|
|
||||||
// cast necessary data arrays from host to device
|
// cast necessary data arrays from host to device
|
||||||
|
|
||||||
@ -505,7 +504,7 @@ void HippoT::compute_umutual2b(int *host_amtype, int *host_amgroup, double **hos
|
|||||||
|
|
||||||
this->_off2_polar = off2_polar;
|
this->_off2_polar = off2_polar;
|
||||||
this->_aewald = aewald;
|
this->_aewald = aewald;
|
||||||
const int red_blocks=umutual2b(this->_eflag,this->_vflag);
|
umutual2b(this->_eflag,this->_vflag);
|
||||||
|
|
||||||
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
|
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
|
||||||
// NOTE: move this step to update_fieldp() to delay device-host transfer
|
// NOTE: move this step to update_fieldp() to delay device-host transfer
|
||||||
@ -517,7 +516,7 @@ void HippoT::compute_umutual2b(int *host_amtype, int *host_amgroup, double **hos
|
|||||||
// Launch the real-space induced field kernel
|
// Launch the real-space induced field kernel
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int HippoT::umutual2b(const int eflag, const int vflag) {
|
int HippoT::umutual2b(const int /*eflag*/, const int /*vflag*/) {
|
||||||
int ainum=this->ans->inum();
|
int ainum=this->ans->inum();
|
||||||
if (ainum == 0)
|
if (ainum == 0)
|
||||||
return 0;
|
return 0;
|
||||||
@ -557,8 +556,8 @@ int HippoT::umutual2b(const int eflag, const int vflag) {
|
|||||||
// Reneighbor on GPU if necessary, and then compute polar real-space
|
// Reneighbor on GPU if necessary, and then compute polar real-space
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void HippoT::compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole,
|
void HippoT::compute_polar_real(int * /*host_amtype*/, int * /*host_amgroup*/, double ** /*host_rpole*/,
|
||||||
double **host_uind, double **host_uinp, double *host_pval,
|
double **host_uind, double **host_uinp, double * /*host_pval*/,
|
||||||
const bool eflag_in, const bool vflag_in,
|
const bool eflag_in, const bool vflag_in,
|
||||||
const bool eatom, const bool vatom,
|
const bool eatom, const bool vatom,
|
||||||
const double aewald, const double felec,
|
const double aewald, const double felec,
|
||||||
|
|||||||
@ -123,7 +123,7 @@ void hippo_gpu_clear() {
|
|||||||
int** hippo_gpu_precompute(const int ago, const int inum_full, const int nall,
|
int** hippo_gpu_precompute(const int ago, const int inum_full, const int nall,
|
||||||
double **host_x, int *host_type, int *host_amtype,
|
double **host_x, int *host_type, int *host_amtype,
|
||||||
int *host_amgroup, double **host_rpole,
|
int *host_amgroup, double **host_rpole,
|
||||||
double **host_uind, double **host_uinp, double *host_pval,
|
double ** /*host_uind*/, double ** /*host_uinp*/, double * /*host_pval*/,
|
||||||
double *sublo, double *subhi, tagint *tag,
|
double *sublo, double *subhi, tagint *tag,
|
||||||
int **nspecial, tagint **special,
|
int **nspecial, tagint **special,
|
||||||
int *nspecial15, tagint **special15,
|
int *nspecial15, tagint **special15,
|
||||||
|
|||||||
@ -293,15 +293,17 @@ class Neighbor {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
int _simd_size;
|
int _simd_size;
|
||||||
|
#ifdef LAL_USE_OLD_NEIGHBOR
|
||||||
inline void set_nbor_block_size(const int mn) {
|
inline void set_nbor_block_size(const int mn) {
|
||||||
#ifdef LAL_USE_OLD_NEIGHBOR
|
|
||||||
int desired=mn/(2*_simd_size);
|
int desired=mn/(2*_simd_size);
|
||||||
desired*=_simd_size;
|
desired*=_simd_size;
|
||||||
if (desired<_simd_size) desired=_simd_size;
|
if (desired<_simd_size) desired=_simd_size;
|
||||||
else if (desired>_max_block_nbor_build) desired=_max_block_nbor_build;
|
else if (desired>_max_block_nbor_build) desired=_max_block_nbor_build;
|
||||||
_block_nbor_build=desired;
|
_block_nbor_build=desired;
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
inline void set_nbor_block_size(const int) {}
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -150,7 +150,7 @@ double SWT::host_memory_usage() const {
|
|||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int SWT::loop(const int eflag, const int vflag, const int evatom,
|
int SWT::loop(const int eflag, const int vflag, const int evatom,
|
||||||
bool &success) {
|
bool & /*success*/) {
|
||||||
const int nbor_pitch=this->nbor->nbor_pitch();
|
const int nbor_pitch=this->nbor->nbor_pitch();
|
||||||
|
|
||||||
// build the short neighbor list
|
// build the short neighbor list
|
||||||
|
|||||||
@ -56,7 +56,7 @@ int VashishtaT::init(const int ntypes, const int nlocal, const int nall, const i
|
|||||||
const double* costheta, const double* bigb,
|
const double* costheta, const double* bigb,
|
||||||
const double* big2b, const double* bigc)
|
const double* big2b, const double* bigc)
|
||||||
{
|
{
|
||||||
int success;
|
int success=0;
|
||||||
success=this->init_three(nlocal,nall,max_nbors,0,cell_size,gpu_split,
|
success=this->init_three(nlocal,nall,max_nbors,0,cell_size,gpu_split,
|
||||||
_screen,vashishta,"k_vashishta","k_vashishta_three_center",
|
_screen,vashishta,"k_vashishta","k_vashishta_three_center",
|
||||||
"k_vashishta_three_end","k_vashishta_short_nbor");
|
"k_vashishta_three_end","k_vashishta_short_nbor");
|
||||||
@ -211,7 +211,7 @@ double VashishtaT::host_memory_usage() const {
|
|||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int VashishtaT::loop(const int eflag, const int vflag, const int evatom,
|
int VashishtaT::loop(const int eflag, const int vflag, const int evatom,
|
||||||
bool &success) {
|
bool & /*success*/) {
|
||||||
const int nbor_pitch=this->nbor->nbor_pitch();
|
const int nbor_pitch=this->nbor->nbor_pitch();
|
||||||
|
|
||||||
// build the short neighbor list
|
// build the short neighbor list
|
||||||
|
|||||||
Reference in New Issue
Block a user