git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@15248 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2016-07-01 23:27:26 +00:00
parent 8366b35459
commit 9656958169
245 changed files with 4890 additions and 4832 deletions

View File

@ -17,7 +17,7 @@
/* -----------------------------------------------------------------------
Copyright (2009) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
certain rights in this software. This software is distributed under
the Simplified BSD License.
----------------------------------------------------------------------- */
@ -37,21 +37,21 @@ class UCL_H_Mat : public UCL_BaseMat {
ROW_MAJOR = 1,
VECTOR = 0
};
/// Element type stored in this container
typedef numtyp data_type;
UCL_H_Mat() : _cols(0) {
#ifdef _OCL_MAT
_carray=(cl_mem)(0);
#endif
}
/// Destructor: passes this container to _host_free()
~UCL_H_Mat() {
  _host_free(*this);
}
/// Construct with specied number of rows and columns
/** \sa alloc() **/
UCL_H_Mat(const size_t rows, const size_t cols, UCL_Device &device,
const enum UCL_MEMOPT kind=UCL_READ_WRITE)
UCL_H_Mat(const size_t rows, const size_t cols, UCL_Device &device,
const enum UCL_MEMOPT kind=UCL_READ_WRITE)
{ _cols=0; _kind=UCL_VIEW; alloc(rows,cols,device,kind); }
/// Set up host matrix with specified # of rows/cols and reserve memory
/** The kind parameter controls memory pinning as follows:
* - UCL_READ_WRITE - Specify that you will read and write from host
@ -74,7 +74,7 @@ class UCL_H_Mat : public UCL_BaseMat {
<< " bytes on host.\n";
_row_bytes=0;
UCL_GERYON_EXIT;
#endif
#endif
_row_bytes=0;
return err;
}
@ -84,7 +84,7 @@ class UCL_H_Mat : public UCL_BaseMat {
_kind=kind;
_end=_array+rows*cols;
return err;
}
}
/// Set up host matrix with specified # of rows/cols and reserve memory
/** The kind parameter controls memory pinning as follows:
@ -117,15 +117,15 @@ class UCL_H_Mat : public UCL_BaseMat {
_kind=kind;
_end=_array+rows*cols;
return err;
}
}
/// Do not allocate memory, instead use an existing allocation from Geryon
/** This function must be passed a Geryon vector or matrix container.
* No memory is freed when the object is destructed.
* - The view does not prevent the memory from being freed by the
* allocating container when using CUDA APIs
* - Viewing a device container on the host is not supported
* \param stride Number of _elements_ between the start of each row **/
* allocating container when using CUDA APIs
* - Viewing a device container on the host is not supported
* \param stride Number of _elements_ between the start of each row **/
template <class ucl_type>
inline void view(ucl_type &input, const size_t rows, const size_t cols,
const size_t stride) {
@ -149,45 +149,45 @@ class UCL_H_Mat : public UCL_BaseMat {
/** This function must be passed a Geryon vector or matrix container.
* No memory is freed when the object is destructed.
* - The view does not prevent the memory from being freed by the
* allocating container when using CUDA APIs
* - Viewing a device container on the host is not supported **/
* allocating container when using CUDA APIs
* - Viewing a device container on the host is not supported **/
template <class ucl_type>
inline void view(ucl_type &input, const size_t rows, const size_t cols) {
  // The row stride defaults to the row size reported by the input container
  view(input,rows,cols,input.row_size());
}
/// Do not allocate memory, instead use an existing allocation from Geryon
/** This function must be passed a Geryon vector or matrix container.
* No memory is freed when the object is destructed.
* - The view does not prevent the memory from being freed by the
* allocating container when using CUDA APIs
* allocating container when using CUDA APIs
* - If a matrix is used as input, all elements (including padding)
* will be used for view
* - Viewing a device container on the host is not supported **/
* will be used for view
* - Viewing a device container on the host is not supported **/
template <class ucl_type>
inline void view(ucl_type &input, const size_t cols)
{ view(input,1,cols); }
/// Do not allocate memory, instead use an existing allocation from Geryon
/** This function must be passed a Geryon vector or matrix container.
* No memory is freed when the object is destructed.
* - The view does not prevent the memory from being freed by the
* allocating container when using CUDA APIs
* allocating container when using CUDA APIs
* - If a matrix is used as input, all elements (including padding)
* will be used for view when using CUDA APIs
* - Viewing a device container on the host is not supported **/
* will be used for view when using CUDA APIs
* - Viewing a device container on the host is not supported **/
template <class ucl_type>
inline void view(ucl_type &input) {
  // Dimensions are taken directly from the input container
  view(input,input.rows(),input.cols());
}
/// Do not allocate memory, instead use an existing allocation
/** - No memory is freed when the object is destructed.
* - The view does not prevent the memory from being freed by the
* allocating container when using CUDA APIs
* - Viewing a device pointer on the host is not supported
* \param stride Number of _elements_ between the start of each row **/
* allocating container when using CUDA APIs
* - Viewing a device pointer on the host is not supported
* \param stride Number of _elements_ between the start of each row **/
template <class ptr_type>
inline void view(ptr_type *input, const size_t rows, const size_t cols,
const size_t stride, UCL_Device &dev) {
const size_t stride, UCL_Device &dev) {
assert(rows==1 || stride==cols);
clear();
_kind=UCL_VIEW;
@ -197,40 +197,40 @@ class UCL_H_Mat : public UCL_BaseMat {
this->_cq=dev.cq();
_array=input;
_end=_array+_cols;
#ifdef _OCL_MAT
_host_view(*this,dev,_row_bytes*rows);
#endif
#endif
}
/// Do not allocate memory, instead use an existing allocation
/** - No memory is freed when the object is destructed.
* - The view does not prevent the memory from being freed by the
* allocating container when using CUDA APIs
* - Viewing a device pointer on the host is not supported **/
* allocating container when using CUDA APIs
* - Viewing a device pointer on the host is not supported **/
template <class ptr_type>
inline void view(ptr_type *input, const size_t rows, const size_t cols,
UCL_Device &dev) { view(input,rows,cols,cols,dev); }
/// Do not allocate memory, instead use an existing allocation
/** - No memory is freed when the object is destructed.
* - The view does not prevent the memory from being freed by the
* allocating container when using CUDA APIs
* - Viewing a device pointer on the host is not supported **/
* allocating container when using CUDA APIs
* - Viewing a device pointer on the host is not supported **/
template <class ptr_type>
inline void view(ptr_type *input, const size_t cols, UCL_Device &dev)
{ view(input,1,cols,dev); }
/// Do not allocate memory, instead use an existing allocation from Geryon
/** This function must be passed a Geryon vector or matrix container.
* No memory is freed when the object is destructed.
* - The view does not prevent the memory from being freed by the
* allocating container when using CUDA APIs
* - Viewing a device container on the host is not supported
* \param stride Number of _elements_ between the start of each row **/
* allocating container when using CUDA APIs
* - Viewing a device container on the host is not supported
* \param stride Number of _elements_ between the start of each row **/
template <class ucl_type>
inline void view_offset(const size_t offset,ucl_type &input,const size_t rows,
const size_t cols, const size_t stride) {
const size_t cols, const size_t stride) {
assert(rows==1 || stride==cols);
clear();
_kind=UCL_VIEW;
@ -244,81 +244,81 @@ class UCL_H_Mat : public UCL_BaseMat {
_host_view(*this,input,_row_bytes*_rows);
#endif
}
/// Do not allocate memory, instead use an existing allocation from Geryon
/** This function must be passed a Geryon vector or matrix container.
* No memory is freed when the object is destructed.
* - The view does not prevent the memory from being freed by the
* allocating container when using CUDA APIs
* - Viewing a device container on the host is not supported **/
* allocating container when using CUDA APIs
* - Viewing a device container on the host is not supported **/
template <class ucl_type>
inline void view_offset(const size_t offset,ucl_type &input,const size_t rows,
                        const size_t cols) {
  // The row stride defaults to the row size reported by the input container
  view_offset(offset,input,rows,cols,input.row_size());
}
/// Do not allocate memory, instead use an existing allocation from Geryon
/** This function must be passed a Geryon vector or matrix container.
* No memory is freed when the object is destructed.
* - The view does not prevent the memory from being freed by the
* allocating container when using CUDA APIs
* allocating container when using CUDA APIs
* - If a matrix is used as input, all elements (including padding)
* will be used for view
* - Viewing a device container on the host is not supported **/
* will be used for view
* - Viewing a device container on the host is not supported **/
template <class ucl_type>
inline void view_offset(const size_t offset,ucl_type &input,const size_t cols)
{ view_offset(offset,input,1,cols); }
/// Do not allocate memory, instead use an existing allocation from Geryon
/** This function must be passed a Geryon vector or matrix container.
* No memory is freed when the object is destructed.
* - The view does not prevent the memory from being freed by the
* allocating container when using CUDA APIs
* allocating container when using CUDA APIs
* - If a matrix is used as input, all elements (including padding)
* will be used for view
* - Viewing a device container on the host is not supported **/
* will be used for view
* - Viewing a device container on the host is not supported **/
template <class ucl_type>
inline void view_offset(const size_t offset, ucl_type &input) {
  if (input.rows()==1) {
    // Single-row input: view the columns that remain after the offset
    view_offset(offset,input,1,input.cols()-offset);
  } else {
    // Multi-row input: reduce the row count by the number of whole rows
    // (offset/row_size) that the offset skips; the column count is unchanged
    view_offset(offset,input,input.rows()-offset/input.row_size(),
                input.cols());
  }
}
/// Do not allocate memory, instead use an existing allocation
/** - No memory is freed when the object is destructed.
* - The view does not prevent the memory from being freed by the
* allocating container
* - Viewing a device pointer on the host is not supported **/
* allocating container
* - Viewing a device pointer on the host is not supported **/
template <class ptr_type>
inline void view_offset(const size_t offset,ptr_type *input,const size_t rows,
const size_t cols, UCL_Device &dev)
{ view(input+offset,rows,cols,dev); }
/// Do not allocate memory, instead use an existing allocation
/** - No memory is freed when the object is destructed.
* - The view does not prevent the memory from being freed by the
* allocating container when using CUDA APIs
* - Viewing a device pointer on the host is not supported
* \param stride Number of _elements_ between the start of each row **/
* allocating container when using CUDA APIs
* - Viewing a device pointer on the host is not supported
* \param stride Number of _elements_ between the start of each row **/
template <class ptr_type>
inline void view_offset(const size_t offset,ptr_type *input,const size_t rows,
                        const size_t cols,const size_t stride,UCL_Device &dev) {
  // Advance the pointer by offset elements, then view with the given stride
  view(input+offset,rows,cols,stride,dev);
}
/// Do not allocate memory, instead use an existing allocation
/** - No memory is freed when the object is destructed.
* - The view does not prevent the memory from being freed by the
* allocating container when using CUDA APIs
* - Viewing a device pointer on the host is not supported **/
* allocating container when using CUDA APIs
* - Viewing a device pointer on the host is not supported **/
template <class ptr_type>
inline void view_offset(const size_t offset, ptr_type *input,
                        const size_t cols, UCL_Device &dev) {
  // Advance the pointer by offset elements and view a single row of cols
  view(input+offset,1,cols,dev);
}
/// Free memory and set size to 0
inline void clear() {
  _host_free(*this);
  // Return to the empty-view state: zero columns, kind UCL_VIEW
  _cols=0;
  _kind=UCL_VIEW;
}
/// Resize the allocation to rows x cols elements
/** \note Cannot be used on views **/
@ -333,7 +333,7 @@ class UCL_H_Mat : public UCL_BaseMat {
<< " bytes on host.\n";
_row_bytes=0;
UCL_GERYON_EXIT;
#endif
#endif
_row_bytes=0;
return err;
}
@ -347,7 +347,7 @@ class UCL_H_Mat : public UCL_BaseMat {
/// Resize (only if bigger) the allocation to contain rows x cols elements
/** \note Cannot be used on views
  * \return The result of resize() when either dimension must grow,
  *         UCL_SUCCESS otherwise **/
inline int resize_ib(const int rows, const int cols) {
  // NOTE(review): the int arguments are compared against size_t members,
  // so a negative value would convert to a large unsigned number - confirm
  // that callers never pass negatives.
  if (cols>_cols || rows>_rows)
    return resize(rows,cols);
  else
    return UCL_SUCCESS;
}
/// Set each element to zero
@ -376,21 +376,21 @@ class UCL_H_Mat : public UCL_BaseMat {
/// Value of the stored per-row byte count
inline size_t row_bytes() const {
  return _row_bytes;
}
/// Size in bytes of a single element (sizeof(numtyp))
inline int element_size() const {
  return static_cast<int>(sizeof(numtyp));
}
/// Mutable reference to the element at flat index i (no bounds check)
inline numtyp & operator[](const int i) {
  return *(_array+i);
}
/// Read-only reference to the element at flat index i (no bounds check)
inline const numtyp & operator[](const int i) const {
  return *(_array+i);
}
/// 2D access: element at flat index row*_cols+col
/** NOTE(review): previously documented as "row should always be 0",
  * although the indexing handles any row - confirm the intended contract **/
inline numtyp & operator()(const int row, const int col) {
  return _array[row*_cols+col];
}
/// 2D access (read-only): element at flat index row*_cols+col
/** NOTE(review): previously documented as "row should always be 0",
  * although the indexing handles any row - confirm the intended contract **/
inline const numtyp & operator()(const int row, const int col) const {
  return _array[row*_cols+col];
}
/// Address of the internal host data pointer
inline numtyp ** host_ptr() {
  return &_array;
}
/// Offset (in elements) from the begin() pointer to the start of the data
/** \note Always 0 for host matrices and CUDA APIs **/
inline size_t offset() const {
  return 0;
}
@ -409,14 +409,14 @@ class UCL_H_Mat : public UCL_BaseMat {
/// Returns an API specific device pointer (cl_mem& for OpenCL, void ** for CUDA)
inline const void ** cbegin() const {
  // Address of the host array pointer, reinterpreted as const void **
  return (const void **)&_array;
}
#endif
private:
// Host data pointer and end pointer (set as _array plus an element count)
numtyp *_array, *_end;
// Bytes per row, and the matrix dimensions
size_t _row_bytes, _rows, _cols;
#ifdef _OCL_MAT
// Only declared when _OCL_MAT is defined
device_ptr _carray;
#endif
};
#endif