git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@10667 f3b2605a-c512-4ea7-a41b-209d697bcdaa

2013-08-23 14:41:20 +00:00
parent 4047288238
commit 402d1a8605
105 changed files with 2003 additions and 954 deletions
--- a/lib/gpu/geryon/ucl_matrix.h
+++ b/lib/gpu/geryon/ucl_matrix.h
@ -48,17 +48,18 @@ class UCL_Matrix {
  /// Construct with specied number of rows and columns
  /** \sa alloc() **/
  UCL_Matrix(const size_t rows, const size_t cols, UCL_Device &acc, 
-             const enum UCL_MEMOPT kind1=UCL_RW_OPTIMIZED,
+             const enum UCL_MEMOPT kind1=UCL_READ_WRITE,
             const enum UCL_MEMOPT kind2=UCL_READ_WRITE)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        alloc(host,device,_buffer,rows,cols,acc,kind1,kind2); }
  
  /// Set up host matrix with specied # of rows/cols and reserve memory
-  /** The kind1 parameter controls memory pinning as follows:
-    * - UCL_NOT_PINNED      - Memory is not pinned
-    * - UCL_WRITE_OPTIMIZED - Memory can be pinned (write-combined)
-    * - UCL_RW_OPTIMIZED    - Memory can be pinned 
-    * The kind2 parameter controls memory optimizations as follows:
+  /** The kind1 parameter controls memory access from the host
+    * - UCL_READ_WRITE - Specify that you will read and write from host
+    * - UCL_WRITE_ONLY - Specify that you will only write from host
+    * - UCL_READ_ONLY  - Specify that you will only read from host
+    * - UCL_NOT_PINNED - Memory is not pinned/page-locked on host
+    * The kind2 parameter controls memory optimizations from the device:
    * - UCL_READ_WRITE - Specify that you will read and write in kernels
    * - UCL_WRITE_ONLY - Specify that you will only write in kernels
    * - UCL_READ_ONLY  - Specify that you will only read in kernels
@ -69,24 +70,25 @@ class UCL_Matrix {
    * \return UCL_SUCCESS if the memory allocation is successful **/
  template <class mat_type>
  inline int alloc(const size_t rows, const size_t cols, mat_type &cq,
-                   const enum UCL_MEMOPT kind1=UCL_RW_OPTIMIZED,
+                   const enum UCL_MEMOPT kind1=UCL_READ_WRITE,
                   const enum UCL_MEMOPT kind2=UCL_READ_WRITE)
    { return _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        alloc(host,device,_buffer,rows,cols,cq,kind1,kind2); }
  
  /// Set up host matrix with specied # of rows/cols and reserve memory
-  /** The kind1 parameter controls memory pinning as follows:
-    * - UCL_NOT_PINNED      - Memory is not pinned
-    * - UCL_WRITE_OPTIMIZED - Memory can be pinned (write-combined)
-    * - UCL_RW_OPTIMIZED    - Memory can be pinned 
-    * The kind2 parameter controls memory optimizations as follows:
+  /** The kind1 parameter controls memory access from the host
+    * - UCL_READ_WRITE - Specify that you will read and write from host
+    * - UCL_WRITE_ONLY - Specify that you will only write from host
+    * - UCL_READ_ONLY  - Specify that you will only read from host
+    * - UCL_NOT_PINNED - Memory is not pinned/page-locked on host
+    * The kind2 parameter controls memory optimizations from the device:
    * - UCL_READ_WRITE - Specify that you will read and write in kernels
    * - UCL_WRITE_ONLY - Specify that you will only write in kernels
    * - UCL_READ_ONLY  - Specify that you will only read in kernels
    * \param device Used to get the default command queue for operations
    * \return UCL_SUCCESS if the memory allocation is successful **/
  inline int alloc(const size_t rows, const size_t cols, UCL_Device &acc,
-                   const enum UCL_MEMOPT kind1=UCL_RW_OPTIMIZED,
+                   const enum UCL_MEMOPT kind1=UCL_READ_WRITE,
                   const enum UCL_MEMOPT kind2=UCL_READ_WRITE)
    { return _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        alloc(host,device,_buffer,rows,cols,acc,kind1,kind2); }
@ -110,11 +112,22 @@ class UCL_Matrix {
    { if (new_rows>rows() || new_cols>cols()) return resize(new_rows,new_cols); 
      else return UCL_SUCCESS; }

-  /// Set each element to zero
-  inline void zero() { host.zero(); device.zero(); }
-  
-  /// Set first n elements to zero
-  inline void zero(const int n) { host.zero(n); device.zero(n); }
+  /// Set each element to zero (asynchronously on device)
+  inline void zero() { zero(cq()); }
+  /// Set first n elements to zero (asynchronously on device)
+  inline void zero(const int n) { zero(n,cq()); }
+  /// Set each element to zero (asynchronously on device)
+  inline void zero(command_queue &cq) {
+    host.zero(); 
+    if (device.kind()!=UCL_VIEW) device.zero(cq);
+    else if (_buffer.numel()>0) _buffer.zero();
+  }
+  /// Set first n elements to zero (asynchronously on device)
+  inline void zero(const int n, command_queue &cq) { 
+    host.zero(n); 
+    if (device.kind()!=UCL_VIEW) device.zero(n,cq); 
+    else if (_buffer.numel()>0) _buffer.zero();
+  }

  /// Get the number of elements
  inline size_t numel() const { return host.numel(); }
@ -145,6 +158,8 @@ class UCL_Matrix {
  
  /// Return the default command queue/stream associated with this data
  inline command_queue & cq() { return host.cq(); }
+  /// Change the default command queue associated with this data
+  inline void cq(command_queue &cq_in) { host.cq(cq_in); device.cq(cq_in); }
  /// Block until command_queue associated with matrix is complete
  inline void sync() { host.sync(); }