git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6053 f3b2605a-c512-4ea7-a41b-209d697bcdaa

2011-05-02 15:02:52 +00:00
parent 2be078632d
commit 5f799182b3
70 changed files with 4489 additions and 2253 deletions
--- a/lib/gpu/pair_gpu_atom.cpp
+++ b/lib/gpu/pair_gpu_atom.cpp
@@ -29,9 +29,8 @@ __win_sort _win_sort;
 #endif

 template <class numtyp, class acctyp>
-PairGPUAtomT::PairGPUAtom() : _compiled(false),_allocated(false),_eflag(false),
-                              _vflag(false),_inum(0),_ilist(NULL), 
-                              _newton(false) {
+PairGPUAtomT::PairGPUAtom() : _compiled(false),_allocated(false),
+                              _max_gpu_bytes(0) {
  #ifndef USE_OPENCL
  sort_config.op = CUDPP_ADD;
  sort_config.datatype = CUDPP_UINT;
@@ -56,28 +55,20 @@ int PairGPUAtomT::bytes_per_atom() const {
  int id_space=0;
  if (_gpu_nbor)
    id_space=2;
-  int bytes=4*sizeof(numtyp)+11*sizeof(acctyp)+id_space;
+  int bytes=4*sizeof(numtyp)+id_space;
  if (_rot)
-    bytes+=4*sizeof(numtyp)+4*sizeof(acctyp);
+    bytes+=4*sizeof(numtyp);
  if (_charge)
    bytes+=sizeof(numtyp);
  return bytes;
 }

 template <class numtyp, class acctyp>
-bool PairGPUAtomT::alloc(const int inum, const int nall) {
+bool PairGPUAtomT::alloc(const int nall) {
  _max_atoms=static_cast<int>(static_cast<double>(nall)*1.10);
-  if (_newton)
-    _max_local=_max_atoms;
-  else
-    _max_local=static_cast<int>(static_cast<double>(inum)*1.10);

  bool success=true;
  
-  int ans_elements=4;
-  if (_rot)
-    ans_elements+=4;
-  
  // Ignore host/device transfers?
  bool cpuview=false;
  if (dev->device_type()==UCL_CPU)
@@ -107,8 +98,6 @@ bool PairGPUAtomT::alloc(const int inum, const int nall) {
  success=success && (host_x.alloc(_max_atoms*4,*dev,
                      UCL_WRITE_OPTIMIZED)==UCL_SUCCESS);
  #endif                      
-  success=success &&(host_ans.alloc(ans_elements*_max_local,*dev)==UCL_SUCCESS);
-  success=success &&(host_engv.alloc(_ev_fields*_max_local,*dev)==UCL_SUCCESS);
  // Buffer for casting only if different precisions
  if (_charge)
    success=success && (host_q.alloc(_max_atoms,*dev,
@@ -120,15 +109,13 @@ bool PairGPUAtomT::alloc(const int inum, const int nall) {

    
  // ---------------------------  Device allocations
-  _gpu_bytes=0;
+  int gpu_bytes=0;
  if (cpuview) {
    #ifdef GPU_CAST
    assert(0==1);
    #else
    dev_x.view(host_x);
    #endif
-    dev_engv.view(host_engv);
-    dev_ans.view(host_ans);
    if (_rot)
      dev_quat.view(host_quat);
    if (_charge)
@@ -140,49 +127,80 @@ bool PairGPUAtomT::alloc(const int inum, const int nall) {
                        dev_x_cast.alloc(_max_atoms*3,*dev,UCL_READ_ONLY));
    success=success && (UCL_SUCCESS==
                        dev_type_cast.alloc(_max_atoms,*dev,UCL_READ_ONLY));
-    _gpu_bytes+=dev_x_cast.row_bytes()+dev_type_cast.row_bytes();
+    gpu_bytes+=dev_x_cast.row_bytes()+dev_type_cast.row_bytes();
    #else
    success=success && (UCL_SUCCESS==
                        dev_x.alloc(_max_atoms*4,*dev,UCL_READ_ONLY));
    #endif
-    success=success && (dev_engv.alloc(_ev_fields*_max_local,*dev,
-                                       UCL_WRITE_ONLY)==UCL_SUCCESS);
-    success=success && (dev_ans.alloc(ans_elements*_max_local,
-                                      *dev,UCL_WRITE_ONLY)==UCL_SUCCESS);
    if (_charge) {
      success=success && (dev_q.alloc(_max_atoms,*dev,
                                      UCL_READ_ONLY)==UCL_SUCCESS);
-      _gpu_bytes+=dev_q.row_bytes();
+      gpu_bytes+=dev_q.row_bytes();
    }
    if (_rot) {
      success=success && (dev_quat.alloc(_max_atoms*4,*dev,
                                      UCL_READ_ONLY)==UCL_SUCCESS);
-      _gpu_bytes+=dev_quat.row_bytes();
+      gpu_bytes+=dev_quat.row_bytes();
    }
  }
  if (_gpu_nbor) {
    success=success && (dev_cell_id.alloc(_max_atoms,*dev)==UCL_SUCCESS);
    success=success && (dev_particle_id.alloc(_max_atoms,*dev)==UCL_SUCCESS);
-    _gpu_bytes+=dev_cell_id.row_bytes()+dev_particle_id.row_bytes();
+    gpu_bytes+=dev_cell_id.row_bytes()+dev_particle_id.row_bytes();
    if (_bonds) {
      success=success && (dev_tag.alloc(_max_atoms,*dev)==UCL_SUCCESS);
-      _gpu_bytes+=dev_tag.row_bytes();
+      gpu_bytes+=dev_tag.row_bytes();
    }
  }

-  _gpu_bytes+=dev_x.row_bytes()+dev_engv.row_bytes()+dev_ans.row_bytes();
+  gpu_bytes+=dev_x.row_bytes();
+  if (gpu_bytes>_max_gpu_bytes)
+    _max_gpu_bytes=gpu_bytes;
  
  _allocated=true;  
  return success;
 }

 template <class numtyp, class acctyp>
-bool PairGPUAtomT::init(const int inum, const int nall, const bool charge,
-                        const bool rot, UCL_Device &devi, const bool gpu_nbor,
+bool PairGPUAtomT::add_fields(const bool charge, const bool rot,  // Enable any requested per-atom field flags that are not already set.
+                              const bool gpu_nbor, const bool bonds) {  // Flags are only ever switched on here, never off.
+  bool realloc=false;  // set to true as soon as one flag is newly enabled
+  if (charge && _charge==false) {  // charge requested but not enabled yet
+    _charge=true;
+    realloc=true;
+  }
+  if (rot && _rot==false) {  // rot requested but not enabled yet
+    _rot=true;
+    realloc=true;
+  }
+  if (gpu_nbor && _gpu_nbor==false) {  // gpu_nbor requested but not enabled yet
+    _gpu_nbor=true;
+    realloc=true;
+  }
+  if (bonds && _bonds==false) {  // bonds requested but not enabled yet
+    _bonds=true;
+    realloc=true;
+  }
+  if (realloc) {  // at least one new flag: clear the buffers and allocate again
+    _other=_charge || _rot;  // keep _other consistent with the updated flags
+    int max_atoms=_max_atoms;  // saved before clear_resize(); presumably that call may reset _max_atoms - TODO confirm
+    clear_resize();
+    return alloc(max_atoms);  // alloc() pads this count by 10% itself; its success flag is returned
+  }
+  return true;  // nothing new was requested: no reallocation, report success
+}
+
+template <class numtyp, class acctyp>
+bool PairGPUAtomT::init(const int nall, const bool charge, const bool rot,
+                        UCL_Device &devi, const bool gpu_nbor,
                        const bool bonds) {
  clear();

  bool success=true;
+  _x_avail=false;
+  _q_avail=false;
+  _quat_avail=false;
+  _resized=false;
  _gpu_nbor=gpu_nbor;
  _bonds=bonds;
  _charge=charge;
@@ -190,33 +208,25 @@ bool PairGPUAtomT::init(const int inum, const int nall, const bool charge,
  _other=_charge || _rot;
  dev=&devi;

-  _e_fields=1;
-  if (_charge)
-    _e_fields++;
-  _ev_fields=6+_e_fields;
-    
  // Initialize atom and nbor data
-  int ef_inum=inum;
-  if (ef_inum==0)
-    ef_inum=1000;
  int ef_nall=nall;
-  if (ef_nall<=ef_inum)
-    ef_nall=ef_inum*2;
+  if (ef_nall==0)
+    ef_nall=2000;
  
  // Initialize timers for the selected device
  time_pos.init(*dev);
-  time_other.init(*dev);
-  time_answer.init(*dev);
+  time_q.init(*dev);
+  time_quat.init(*dev);
  time_pos.zero();
-  time_other.zero();
-  time_answer.zero();
+  time_q.zero();
+  time_quat.zero();
  _time_cast=0.0;
  
  #ifdef GPU_CAST
  compile_kernels(*dev);
  #endif
  
-  return success && alloc(ef_inum,ef_nall);
+  return success && alloc(ef_nall);
 }
  
 template <class numtyp, class acctyp>
@@ -234,16 +244,12 @@ void PairGPUAtomT::clear_resize() {
    dev_quat.clear();
    host_quat.clear();
  }
-  dev_ans.clear();
-  dev_engv.clear();
  #ifndef GPU_CAST
  host_x.clear();
  #else
  host_x_cast.clear();
  host_type_cast.clear();
  #endif
-  host_ans.clear();
-  host_engv.clear();
  dev_cell_id.clear();
  dev_particle_id.clear();
  dev_tag.clear();
@@ -261,17 +267,14 @@ void PairGPUAtomT::clear_resize() {

 template <class numtyp, class acctyp>
 void PairGPUAtomT::clear() {
-  _gpu_bytes=0;
+  _max_gpu_bytes=0;
  if (!_allocated)
    return;

  time_pos.clear();
-  time_other.clear();
-  time_answer.clear();
+  time_q.clear();
+  time_quat.clear();
  clear_resize();
-  _inum=0;
-  _eflag=false;
-  _vflag=false;

  #ifdef GPU_CAST
  if (_compiled) {
@@ -289,255 +292,10 @@ double PairGPUAtomT::host_memory_usage() const {
    atom_bytes+=1;
  if (_rot) 
    atom_bytes+=4;
-  int ans_bytes=atom_bytes+_ev_fields;
  return _max_atoms*atom_bytes*sizeof(numtyp)+
-         ans_bytes*(_max_local)*sizeof(acctyp)+
         sizeof(PairGPUAtom<numtyp,acctyp>);
 }
  
-template <class numtyp, class acctyp>
-void PairGPUAtomT::copy_answers(const bool eflag, const bool vflag,
-                                const bool ef_atom, const bool vf_atom) {
-  time_answer.start();
-  _eflag=eflag;
-  _vflag=vflag;
-  _ef_atom=ef_atom;
-  _vf_atom=vf_atom;
-    
-  int csize=_ev_fields;    
-  if (!eflag)
-    csize-=_e_fields;
-  if (!vflag)
-    csize-=6;
-      
-  if (csize>0)
-    ucl_copy(host_engv,dev_engv,_inum*csize,true);
-  if (_rot)
-    ucl_copy(host_ans,dev_ans,_inum*4*2,true);
-  else
-    ucl_copy(host_ans,dev_ans,_inum*4,true);
-  time_answer.stop();
-}
-
-template <class numtyp, class acctyp>
-void PairGPUAtomT::copy_answers(const bool eflag, const bool vflag,
-                                const bool ef_atom, const bool vf_atom,
-                                int *ilist) {
-  _ilist=ilist;
-  copy_answers(eflag,vflag,ef_atom,vf_atom);
-}
-
-template <class numtyp, class acctyp>
-double PairGPUAtomT::energy_virial(double *eatom, double **vatom,
-                                   double *virial) {
-  if (_eflag==false && _vflag==false)
-    return 0.0;
-
-  double evdwl=0.0;
-  if (_gpu_nbor) {
-    for (int i=0; i<_inum; i++) {
-      acctyp *ap=host_engv.begin()+i;
-      if (_eflag) {
-        if (_ef_atom) {
-          evdwl+=*ap;
-          eatom[i]+=*ap*0.5;
-          ap+=_inum;
-        } else {
-          evdwl+=*ap;
-          ap+=_inum;
-        }
-      }
-      if (_vflag) {
-        if (_vf_atom) {
-          for (int j=0; j<6; j++) {
-            vatom[i][j]+=*ap*0.5;
-            virial[j]+=*ap;
-            ap+=_inum;
-          }
-        } else {
-          for (int j=0; j<6; j++) {
-            virial[j]+=*ap;
-            ap+=_inum;
-          }
-        }
-      }
-    }
-    for (int j=0; j<6; j++)
-      virial[j]*=0.5;
-  } else {
-    for (int i=0; i<_inum; i++) {
-      acctyp *ap=host_engv.begin()+i;
-      int ii=_ilist[i];
-      if (_eflag) {
-        if (_ef_atom) {
-          evdwl+=*ap;
-          eatom[ii]+=*ap*0.5;
-          ap+=_inum;
-        } else {
-          evdwl+=*ap;
-          ap+=_inum;
-        }
-      }
-      if (_vflag) {
-        if (_vf_atom) {
-          for (int j=0; j<6; j++) {
-            vatom[ii][j]+=*ap*0.5;
-            virial[j]+=*ap;
-            ap+=_inum;
-          }
-        } else {
-          for (int j=0; j<6; j++) {
-            virial[j]+=*ap;
-            ap+=_inum;
-          }
-        }
-      }
-    }
-    for (int j=0; j<6; j++)
-      virial[j]*=0.5;
-  }
-  
-  evdwl*=0.5;
-  return evdwl;
-}
-
-template <class numtyp, class acctyp>
-double PairGPUAtomT::energy_virial(double *eatom, double **vatom,
-                                   double *virial, double &ecoul) {
-  if (_eflag==false && _vflag==false) {
-    ecoul=0.0;
-    return 0.0;
-  }
-
-  if (_charge==false)
-    return energy_virial(eatom,vatom,virial);
-
-  double evdwl=0.0;
-  double _ecoul=0.0;
-  if (_gpu_nbor) {
-    for (int i=0; i<_inum; i++) {
-      acctyp *ap=host_engv.begin()+i;
-      if (_eflag) {
-        if (_ef_atom) {
-          evdwl+=*ap;
-          eatom[i]+=*ap*0.5;
-          ap+=_inum;
-          _ecoul+=*ap;
-          eatom[i]+=*ap*0.5;
-          ap+=_inum;
-        } else {
-          evdwl+=*ap;
-          ap+=_inum;
-          _ecoul+=*ap;
-          ap+=_inum;
-        }
-      }
-      if (_vflag) {
-        if (_vf_atom) {
-          for (int j=0; j<6; j++) {
-            vatom[i][j]+=*ap*0.5;
-            virial[j]+=*ap;
-            ap+=_inum;
-          }
-        } else {
-          for (int j=0; j<6; j++) {
-            virial[j]+=*ap;
-            ap+=_inum;
-          }
-        }
-      }
-    }
-    for (int j=0; j<6; j++)
-      virial[j]*=0.5;
-  } else {
-    for (int i=0; i<_inum; i++) {
-      acctyp *ap=host_engv.begin()+i;
-      int ii=_ilist[i];
-      if (_eflag) {
-        if (_ef_atom) {
-          evdwl+=*ap;
-          eatom[ii]+=*ap*0.5;
-          ap+=_inum;
-          _ecoul+=*ap;
-          eatom[ii]+=*ap*0.5;
-          ap+=_inum;
-        } else {
-          evdwl+=*ap;
-          ap+=_inum;
-          _ecoul+=*ap;
-          ap+=_inum;
-        }
-      }
-      if (_vflag) {
-        if (_vf_atom) {
-          for (int j=0; j<6; j++) {
-            vatom[ii][j]+=*ap*0.5;
-            virial[j]+=*ap;
-            ap+=_inum;
-          }
-        } else {
-          for (int j=0; j<6; j++) {
-            virial[j]+=*ap;
-            ap+=_inum;
-          }
-        }
-      }
-    }
-    for (int j=0; j<6; j++)
-      virial[j]*=0.5;
-  }
-  
-  evdwl*=0.5;
-  ecoul+=_ecoul*0.5;
-  return evdwl;
-}
-
-template <class numtyp, class acctyp>
-void PairGPUAtomT::get_answers(double **f, double **tor) {
-  acctyp *ap=host_ans.begin();
-  if (_gpu_nbor) {
-    for (int i=0; i<_inum; i++) {
-      f[i][0]+=*ap;
-      ap++;
-      f[i][1]+=*ap;
-      ap++;
-      f[i][2]+=*ap;
-      ap+=2;
-    }
-    if (_rot) {
-      for (int i=0; i<_inum; i++) {
-        tor[i][0]+=*ap;
-        ap++;
-        tor[i][1]+=*ap;
-        ap++;
-        tor[i][2]+=*ap;
-        ap+=2;
-      }
-    }
-  } else {
-    for (int i=0; i<_inum; i++) {
-      int ii=_ilist[i];
-      f[ii][0]+=*ap;
-      ap++;
-      f[ii][1]+=*ap;
-      ap++;
-      f[ii][2]+=*ap;
-      ap+=2;
-    }
-    if (_rot) {
-      for (int i=0; i<_inum; i++) {
-        int ii=_ilist[i];
-        tor[ii][0]+=*ap;
-        ap++;
-        tor[ii][1]+=*ap;
-        ap++;
-        tor[ii][2]+=*ap;
-        ap+=2;
-      }
-    }
-  }
-}
-
 // Sort arrays for neighbor list calculation
 template <class numtyp, class acctyp>
 void PairGPUAtomT::sort_neighbor(const int num_atoms) {