git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7426 f3b2605a-c512-4ea7-a41b-209d697bcdaa

2012-01-06 17:13:10 +00:00
parent bbdb300778
commit aac0e5699f
3 changed files with 32 additions and 16 deletions
--- a/lib/gpu/lal_pppm.cpp
+++ b/lib/gpu/lal_pppm.cpp
@@ -53,9 +53,10 @@ grdtyp * PPPMT::init(const int nlocal, const int nall, FILE *_screen,
                              const int nzhi_out, grdtyp **rho_coeff,
                              grdtyp **vd_brick, const double slab_volfactor, 
                              const int nx_pppm, const int ny_pppm,
-                              const int nz_pppm, int &flag) {
+                              const int nz_pppm, const bool split, int &flag) {
  _max_bytes=10;
  screen=_screen;
+  _kspace_split=split;
  bool success=true;
  flag=device->init(*ans,nlocal,nall);
@@ -359,10 +360,10 @@ void PPPMT::interp(const grdtyp qqrd2e_scale) {
  time_interp.stop();
  ans->copy_answers(false,false,false,false);
-  device->add_ans_object(ans);
+  if (_kspace_split==false)
+    device->add_ans_object(ans);
 }
 template <class numtyp, class acctyp, class grdtyp, class grdtyp4>
 double PPPMT::host_memory_usage() const {
  return device->atom.host_memory_usage()+
--- a/lib/gpu/lal_pppm.h
+++ b/lib/gpu/lal_pppm.h
@@ -48,7 +48,8 @@ class PPPM {
                const int nxhi_out, const int nyhi_out, const int nzhi_out,
                grdtyp **rho_coeff, grdtyp **vd_brick, 
                const double slab_volfactor, const int nx_pppm, 
-                const int ny_pppm, const int nz_pppm, int &success);
+                const int ny_pppm, const int nz_pppm, const bool split, 
+                int &success);
  /// Check if there is enough storage for atom arrays and realloc if not
  /** \param success set to false if insufficient memory **/
@@ -174,7 +175,7 @@ class PPPM {
  UCL_Texture q_tex;
 protected:
-  bool _allocated, _compiled, _precompute_done;
+  bool _allocated, _compiled, _precompute_done, _kspace_split;
  int _block_size, _block_pencils, _pencil_size, _max_brick_atoms, _max_atoms;
  double  _max_bytes, _max_an_bytes;
  double _cpu_idle_time;
--- a/lib/gpu/lal_pppm_ext.cpp
+++ b/lib/gpu/lal_pppm_ext.cpp
@@ -36,7 +36,7 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall,
                       const int nzhi_out, grdtyp **rho_coeff,
                       grdtyp **vd_brick, const double slab_volfactor,
                       const int nx_pppm, const int ny_pppm, const int nz_pppm,
-                       int &success) {
+                       const bool split, int &success) {
  pppm.clear(0.0);
  int first_gpu=pppm.device->first_device();
  int last_gpu=pppm.device->last_device();
@@ -60,7 +60,7 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall,
  if (world_me==0)
    host_brick=pppm.init(nlocal,nall,screen,order,nxlo_out,nylo_out,nzlo_out,
                         nxhi_out,nyhi_out,nzhi_out,rho_coeff,vd_brick,
-                         slab_volfactor,nx_pppm,ny_pppm,nz_pppm,success);
+                         slab_volfactor,nx_pppm,ny_pppm,nz_pppm,split,success);
  pppm.device->world_barrier();
  if (message)
@@ -79,7 +79,7 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall,
      host_brick=pppm.init(nlocal,nall,screen,order,nxlo_out,nylo_out,
                           nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff,
                           vd_brick,slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
-                           success);
+                           split,success);
    pppm.device->gpu_barrier();
    if (message) 
@@ -97,11 +97,12 @@ float * pppm_gpu_init_f(const int nlocal, const int nall, FILE *screen,
                        const int nzhi_out, float **rho_coeff,
                        float **vd_brick, const double slab_volfactor,
                        const int nx_pppm, const int ny_pppm, const int nz_pppm,
-                        int &success) {
+                        const bool split, int &success) {
  float *b=pppm_gpu_init(PPPMF,nlocal,nall,screen,order,nxlo_out,nylo_out,
                         nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff,vd_brick,
-                         slab_volfactor,nx_pppm,ny_pppm,nz_pppm,success);
+                         slab_volfactor,nx_pppm,ny_pppm,nz_pppm,split,success);
-  PPPMF.device->set_single_precompute(&PPPMF);                         
+  if (split==false)
+    PPPMF.device->set_single_precompute(&PPPMF);                         
  return b;
 }
@@ -118,13 +119,19 @@ int pppm_gpu_spread_f(const int ago, const int nlocal, const int nall,
 }
 void pppm_gpu_interp_f(const float qqrd2e_scale) {
-  return PPPMF.interp(qqrd2e_scale);
+  PPPMF.interp(qqrd2e_scale);
 }
 double pppm_gpu_bytes_f() {
  return PPPMF.host_memory_usage();
 }
+void pppm_gpu_forces_f(double **f) {
+  double etmp;
+  PPPMF.atom->data_unavail();
+  PPPMF.ans->get_answers(f,NULL,NULL,NULL,NULL,etmp);
+}
 double * pppm_gpu_init_d(const int nlocal, const int nall, FILE *screen,
                         const int order, const int nxlo_out, 
                         const int nylo_out, const int nzlo_out,
@@ -132,12 +139,13 @@ double * pppm_gpu_init_d(const int nlocal, const int nall, FILE *screen,
                         const int nzhi_out, double **rho_coeff,
                         double **vd_brick, const double slab_volfactor,
                         const int nx_pppm, const int ny_pppm,
-                         const int nz_pppm, int &success) {
+                         const int nz_pppm, const bool split, int &success) {
  double *b=pppm_gpu_init(PPPMD,nlocal,nall,screen,order,nxlo_out,nylo_out,
                          nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff,
                          vd_brick,slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
-                          success);                        
+                          split,success);                        
-  PPPMF.device->set_double_precompute(&PPPMD);                         
+  if (split==false)
+    PPPMD.device->set_double_precompute(&PPPMD);                         
  return b;
 }
@@ -154,10 +162,16 @@ int pppm_gpu_spread_d(const int ago, const int nlocal, const int nall,
 }
 void pppm_gpu_interp_d(const double qqrd2e_scale) {
-  return PPPMD.interp(qqrd2e_scale);
+  PPPMD.interp(qqrd2e_scale);
 }
 double pppm_gpu_bytes_d() {
  return PPPMD.host_memory_usage();
 }
+void pppm_gpu_forces_d(double **f) {
+  double etmp;
+  PPPMD.atom->data_unavail();
+  PPPMD.ans->get_answers(f,NULL,NULL,NULL,NULL,etmp);
+}