diff --git a/lib/gpu/atomic_gpu_memory.cpp b/lib/gpu/atomic_gpu_memory.cpp
index fbf5262089..5279223f20 100644
--- a/lib/gpu/atomic_gpu_memory.cpp
+++ b/lib/gpu/atomic_gpu_memory.cpp
@@ -68,6 +68,9 @@ bool AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall,
     _block_size=ucl_device->group_size();
   compile_kernels(*ucl_device,pair_program);
 
+  // Initialize host-device load balancer
+  hd_balancer.init(device,gpu_nbor,gpu_split);
+
   // Initialize timers for the selected GPU
   time_pair.init(*ucl_device);
   time_pair.zero();
@@ -76,9 +79,6 @@ bool AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall,
 
   _max_an_bytes=ans->gpu_bytes()+nbor->gpu_bytes();
 
-  // Initialize host-device load balancer
-  hd_balancer.init(device,gpu_nbor,gpu_split);
-
   return true;
 }
 
@@ -204,52 +204,6 @@ void AtomicGPUMemoryT::compute(const int f_ago, const int inum_full,
   hd_balancer.stop_timer();
 }
 
-// ---------------------------------------------------------------------------
-// Reneighbor on GPU if necessary and then compute forces, virials, energies
-// ---------------------------------------------------------------------------
-template <class numtyp, class acctyp>
-int * AtomicGPUMemoryT::compute(const int ago, const int inum_full,
-                                const int nall, double **host_x, int *host_type,
-                                double *boxlo, double *boxhi, int *tag,
-                                int **nspecial, int **special, const bool eflag, 
-                                const bool vflag, const bool eatom,
-                                const bool vatom, int &host_start,
-                                const double cpu_time, bool &success) {
-  acc_timers();
-  if (inum_full==0) {
-    host_start=0;
-    // Make sure textures are correct if realloc by a different hybrid style
-    resize_atom(0,nall,success);
-    zero_timers();
-    return NULL;
-  }
-  
-  hd_balancer.balance(cpu_time);
-  int inum=hd_balancer.get_gpu_count(ago,inum_full);
-  ans->inum(inum);
-  host_start=inum;
- 
-  // Build neighbor list on GPU if necessary
-  if (ago==0) {
-    build_nbor_list(inum, inum_full-inum, nall, host_x, host_type,
-                    boxlo, boxhi, tag, nspecial, special, success);
-    if (!success)
-      return NULL;
-    hd_balancer.start_timer();
-  } else {
-    atom->cast_x_data(host_x,host_type);
-    hd_balancer.start_timer();
-    atom->add_x_data(host_x,host_type);
-  }
-
-  loop(eflag,vflag);
-  ans->copy_answers(eflag,vflag,eatom,vatom);
-  device->add_ans_object(ans);
-  hd_balancer.stop_timer();
-  
-  return nbor->host_nbor.begin();
-}
-
 // ---------------------------------------------------------------------------
 // Reneighbor on GPU if necessary and then compute forces, virials, energies
 // ---------------------------------------------------------------------------
diff --git a/lib/gpu/charge_gpu_memory.cpp b/lib/gpu/charge_gpu_memory.cpp
index a0c2d2f5d7..47b200ce23 100644
--- a/lib/gpu/charge_gpu_memory.cpp
+++ b/lib/gpu/charge_gpu_memory.cpp
@@ -212,12 +212,13 @@ void ChargeGPUMemoryT::compute(const int f_ago, const int inum_full,
 // Reneighbor on GPU if necessary and then compute forces, virials, energies
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
-int * ChargeGPUMemoryT::compute(const int ago, const int inum_full,
+int** ChargeGPUMemoryT::compute(const int ago, const int inum_full,
                                 const int nall, double **host_x, int *host_type,
                                 double *boxlo, double *boxhi, int *tag,
                                 int **nspecial, int **special, const bool eflag, 
                                 const bool vflag, const bool eatom,
                                 const bool vatom, int &host_start,
+                                int **ilist, int **jnum,
                                 const double cpu_time, bool &success,
                                 double *host_q) {
   acc_timers();
@@ -249,13 +250,15 @@ int * ChargeGPUMemoryT::compute(const int ago, const int inum_full,
     atom->add_x_data(host_x,host_type);
   }
   atom->add_q_data();
+  *ilist=nbor->host_ilist.begin();
+  *jnum=nbor->host_acc.begin();
 
   loop(eflag,vflag);
   ans->copy_answers(eflag,vflag,eatom,vatom);
   device->add_ans_object(ans);
   hd_balancer.stop_timer();
   
-  return nbor->host_nbor.begin();
+  return nbor->host_jlist.begin()-host_start;
 }
 
 template <class numtyp, class acctyp>
diff --git a/lib/gpu/charge_gpu_memory.h b/lib/gpu/charge_gpu_memory.h
index ef8d8b0281..b9d2911c1e 100644
--- a/lib/gpu/charge_gpu_memory.h
+++ b/lib/gpu/charge_gpu_memory.h
@@ -127,12 +127,13 @@ class ChargeGPUMemory {
                const double cpu_time, bool &success, double *charge);
 
   /// Pair loop with device neighboring
-  int * compute(const int ago, const int inum_full, const int nall,
+  int** compute(const int ago, const int inum_full, const int nall,
                 double **host_x, int *host_type, double *boxlo,
                 double *boxhi, int *tag, int **nspecial,
                 int **special, const bool eflag, const bool vflag, 
                 const bool eatom, const bool vatom, int &host_start, 
-                const double cpu_time, bool &success, double *charge);
+                int **ilist, int **numj, const double cpu_time, bool &success,
+                double *charge);
 
   // -------------------------- DEVICE DATA ------------------------- 
 
diff --git a/lib/gpu/cmm_cut_gpu.cpp b/lib/gpu/cmm_cut_gpu.cpp
index cc299c4dad..f8ffd29c55 100644
--- a/lib/gpu/cmm_cut_gpu.cpp
+++ b/lib/gpu/cmm_cut_gpu.cpp
@@ -89,6 +89,7 @@ bool cmm_gpu_init(const int ntypes, double **cutsq, int **cg_types,
   }
   if (message)
     fprintf(screen,"\n");
+  CMMMF.estimate_gpu_overhead();
   return true;
 }
 
@@ -96,15 +97,16 @@ void cmm_gpu_clear() {
   CMMMF.clear();
 }
 
-int * cmm_gpu_compute_n(const int ago, const int inum_full,
+int** cmm_gpu_compute_n(const int ago, const int inum_full,
                         const int nall, double **host_x, int *host_type,
                         double *boxlo, double *boxhi, int *tag, int **nspecial,
                         int **special, const bool eflag, const bool vflag,
                         const bool eatom, const bool vatom, int &host_start,
-                        const double cpu_time, bool &success) {
+                        int **ilist, int **jnum, const double cpu_time,
+                        bool &success) {
   return CMMMF.compute(ago, inum_full, nall, host_x, host_type, boxlo,
                        boxhi, tag, nspecial, special, eflag, vflag, eatom,
-                       vatom, host_start, cpu_time, success);
+                       vatom, host_start, ilist, jnum, cpu_time, success);
 }  
 			
 void cmm_gpu_compute(const int ago, const int inum_full, const int nall,
diff --git a/lib/gpu/cmmc_long_gpu.cpp b/lib/gpu/cmmc_long_gpu.cpp
index cab192b5aa..752c281af9 100644
--- a/lib/gpu/cmmc_long_gpu.cpp
+++ b/lib/gpu/cmmc_long_gpu.cpp
@@ -94,6 +94,7 @@ bool cmml_gpu_init(const int ntypes, double **cutsq, int **cg_type,
   }
   if (message)
     fprintf(screen,"\n");
+  CMMLMF.estimate_gpu_overhead();
   return true;
 }
 
@@ -101,15 +102,17 @@ void cmml_gpu_clear() {
   CMMLMF.clear();
 }
 
-int * cmml_gpu_compute_n(const int ago, const int inum_full,
+int** cmml_gpu_compute_n(const int ago, const int inum_full,
                          const int nall, double **host_x, int *host_type,
                          double *boxlo, double *boxhi, int *tag, int **nspecial, 
                          int **special, const bool eflag, const bool vflag,
                          const bool eatom, const bool vatom, int &host_start,
-                         const double cpu_time, bool &success, double *host_q) {
+                         int **ilist, int **jnum, const double cpu_time,
+                         bool &success, double *host_q) {
   return CMMLMF.compute(ago, inum_full, nall, host_x, host_type, boxlo,
                         boxhi, tag, nspecial, special, eflag, vflag, eatom,
-                        vatom, host_start, cpu_time, success, host_q);
+                        vatom, host_start, ilist, jnum, cpu_time, success,
+                        host_q);
 }  
 			
 void cmml_gpu_compute(const int ago, const int inum_full, const int nall,
diff --git a/lib/gpu/cmmc_msm_gpu.cpp b/lib/gpu/cmmc_msm_gpu.cpp
index 6b3babbb7c..66e7bd4b04 100644
--- a/lib/gpu/cmmc_msm_gpu.cpp
+++ b/lib/gpu/cmmc_msm_gpu.cpp
@@ -94,6 +94,7 @@ bool cmmm_gpu_init(const int ntypes, double **cutsq, int **cg_type,
   }
   if (message)
     fprintf(screen,"\n");
+  CMMMMF.estimate_gpu_overhead();
   return true;
 }
 
@@ -101,15 +102,17 @@ void cmmm_gpu_clear() {
   CMMMMF.clear();
 }
 
-int * cmmm_gpu_compute_n(const int ago, const int inum_full,
+int** cmmm_gpu_compute_n(const int ago, const int inum_full,
                          const int nall, double **host_x, int *host_type,
                          double *boxlo, double *boxhi, int *tag, int **nspecial, 
                          int **special, const bool eflag, const bool vflag,
                          const bool eatom, const bool vatom, int &host_start,
-                         const double cpu_time, bool &success, double *host_q) {
+                         int **ilist, int **jnum, const double cpu_time,
+                         bool &success, double *host_q) {
   return CMMMMF.compute(ago, inum_full, nall, host_x, host_type, boxlo,
                         boxhi, tag, nspecial, special, eflag, vflag, eatom,
-                        vatom, host_start, cpu_time, success, host_q);
+                        vatom, host_start, ilist, jnum, cpu_time, success,
+                        host_q);
 }  
 			
 void cmmm_gpu_compute(const int ago, const int inum_full, const int nall,
diff --git a/lib/gpu/crml_gpu.cpp b/lib/gpu/crml_gpu.cpp
index 347febe729..ea15346747 100644
--- a/lib/gpu/crml_gpu.cpp
+++ b/lib/gpu/crml_gpu.cpp
@@ -99,6 +99,7 @@ bool crml_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1,
   }
   if (message)
     fprintf(screen,"\n");
+  CRMLMF.estimate_gpu_overhead();
   return true;
 }
 
@@ -106,15 +107,17 @@ void crml_gpu_clear() {
   CRMLMF.clear();
 }
 
-int * crml_gpu_compute_n(const int ago, const int inum_full,
+int** crml_gpu_compute_n(const int ago, const int inum_full,
                          const int nall, double **host_x, int *host_type,
                          double *boxlo, double *boxhi, int *tag, int **nspecial, 
                          int **special, const bool eflag, const bool vflag,
                          const bool eatom, const bool vatom, int &host_start,
-                         const double cpu_time, bool &success, double *host_q) {
+                         int **ilist, int **jnum, const double cpu_time,
+                         bool &success, double *host_q) {
   return CRMLMF.compute(ago, inum_full, nall, host_x, host_type, boxlo,
                         boxhi, tag, nspecial, special, eflag, vflag, eatom,
-                        vatom, host_start, cpu_time, success, host_q);
+                        vatom, host_start, ilist, jnum, cpu_time, success,
+                        host_q);
 }  
 			
 void crml_gpu_compute(const int ago, const int inum_full,
diff --git a/lib/gpu/gb_gpu.cpp b/lib/gpu/gb_gpu.cpp
index 7afe4ff262..c13a5ae638 100644
--- a/lib/gpu/gb_gpu.cpp
+++ b/lib/gpu/gb_gpu.cpp
@@ -114,6 +114,7 @@ bool gb_gpu_init(const int ntypes, const double gamma,
   }
   if (message)
     fprintf(screen,"\n");
+  GBMF.estimate_gpu_overhead();
   return true;
 }
 
@@ -326,14 +327,14 @@ void _gb_gpu_gayberne(GBMT &gbm, const bool _eflag, const bool _vflag) {
 // Reneighbor on GPU if necessary and then compute forces, torques, energies
 // ---------------------------------------------------------------------------
 template <class gbmtyp>
-inline int * _gb_gpu_compute_n(gbmtyp &gbm, const int ago,
-		               const int inum_full, const int nall,
-			       double **host_x, int *host_type,
-			       double *boxlo, double *boxhi, const bool eflag,
-			       const bool vflag, const bool eatom,
+inline int** _gb_gpu_compute_n(gbmtyp &gbm, const int ago,
+                               const int inum_full, const int nall,
+                               double **host_x, int *host_type,
+                               double *boxlo, double *boxhi, const bool eflag,
+                               const bool vflag, const bool eatom,
                                const bool vatom, int &host_start,
-		               const double cpu_time, bool &success,
-			       double **host_quat) {
+                               int **ilist, int **jnum, const double cpu_time,
+                               bool &success, double **host_quat) {
   gbm.acc_timers();
   if (inum_full==0) {
     host_start=0;
@@ -363,23 +364,25 @@ inline int * _gb_gpu_compute_n(gbmtyp &gbm, const int ago,
   }
 
   gbm.atom->add_quat_data();
+  *ilist=gbm.nbor->host_ilist.begin();
+  *jnum=gbm.nbor->host_acc.begin();
 
   _gb_gpu_gayberne<PRECISION,ACC_PRECISION>(gbm,eflag,vflag);
   gbm.ans->copy_answers(eflag,vflag,eatom,vatom);
   gbm.device->add_ans_object(gbm.ans);
   gbm.hd_balancer.stop_timer();
-  return gbm.nbor->host_nbor.begin();
+  return gbm.nbor->host_jlist.begin()-host_start;
 }
 
-int * gb_gpu_compute_n(const int ago, const int inum_full, const int nall,
+int** gb_gpu_compute_n(const int ago, const int inum_full, const int nall,
                        double **host_x, int *host_type, double *boxlo,
                        double *boxhi, const bool eflag, const bool vflag,
                        const bool eatom, const bool vatom, int &host_start,
-                       const double cpu_time, bool &success,
-                       double **host_quat) {
+                       int **ilist, int **jnum, const double cpu_time,
+                       bool &success, double **host_quat) {
   return _gb_gpu_compute_n(GBMF, ago, inum_full, nall, host_x, host_type, boxlo,
-                           boxhi, eflag, vflag, eatom, vatom, host_start,
-                           cpu_time, success, host_quat);
+                           boxhi, eflag, vflag, eatom, vatom, host_start, ilist,
+                           jnum, cpu_time, success, host_quat);
 }  
 
 // ---------------------------------------------------------------------------
diff --git a/lib/gpu/gb_gpu_memory.cpp b/lib/gpu/gb_gpu_memory.cpp
index 61078b8122..6592bc1e40 100644
--- a/lib/gpu/gb_gpu_memory.cpp
+++ b/lib/gpu/gb_gpu_memory.cpp
@@ -198,6 +198,11 @@ bool GB_GPU_MemoryT::init(const int ntypes, const double gamma,
   return (host_olist.alloc(nbor->max_atoms(),*ucl_device)==UCL_SUCCESS);
 }
 
+template <class numtyp, class acctyp>
+void GB_GPU_MemoryT::estimate_gpu_overhead() {
+  device->estimate_gpu_overhead(2,_gpu_overhead,_driver_overhead);
+}
+
 template <class numtyp, class acctyp>
 void GB_GPU_MemoryT::clear() {
   if (!_allocated)
@@ -213,7 +218,7 @@ void GB_GPU_MemoryT::clear() {
 
   // Output any timing information
   acc_timers();
-  double single[6], times[6];
+  double single[9], times[9];
 
   single[0]=atom->transfer_time()+ans->transfer_time();
   single[1]=nbor->time_nbor.total_seconds();
@@ -225,6 +230,9 @@ void GB_GPU_MemoryT::clear() {
   else
     single[4]=0;
   single[5]=atom->cast_time()+ans->cast_time();
+  single[6]=_gpu_overhead;
+  single[7]=_driver_overhead;
+  single[8]=ans->cpu_idle_time();
 
   MPI_Reduce(single,times,6,MPI_DOUBLE,MPI_SUM,0,device->replica());
   double avg_split=hd_balancer.all_avg_split();
@@ -259,10 +267,19 @@ void GB_GPU_MemoryT::clear() {
         fprintf(screen,"Force calc:      %.4f s.\n",times[3]/replica_size);
         fprintf(screen,"LJ calc:         %.4f s.\n",times[4]/replica_size);
       }
+      fprintf(screen,"GPU Overhead:    %.4f s.\n",times[6]/replica_size);
       fprintf(screen,"Average split:   %.4f.\n",avg_split);
       fprintf(screen,"Max Mem / Proc:  %.2f MB.\n",max_mb);
+      fprintf(screen,"CPU Driver_Time: %.4f s.\n",times[7]/replica_size);
+      fprintf(screen,"CPU Idle_Time:   %.4f s.\n",times[8]/replica_size);
       fprintf(screen,"-------------------------------------");
       fprintf(screen,"--------------------------------\n\n");
+
+
+      fprintf(screen,"Average split:   %.4f.\n",avg_split);
+      fprintf(screen,"Max Mem / Proc:  %.2f MB.\n",max_mb);
+
+
     }
   _max_bytes=0.0;
 
diff --git a/lib/gpu/gb_gpu_memory.h b/lib/gpu/gb_gpu_memory.h
index b9c0ce8c53..edba318ace 100644
--- a/lib/gpu/gb_gpu_memory.h
+++ b/lib/gpu/gb_gpu_memory.h
@@ -46,6 +46,9 @@ class GB_GPU_Memory {
             const int max_nbors, const double cell_size,
             const double gpu_split, FILE *screen);
 
+  /// Estimate the overhead for GPU context changes and CPU driver
+  void estimate_gpu_overhead();
+
   /// Check if there is enough storage for atom arrays and realloc if not
   /** \param success set to false if insufficient memory **/
   inline void resize_atom(const int inum, const int nall, bool &success) {
@@ -194,6 +197,7 @@ class GB_GPU_Memory {
   bool _allocated, _compiled;
   int _block_size;
   double _max_bytes;
+  double _gpu_overhead, _driver_overhead;
   
   void compile_kernels(UCL_Device &dev);
 };
diff --git a/lib/gpu/lj96_cut_gpu.cpp b/lib/gpu/lj96_cut_gpu.cpp
index fe5df7826e..9b2a555ac4 100644
--- a/lib/gpu/lj96_cut_gpu.cpp
+++ b/lib/gpu/lj96_cut_gpu.cpp
@@ -88,6 +88,7 @@ bool lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
   }
   if (message)
     fprintf(screen,"\n");
+  LJ96MF.estimate_gpu_overhead();
   return true;
 }
 
@@ -95,15 +96,16 @@ void lj96_gpu_clear() {
   LJ96MF.clear();
 }
 
-int * lj96_gpu_compute_n(const int ago, const int inum_full,
+int** lj96_gpu_compute_n(const int ago, const int inum_full,
                          const int nall, double **host_x, int *host_type,
                          double *boxlo, double *boxhi, int *tag, int **nspecial,
                          int **special, const bool eflag, const bool vflag,
                          const bool eatom, const bool vatom, int &host_start,
-                         const double cpu_time, bool &success) {
+                         int **ilist, int **jnum, const double cpu_time,
+                         bool &success) {
   return LJ96MF.compute(ago, inum_full, nall, host_x, host_type, boxlo,
                         boxhi, tag, nspecial, special, eflag, vflag, eatom,
-                        vatom, host_start, cpu_time, success);
+                        vatom, host_start, ilist, jnum, cpu_time, success);
 }  
 			
 void lj96_gpu_compute(const int ago, const int inum_full, const int nall,
diff --git a/lib/gpu/lj_expand_gpu.cpp b/lib/gpu/lj_expand_gpu.cpp
index 26c42546af..cbfb215dac 100644
--- a/lib/gpu/lj_expand_gpu.cpp
+++ b/lib/gpu/lj_expand_gpu.cpp
@@ -88,6 +88,7 @@ bool lje_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
   }
   if (message)
     fprintf(screen,"\n");
+  LJEMF.estimate_gpu_overhead();
   return true;
 }
 
@@ -95,15 +96,16 @@ void lje_gpu_clear() {
   LJEMF.clear();
 }
 
-int * lje_gpu_compute_n(const int ago, const int inum_full,
+int** lje_gpu_compute_n(const int ago, const int inum_full,
                         const int nall, double **host_x, int *host_type,
                         double *boxlo, double *boxhi, int *tag, int **nspecial,
                         int **special, const bool eflag, const bool vflag,
                         const bool eatom, const bool vatom, int &host_start,
-                        const double cpu_time, bool &success) {
+                        int **ilist, int **jnum, const double cpu_time,
+                        bool &success) {
   return LJEMF.compute(ago, inum_full, nall, host_x, host_type, boxlo,
                        boxhi, tag, nspecial, special, eflag, vflag, eatom,
-                       vatom, host_start, cpu_time, success);
+                       vatom, host_start, ilist, jnum, cpu_time, success);
 }  
 			
 void lje_gpu_compute(const int ago, const int inum_full, const int nall,
diff --git a/lib/gpu/ljc_cut_gpu.cpp b/lib/gpu/ljc_cut_gpu.cpp
index 209bebeb76..1dbf06f3e6 100644
--- a/lib/gpu/ljc_cut_gpu.cpp
+++ b/lib/gpu/ljc_cut_gpu.cpp
@@ -93,6 +93,7 @@ bool ljc_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
   }
   if (message)
     fprintf(screen,"\n");
+  LJCMF.estimate_gpu_overhead();
   return true;
 }
 
@@ -100,15 +101,17 @@ void ljc_gpu_clear() {
   LJCMF.clear();
 }
 
-int * ljc_gpu_compute_n(const int ago, const int inum_full,
+int** ljc_gpu_compute_n(const int ago, const int inum_full,
                         const int nall, double **host_x, int *host_type,
                         double *boxlo, double *boxhi, int *tag, int **nspecial, 
                         int **special, const bool eflag, const bool vflag,
                         const bool eatom, const bool vatom, int &host_start,
-                        const double cpu_time, bool &success, double *host_q) {
+                        int **ilist, int **jnum, const double cpu_time,
+                        bool &success, double *host_q) {
   return LJCMF.compute(ago, inum_full, nall, host_x, host_type, boxlo,
                        boxhi, tag, nspecial, special, eflag, vflag, eatom,
-                       vatom, host_start, cpu_time, success, host_q);
+                       vatom, host_start, ilist, jnum, cpu_time, success,
+                       host_q);
 }  
 			
 void ljc_gpu_compute(const int ago, const int inum_full, const int nall,
diff --git a/lib/gpu/ljcl_cut_gpu.cpp b/lib/gpu/ljcl_cut_gpu.cpp
index 570bf0326d..23b45114db 100644
--- a/lib/gpu/ljcl_cut_gpu.cpp
+++ b/lib/gpu/ljcl_cut_gpu.cpp
@@ -94,6 +94,7 @@ bool ljcl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
   }
   if (message)
     fprintf(screen,"\n");
+  LJCLMF.estimate_gpu_overhead();
   return true;
 }
 
@@ -101,15 +102,17 @@ void ljcl_gpu_clear() {
   LJCLMF.clear();
 }
 
-int * ljcl_gpu_compute_n(const int ago, const int inum_full,
+int** ljcl_gpu_compute_n(const int ago, const int inum_full,
                          const int nall, double **host_x, int *host_type,
                          double *boxlo, double *boxhi, int *tag, int **nspecial, 
                          int **special, const bool eflag, const bool vflag,
                          const bool eatom, const bool vatom, int &host_start,
-                         const double cpu_time, bool &success, double *host_q) {
+                         int **ilist, int **jnum,  const double cpu_time,
+                         bool &success, double *host_q) {
   return LJCLMF.compute(ago, inum_full, nall, host_x, host_type, boxlo,
                         boxhi, tag, nspecial, special, eflag, vflag, eatom,
-                        vatom, host_start, cpu_time, success, host_q);
+                        vatom, host_start, ilist, jnum, cpu_time, success,
+                        host_q);
 }  
 			
 void ljcl_gpu_compute(const int ago, const int inum_full, const int nall,
diff --git a/lib/gpu/morse_gpu.cpp b/lib/gpu/morse_gpu.cpp
index 1a3d3b0182..bcdb63c4a8 100644
--- a/lib/gpu/morse_gpu.cpp
+++ b/lib/gpu/morse_gpu.cpp
@@ -89,6 +89,7 @@ bool mor_gpu_init(const int ntypes, double **cutsq,
   }
   if (message)
     fprintf(screen,"\n");
+  MORMF.estimate_gpu_overhead();
   return true;
 }
 
@@ -96,15 +97,16 @@ void mor_gpu_clear() {
   MORMF.clear();
 }
 
-int * mor_gpu_compute_n(const int ago, const int inum_full,
+int** mor_gpu_compute_n(const int ago, const int inum_full,
                         const int nall, double **host_x, int *host_type,
                         double *boxlo, double *boxhi, int *tag, int **nspecial,
                         int **special, const bool eflag, const bool vflag,
                         const bool eatom, const bool vatom, int &host_start,
-                        const double cpu_time, bool &success) {
+                        int **ilist, int **jnum, const double cpu_time,
+                        bool &success) {
   return MORMF.compute(ago, inum_full, nall, host_x, host_type, boxlo,
                        boxhi, tag, nspecial, special, eflag, vflag, eatom,
-                       vatom, host_start, cpu_time, success);
+                       vatom, host_start, ilist, jnum, cpu_time, success);
 }  
 			
 void mor_gpu_compute(const int ago, const int inum_full, const int nall,
diff --git a/lib/gpu/pair_gpu_device.cpp b/lib/gpu/pair_gpu_device.cpp
index 30f3dda0be..837e5f52c6 100644
--- a/lib/gpu/pair_gpu_device.cpp
+++ b/lib/gpu/pair_gpu_device.cpp
@@ -246,9 +246,9 @@ template <class numtyp, class acctyp>
 void PairGPUDeviceT::estimate_gpu_overhead(const int kernel_calls, 
                                            double &gpu_overhead,
                                            double &gpu_driver_overhead) {
-  UCL_H_Vec<int> *host_data_in, *host_data_out;
-  UCL_D_Vec<int> *dev_data_in, *dev_data_out, *kernel_data;
-  UCL_Timer *timers_in, *timers_out, *timers_kernel;
+  UCL_H_Vec<int> *host_data_in=NULL, *host_data_out=NULL;
+  UCL_D_Vec<int> *dev_data_in=NULL, *dev_data_out=NULL, *kernel_data=NULL;
+  UCL_Timer *timers_in=NULL, *timers_out=NULL, *timers_kernel=NULL;
   UCL_Timer over_timer(*gpu);
 
   if (_data_in_estimate>0) {
@@ -291,7 +291,6 @@ void PairGPUDeviceT::estimate_gpu_overhead(const int kernel_calls,
   for (int i=0; i<10; i++) {
     gpu->sync();
     gpu_barrier();
-    double driver_t=MPI_Wtime();
     over_timer.start();
     gpu->sync();
     gpu_barrier();
diff --git a/src/GPU/pair_cg_cmm_coul_long_gpu.cpp b/src/GPU/pair_cg_cmm_coul_long_gpu.cpp
index 4e8169df1b..c5e7bdb503 100644
--- a/src/GPU/pair_cg_cmm_coul_long_gpu.cpp
+++ b/src/GPU/pair_cg_cmm_coul_long_gpu.cpp
@@ -105,12 +105,12 @@ void PairCGCMMCoulLongGPU::compute(int eflag, int vflag)
   int *ilist, *numneigh, **firstneigh;    
   if (gpu_mode == GPU_NEIGH) {
     inum = atom->nlocal;
-    gpulist = cmml_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-				 atom->type, domain->sublo, domain->subhi,
-				 atom->tag, atom->nspecial, atom->special,
-				 eflag, vflag, eflag_atom, vflag_atom,
-				 host_start, &ilist, &numneigh, cpu_time,
-				 success, atom->q);
+    firstneigh = cmml_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
+				    atom->type, domain->sublo, domain->subhi,
+				    atom->tag, atom->nspecial, atom->special,
+				    eflag, vflag, eflag_atom, vflag_atom,
+				    host_start, &ilist, &numneigh, cpu_time,
+				    success, atom->q);
   } else {
     inum = list->inum;
     ilist = list->ilist;
diff --git a/src/GPU/pair_cg_cmm_coul_msm_gpu.cpp b/src/GPU/pair_cg_cmm_coul_msm_gpu.cpp
index 073c1a2d3f..566da1830c 100644
--- a/src/GPU/pair_cg_cmm_coul_msm_gpu.cpp
+++ b/src/GPU/pair_cg_cmm_coul_msm_gpu.cpp
@@ -98,12 +98,12 @@ void PairCGCMMCoulMSMGPU::compute(int eflag, int vflag)
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode == GPU_NEIGH) {
     inum = atom->nlocal;
-    gpulist = cmmm_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-				 atom->type, domain->sublo, domain->subhi,
-				 atom->tag, atom->nspecial, atom->special,
-				 eflag, vflag, eflag_atom, vflag_atom,
-				 host_start, &ilist, &numneigh, cpu_time,
-				 success, atom->q);
+    firstneigh = cmmm_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
+				    atom->type, domain->sublo, domain->subhi,
+				    atom->tag, atom->nspecial, atom->special,
+				    eflag, vflag, eflag_atom, vflag_atom,
+				    host_start, &ilist, &numneigh, cpu_time,
+				    success, atom->q);
   } else {
     inum = list->inum;
     ilist = list->ilist;
diff --git a/src/GPU/pair_cg_cmm_gpu.cpp b/src/GPU/pair_cg_cmm_gpu.cpp
index 219cf00ae2..6fd4670abd 100644
--- a/src/GPU/pair_cg_cmm_gpu.cpp
+++ b/src/GPU/pair_cg_cmm_gpu.cpp
@@ -94,11 +94,12 @@ void PairCGCMMGPU::compute(int eflag, int vflag)
   int *ilist, *numneigh, **firstneigh;    
   if (gpu_mode == GPU_NEIGH) {
     inum = atom->nlocal;
-    gpulist = cmm_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type,
-				domain->sublo, domain->subhi, atom->tag,
-				atom->nspecial, atom->special, eflag, vflag,
-				eflag_atom, vflag_atom, host_start, &ilist,
-				&numneigh, cpu_time, success);
+    firstneigh = cmm_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
+				   atom->type, domain->sublo, domain->subhi,
+				   atom->tag, atom->nspecial, atom->special,
+				   eflag, vflag, eflag_atom, vflag_atom,
+				   host_start, &ilist, &numneigh, cpu_time,
+				   success);
   } else {
     inum = list->inum;
     ilist = list->ilist;
diff --git a/src/GPU/pair_gayberne_gpu.cpp b/src/GPU/pair_gayberne_gpu.cpp
index 94e4c36ce2..c199d1ddde 100644
--- a/src/GPU/pair_gayberne_gpu.cpp
+++ b/src/GPU/pair_gayberne_gpu.cpp
@@ -98,11 +98,11 @@ void PairGayBerneGPU::compute(int eflag, int vflag)
   int *ilist, *numneigh, **firstneigh;  
   if (gpu_mode == GPU_NEIGH) {
     inum = atom->nlocal;
-    gpulist = gb_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type,
-			       domain->sublo, domain->subhi, eflag, vflag,
-			       eflag_atom, vflag_atom, host_start,
-                               &ilist, &numneigh, cpu_time, success,
-			       atom->quat);
+    firstneigh = gb_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
+				  atom->type, domain->sublo, domain->subhi,
+				  eflag, vflag, eflag_atom, vflag_atom,
+				  host_start, &ilist, &numneigh, cpu_time,
+				  success, atom->quat);
   } else {
     inum = list->inum;
     ilist = list->ilist;
diff --git a/src/GPU/pair_lj96_cut_gpu.cpp b/src/GPU/pair_lj96_cut_gpu.cpp
index 925c4c4b0d..1f47d32582 100644
--- a/src/GPU/pair_lj96_cut_gpu.cpp
+++ b/src/GPU/pair_lj96_cut_gpu.cpp
@@ -93,12 +93,12 @@ void PairLJ96CutGPU::compute(int eflag, int vflag)
   int *ilist, *numneigh, **firstneigh;  
   if (gpu_mode == GPU_NEIGH) {
     inum = atom->nlocal;
-    gpulist = lj96_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-				 atom->type, domain->sublo, domain->subhi,
-				 atom->tag, atom->nspecial, atom->special,
-				 eflag, vflag, eflag_atom, vflag_atom,
-				 host_start, &ilist, &numneigh, cpu_time,
-				 success);
+    firstneigh = lj96_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
+				    atom->type, domain->sublo, domain->subhi,
+				    atom->tag, atom->nspecial, atom->special,
+				    eflag, vflag, eflag_atom, vflag_atom,
+				    host_start, &ilist, &numneigh, cpu_time,
+				    success);
   } else {
     inum = list->inum;
     ilist = list->ilist;
diff --git a/src/GPU/pair_lj_charmm_coul_long_gpu.cpp b/src/GPU/pair_lj_charmm_coul_long_gpu.cpp
index 6bae770eed..e516925008 100644
--- a/src/GPU/pair_lj_charmm_coul_long_gpu.cpp
+++ b/src/GPU/pair_lj_charmm_coul_long_gpu.cpp
@@ -108,11 +108,12 @@ void PairLJCharmmCoulLongGPU::compute(int eflag, int vflag)
   int *ilist, *numneigh, **firstneigh;    
   if (gpu_mode == GPU_NEIGH) {
     inum = atom->nlocal;
-    gpulist = crml_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-				 atom->type, domain->sublo, domain->subhi,
-				 atom->tag, atom->nspecial, atom->special,
-				 eflag, vflag, eflag_atom, vflag_atom,
-				 host_start, cpu_time, success, atom->q);
+    firstneigh = crml_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
+				    atom->type, domain->sublo, domain->subhi,
+				    atom->tag, atom->nspecial, atom->special,
+				    eflag, vflag, eflag_atom, vflag_atom,
+				    host_start, &ilist, &numneigh, cpu_time,
+				    success, atom->q);
   } else {
     inum = list->inum;
     ilist = list->ilist;
diff --git a/src/GPU/pair_lj_cut_coul_cut_gpu.cpp b/src/GPU/pair_lj_cut_coul_cut_gpu.cpp
index 47b8b9065e..db061bd4d8 100644
--- a/src/GPU/pair_lj_cut_coul_cut_gpu.cpp
+++ b/src/GPU/pair_lj_cut_coul_cut_gpu.cpp
@@ -96,12 +96,12 @@ void PairLJCutCoulCutGPU::compute(int eflag, int vflag)
   int *ilist, *numneigh, **firstneigh;  
   if (gpu_mode == GPU_NEIGH) {
     inum = atom->nlocal;
-    gpulist = ljc_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-				atom->type, domain->sublo, domain->subhi,
-				atom->tag, atom->nspecial, atom->special,
-				eflag, vflag, eflag_atom, vflag_atom,
-				host_start, &ilist, &numneigh, cpu_time,
-				success, atom->q);
+    firstneigh = ljc_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
+				   atom->type, domain->sublo, domain->subhi,
+				   atom->tag, atom->nspecial, atom->special,
+				   eflag, vflag, eflag_atom, vflag_atom,
+				   host_start, &ilist, &numneigh, cpu_time,
+				   success, atom->q);
   } else {
     inum = list->inum;
     ilist = list->ilist;
diff --git a/src/GPU/pair_lj_cut_coul_long_gpu.cpp b/src/GPU/pair_lj_cut_coul_long_gpu.cpp
index 9417b4206b..c03cd87d61 100644
--- a/src/GPU/pair_lj_cut_coul_long_gpu.cpp
+++ b/src/GPU/pair_lj_cut_coul_long_gpu.cpp
@@ -106,12 +106,12 @@ void PairLJCutCoulLongGPU::compute(int eflag, int vflag)
   int *ilist, *numneigh, **firstneigh;    
   if (gpu_mode == GPU_NEIGH) {
     inum = atom->nlocal;
-    gpulist = ljcl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-				 atom->type, domain->sublo, domain->subhi,
-				 atom->tag, atom->nspecial, atom->special,
-				 eflag, vflag, eflag_atom, vflag_atom,
-				 host_start, &ilist, &numneigh, cpu_time,
-				 success, atom->q);
+    firstneigh = ljcl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
+				    atom->type, domain->sublo, domain->subhi,
+				    atom->tag, atom->nspecial, atom->special,
+				    eflag, vflag, eflag_atom, vflag_atom,
+				    host_start, &ilist, &numneigh, cpu_time,
+				    success, atom->q);
   } else {
     inum = list->inum;
     ilist = list->ilist;
diff --git a/src/GPU/pair_lj_cut_tgpu.cpp b/src/GPU/pair_lj_cut_tgpu.cpp
index 75885c231e..004d145c33 100644
--- a/src/GPU/pair_lj_cut_tgpu.cpp
+++ b/src/GPU/pair_lj_cut_tgpu.cpp
@@ -98,12 +98,12 @@ void PairLJCutTGPU::compute(int eflag, int vflag)
   int *ilist, *numneigh, **firstneigh;    
   if (gpu_mode == GPU_NEIGH) {
     inum = atom->nlocal;
-    gpulist = ljl_gpu_compute_n(neighbor->ago, inum, nall,
-			        atom->x, atom->type, domain->sublo,
-				domain->subhi, atom->tag, atom->nspecial,
-                                atom->special, eflag, vflag, eflag_atom,
-                                vflag_atom, host_start, &ilist, &numneigh,
-				cpu_time, success);
+    firstneigh = ljl_gpu_compute_n(neighbor->ago, inum, nall,
+				   atom->x, atom->type, domain->sublo,
+				   domain->subhi, atom->tag, atom->nspecial,
+				   atom->special, eflag, vflag, eflag_atom,
+				   vflag_atom, host_start, &ilist, &numneigh,
+				   cpu_time, success);
   } else {
     inum = list->inum;
     ilist = list->ilist;
diff --git a/src/GPU/pair_lj_expand_gpu.cpp b/src/GPU/pair_lj_expand_gpu.cpp
index 75558b0a21..b2839b4839 100644
--- a/src/GPU/pair_lj_expand_gpu.cpp
+++ b/src/GPU/pair_lj_expand_gpu.cpp
@@ -94,12 +94,12 @@ void PairLJExpandGPU::compute(int eflag, int vflag)
   int *ilist, *numneigh, **firstneigh;    
   if (gpu_mode == GPU_NEIGH) {
     inum = atom->nlocal;
-    gpulist = lje_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-				atom->type, domain->sublo, domain->subhi,
-				atom->tag, atom->nspecial, atom->special,
-				eflag, vflag, eflag_atom, vflag_atom,
-				host_start, &ilist, &numneigh, cpu_time,
-				success);
+    firstneigh = lje_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
+				   atom->type, domain->sublo, domain->subhi,
+				   atom->tag, atom->nspecial, atom->special,
+				   eflag, vflag, eflag_atom, vflag_atom,
+				   host_start, &ilist, &numneigh, cpu_time,
+				   success);
   } else {
     inum = list->inum;
     ilist = list->ilist;
diff --git a/src/GPU/pair_morse_gpu.cpp b/src/GPU/pair_morse_gpu.cpp
index c3ac27c8f1..e77d5bd29b 100644
--- a/src/GPU/pair_morse_gpu.cpp
+++ b/src/GPU/pair_morse_gpu.cpp
@@ -92,12 +92,12 @@ void PairMorseGPU::compute(int eflag, int vflag)
   int *ilist, *numneigh, **firstneigh;    
   if (gpu_mode == GPU_NEIGH) {
     inum = atom->nlocal;
-    gpulist = mor_gpu_compute_n(neighbor->ago, inum, nall,
-			        atom->x, atom->type, domain->sublo,
-				domain->subhi, atom->tag, atom->nspecial,
-                                atom->special, eflag, vflag, eflag_atom,
-                                vflag_atom, host_start, &ilist, &numneigh,
-				cpu_time, success);
+    firstneigh = mor_gpu_compute_n(neighbor->ago, inum, nall,
+				   atom->x, atom->type, domain->sublo,
+				   domain->subhi, atom->tag, atom->nspecial,
+				   atom->special, eflag, vflag, eflag_atom,
+				   vflag_atom, host_start, &ilist, &numneigh,
+				   cpu_time, success);
   } else {
     inum = list->inum;
     ilist = list->ilist;
diff --git a/src/GPU/pair_morse_gpu.h b/src/GPU/pair_morse_gpu.h
index f146682a97..ee9ba576d5 100644
--- a/src/GPU/pair_morse_gpu.h
+++ b/src/GPU/pair_morse_gpu.h
@@ -29,6 +29,7 @@ class PairMorseGPU : public PairMorse {
   PairMorseGPU(LAMMPS *lmp);
   ~PairMorseGPU();
   void cpu_compute(int, int, int, int, int *, int *, int **);
+  void compute(int, int);
   void init_style();
   double memory_usage();