diff --git a/lib/gpu/atom.cpp b/lib/gpu/atom.cpp
index fd5b687dde..f8526b99fc 100644
--- a/lib/gpu/atom.cpp
+++ b/lib/gpu/atom.cpp
@@ -32,7 +32,7 @@ AtomT::Atom() : _compiled(false),_allocated(false),
 template <class numtyp, class acctyp>
 int AtomT::bytes_per_atom() const { 
   int id_space=0;
-  if (_gpu_nbor)
+  if (_gpu_nbor==1)
     id_space=2;
   int bytes=4*sizeof(numtyp)+id_space;
   if (_rot)
@@ -55,7 +55,7 @@ bool AtomT::alloc(const int nall) {
     
   // Allocate storage for CUDPP sort
   #ifndef USE_OPENCL
-  if (_gpu_nbor) {
+  if (_gpu_nbor==1) {
     CUDPPResult result = cudppPlan(&sort_plan, sort_config, _max_atoms, 1, 0);  
     if (CUDPP_SUCCESS != result)
       return false;
@@ -118,10 +118,12 @@ bool AtomT::alloc(const int nall) {
       gpu_bytes+=dev_quat.row_bytes();
     }
   }
-  if (_gpu_nbor) {
-    success=success && (dev_cell_id.alloc(_max_atoms,*dev)==UCL_SUCCESS);
-    success=success && (dev_particle_id.alloc(_max_atoms,*dev)==UCL_SUCCESS);
-    gpu_bytes+=dev_cell_id.row_bytes()+dev_particle_id.row_bytes();
+  if (_gpu_nbor>0) {
+    if (_gpu_nbor==1) {
+      success=success && (dev_cell_id.alloc(_max_atoms,*dev)==UCL_SUCCESS);
+      success=success && (dev_particle_id.alloc(_max_atoms,*dev)==UCL_SUCCESS);
+      gpu_bytes+=dev_cell_id.row_bytes()+dev_particle_id.row_bytes();
+    }
     if (_bonds) {
       success=success && (dev_tag.alloc(_max_atoms,*dev)==UCL_SUCCESS);
       gpu_bytes+=dev_tag.row_bytes();
@@ -138,7 +140,7 @@ bool AtomT::alloc(const int nall) {
 
 template <class numtyp, class acctyp>
 bool AtomT::add_fields(const bool charge, const bool rot,
-                              const bool gpu_nbor, const bool bonds) {
+                       const int gpu_nbor, const bool bonds) {
   bool realloc=false;
   if (charge && _charge==false) {
     _charge=true;
@@ -148,8 +150,8 @@ bool AtomT::add_fields(const bool charge, const bool rot,
     _rot=true;
     realloc=true;
   }
-  if (gpu_nbor && _gpu_nbor==false) {
-    _gpu_nbor=true;
+  if (gpu_nbor>0 && _gpu_nbor==0) {
+    _gpu_nbor=gpu_nbor;
     realloc=true;
   }
   if (bonds && _bonds==false) {
@@ -167,8 +169,7 @@ bool AtomT::add_fields(const bool charge, const bool rot,
 
 template <class numtyp, class acctyp>
 bool AtomT::init(const int nall, const bool charge, const bool rot,
-                        UCL_Device &devi, const bool gpu_nbor,
-                        const bool bonds) {
+                 UCL_Device &devi, const int gpu_nbor, const bool bonds) {
   clear();
 
   bool success=true;
@@ -234,7 +235,7 @@ void AtomT::clear_resize() {
   #endif
 
   #ifndef USE_OPENCL
-  if (_gpu_nbor) cudppDestroyPlan(sort_plan);
+  if (_gpu_nbor==1) cudppDestroyPlan(sort_plan);
   #endif
 }
 
diff --git a/lib/gpu/atom.h b/lib/gpu/atom.h
index 9c99a4d45e..46e0a4a337 100644
--- a/lib/gpu/atom.h
+++ b/lib/gpu/atom.h
@@ -59,9 +59,11 @@ class Atom {
 
   /// Clear any previous data and set up for a new LAMMPS run
   /** \param rot True if atom storage needs quaternions
-    * \param gpu_nbor True if neighboring will be performed on device **/
+    * \param gpu_nbor 0 if neighboring will be performed on host,
+    *                 1 if neighboring will be performed on device,
+    *                 2 if binning on host and neighboring on device **/
   bool init(const int nall, const bool charge, const bool rot, 
-            UCL_Device &dev, const bool gpu_nbor=false, const bool bonds=false);
+            UCL_Device &dev, const int gpu_nbor=0, const bool bonds=false);
   
   /// Check if we have enough device storage and realloc if not
   /** Returns true if resized with any call during this timestep **/
@@ -77,8 +79,10 @@ class Atom {
   
   /// If already initialized by another LAMMPS style, add fields as necessary
   /** \param rot True if atom storage needs quaternions
-    * \param gpu_nbor True if neighboring will be performed on device **/
-  bool add_fields(const bool charge, const bool rot, const bool gpu_nbor,
+    * \param gpu_nbor 0 if neighboring will be performed on host,
+    *                 1 if neighboring will be performed on device,
+    *                 2 if binning on host and neighboring on device **/
+  bool add_fields(const bool charge, const bool rot, const int gpu_nbor,
                   const bool bonds);
   
   /// Returns true if GPU is using charges
@@ -401,8 +405,8 @@ class Atom {
   bool alloc(const int nall);
   
   bool _allocated, _rot, _charge, _other;
-  int _max_atoms, _nall;
-  bool _gpu_nbor, _bonds;
+  int _max_atoms, _nall, _gpu_nbor;
+  bool _bonds;
   double _time_cast;
   
   double _max_gpu_bytes;
diff --git a/lib/gpu/balance.h b/lib/gpu/balance.h
index 02ba53d7c7..1306ba314a 100644
--- a/lib/gpu/balance.h
+++ b/lib/gpu/balance.h
@@ -33,7 +33,7 @@ class Balance {
   inline ~Balance() { clear(); }
 
   /// Clear any old data and setup for new LAMMPS run
-  inline void init(Device<numtyp, acctyp> *gpu, const bool gpu_nbor,
+  inline void init(Device<numtyp, acctyp> *gpu, const int gpu_nbor,
                    const double split);
 
   /// Clear all host and device data
@@ -50,9 +50,9 @@ class Balance {
 
   /// Get a count of the number of particles host will handle for initial alloc
   inline int first_host_count(const int nlocal, const double gpu_split,
-                              const bool gpu_nbor) const {
+                              const int gpu_nbor) const {
     int host_nlocal=0;
-    if (gpu_nbor && gpu_split!=1.0) {
+    if (gpu_nbor>0 && gpu_split!=1.0) {
       if (gpu_split>0)
         host_nlocal=static_cast<int>(ceil((1.0-gpu_split)*nlocal));
       else
@@ -109,7 +109,8 @@ class Balance {
  private:
   Device<numtyp,acctyp> *_device;
   UCL_Timer _device_time;
-  bool _init_done, _gpu_nbor;
+  bool _init_done;
+  int _gpu_nbor;
   
   bool _load_balance;
   double _actual_split, _avg_split, _desired_split, _max_split;
@@ -123,7 +124,7 @@ class Balance {
 
 template <class numtyp, class acctyp>
 void BalanceT::init(Device<numtyp, acctyp> *gpu, 
-                           const bool gpu_nbor, const double split) {
+                           const int gpu_nbor, const double split) {
   clear();
   _gpu_nbor=gpu_nbor;
   _init_done=true;
@@ -190,13 +191,12 @@ void BalanceT::balance(const double cpu_time) {
     if (_desired_split<0.0)
       _desired_split=0.0;
 
-    if (!_gpu_nbor) {
+    if (_gpu_nbor==0) {
       if (_desired_split<_max_split)
         _actual_split=_desired_split;
       else
         _actual_split=_max_split;
     }
-//std::cout << gpu_time << " " << max_gpu_time << " " << cpu_other_time << " " << cpu_time_per_atom << " " << cpu_time << " " << _desired_split << " " << host_inum << std::endl;
   }
   _avg_split+=_desired_split;
   _avg_count++;
diff --git a/lib/gpu/base_atomic.cpp b/lib/gpu/base_atomic.cpp
index 7b8027c471..f4ad2dc6ab 100644
--- a/lib/gpu/base_atomic.cpp
+++ b/lib/gpu/base_atomic.cpp
@@ -1,5 +1,5 @@
 /***************************************************************************
-                                base_atomic.h
+                               base_atomic.cpp
                              -------------------
                             W. Michael Brown (ORNL)
 
@@ -47,9 +47,11 @@ int BaseAtomicT::init_atomic(const int nlocal, const int nall,
   nbor_time_avail=false;
   screen=_screen;
 
-  bool gpu_nbor=false;
+  int gpu_nbor=0;
   if (device->gpu_mode()==Device<numtyp,acctyp>::GPU_NEIGH)
-    gpu_nbor=true;
+    gpu_nbor=1;
+  else if (device->gpu_mode()==Device<numtyp,acctyp>::GPU_HYB_NEIGH)
+    gpu_nbor=2;
 
   int _gpu_host=0;
   int host_nlocal=hd_balancer.first_host_count(nlocal,gpu_split,gpu_nbor);
@@ -57,7 +59,7 @@ int BaseAtomicT::init_atomic(const int nlocal, const int nall,
     _gpu_host=1;
 
   _threads_per_atom=device->threads_per_atom();
-  if (_threads_per_atom>1 && gpu_nbor==false) {
+  if (_threads_per_atom>1 && gpu_nbor==0) {
     nbor->packing(true);
     _nbor_data=&(nbor->dev_packed);
   } else
diff --git a/lib/gpu/base_charge.cpp b/lib/gpu/base_charge.cpp
index c4de07c7cc..4f9a07f118 100644
--- a/lib/gpu/base_charge.cpp
+++ b/lib/gpu/base_charge.cpp
@@ -48,9 +48,11 @@ int BaseChargeT::init_atomic(const int nlocal, const int nall,
   nbor_time_avail=false;
   screen=_screen;
 
-  bool gpu_nbor=false;
+  int gpu_nbor=0;
   if (device->gpu_mode()==Device<numtyp,acctyp>::GPU_NEIGH)
-    gpu_nbor=true;
+    gpu_nbor=1;
+  else if (device->gpu_mode()==Device<numtyp,acctyp>::GPU_HYB_NEIGH)
+    gpu_nbor=2;
 
   int _gpu_host=0;
   int host_nlocal=hd_balancer.first_host_count(nlocal,gpu_split,gpu_nbor);
@@ -58,7 +60,7 @@ int BaseChargeT::init_atomic(const int nlocal, const int nall,
     _gpu_host=1;
 
   _threads_per_atom=device->threads_per_charge();
-  if (_threads_per_atom>1 && gpu_nbor==false) {
+  if (_threads_per_atom>1 && gpu_nbor==0) {
     nbor->packing(true);
     _nbor_data=&(nbor->dev_packed);
   } else
diff --git a/lib/gpu/base_ellipsoid.cpp b/lib/gpu/base_ellipsoid.cpp
index f24dd1e7a8..9c7c9cab11 100644
--- a/lib/gpu/base_ellipsoid.cpp
+++ b/lib/gpu/base_ellipsoid.cpp
@@ -55,9 +55,11 @@ int BaseEllipsoidT::init_base(const int nlocal, const int nall,
   screen=_screen;
   _ellipsoid_sphere=ellip_sphere;
 
-  bool gpu_nbor=false;
+  int gpu_nbor=0;
   if (device->gpu_mode()==Device<numtyp,acctyp>::GPU_NEIGH)
-    gpu_nbor=true;
+    gpu_nbor=1;
+  else if (device->gpu_mode()==Device<numtyp,acctyp>::GPU_HYB_NEIGH)
+    gpu_nbor=2;
 
   int _gpu_host=0;
   int host_nlocal=hd_balancer.first_host_count(nlocal,gpu_split,gpu_nbor);
@@ -204,7 +206,7 @@ void BaseEllipsoidT::output_times() {
         fprintf(screen,"Data Transfer:   %.4f s.\n",times[0]/replica_size);
         fprintf(screen,"Data Cast/Pack:  %.4f s.\n",times[5]/replica_size);
         fprintf(screen,"Neighbor copy:   %.4f s.\n",times[1]/replica_size);
-        if (nbor->gpu_nbor())
+        if (nbor->gpu_nbor()>0)
           fprintf(screen,"Neighbor build:  %.4f s.\n",times[2]/replica_size);
         else
           fprintf(screen,"Neighbor unpack: %.4f s.\n",times[2]/replica_size);
diff --git a/lib/gpu/device.cpp b/lib/gpu/device.cpp
index 1882848d84..130c311004 100644
--- a/lib/gpu/device.cpp
+++ b/lib/gpu/device.cpp
@@ -154,13 +154,15 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
   if (_particle_split<1.0 && _particle_split>0.0)
     ef_nlocal=static_cast<int>(_particle_split*nlocal);
 
-  bool gpu_nbor=false;
-  if (_gpu_mode==GPU_NEIGH)
-    gpu_nbor=true;
-    
+  int gpu_nbor=0;
+  if (_gpu_mode==Device<numtyp,acctyp>::GPU_NEIGH)
+    gpu_nbor=1;
+  else if (_gpu_mode==Device<numtyp,acctyp>::GPU_HYB_NEIGH)
+    gpu_nbor=2;
+
   if (_init_count==0) {
     // Initialize atom and nbor data
-    if (!atom.init(nall,charge,rot,*gpu,gpu_nbor,gpu_nbor && maxspecial>0))
+    if (!atom.init(nall,charge,rot,*gpu,gpu_nbor,gpu_nbor>0 && maxspecial>0))
       return -3;
       
     _data_in_estimate++;
@@ -173,7 +175,7 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
       _data_in_estimate++;
     if (atom.quat()==false && rot)
       _data_in_estimate++;
-    if (!atom.add_fields(charge,rot,gpu_nbor,gpu_nbor && maxspecial))
+    if (!atom.add_fields(charge,rot,gpu_nbor,gpu_nbor>0 && maxspecial))
       return -3;
   }
   
@@ -420,7 +422,7 @@ void DeviceT::output_times(UCL_Timer &time_pair,
         fprintf(screen,"Data Transfer:   %.4f s.\n",times[0]/_replica_size);
         fprintf(screen,"Data Cast/Pack:  %.4f s.\n",times[4]/_replica_size);
         fprintf(screen,"Neighbor copy:   %.4f s.\n",times[1]/_replica_size);
-        if (nbor.gpu_nbor())
+        if (nbor.gpu_nbor()>0)
           fprintf(screen,"Neighbor build:  %.4f s.\n",times[2]/_replica_size);
         else
           fprintf(screen,"Neighbor unpack: %.4f s.\n",times[2]/_replica_size);
diff --git a/lib/gpu/neighbor.cpp b/lib/gpu/neighbor.cpp
index d0446d8025..28a33aa13a 100644
--- a/lib/gpu/neighbor.cpp
+++ b/lib/gpu/neighbor.cpp
@@ -21,7 +21,7 @@
 using namespace LAMMPS_AL;
 
 int Neighbor::bytes_per_atom(const int max_nbors) const {
-  if (_gpu_nbor)
+  if (_gpu_nbor==1)
     return (max_nbors+2)*sizeof(int);
   else if (_use_packing)
     return ((max_nbors+2)*2)*sizeof(int);
@@ -32,7 +32,7 @@ int Neighbor::bytes_per_atom(const int max_nbors) const {
 bool Neighbor::init(NeighborShared *shared, const int inum,
                        const int host_inum, const int max_nbors, 
                        const int maxspecial, UCL_Device &devi, 
-                       const bool gpu_nbor, const int gpu_host, 
+                       const int gpu_nbor, const int gpu_host, 
                        const bool pre_cut, const int block_cell_2d,
                        const int block_cell_id, const int block_nbor_build) {
   clear();
@@ -51,7 +51,7 @@ bool Neighbor::init(NeighborShared *shared, const int inum,
     // Not yet implemented
     assert(0==1);
   
-  if (pre_cut || gpu_nbor==false)
+  if (pre_cut || gpu_nbor==0)
     _alloc_packed=true;
   else
     _alloc_packed=false;
@@ -72,10 +72,10 @@ bool Neighbor::init(NeighborShared *shared, const int inum,
   _max_nbors=max_nbors;
 
   _maxspecial=maxspecial;
-  if (gpu_nbor==false)
+  if (gpu_nbor==0)
     _maxspecial=0;
 
-  if (gpu_nbor==false)
+  if (gpu_nbor==0)
     success=success && (host_packed.alloc(2*IJ_SIZE,*dev,
                                           UCL_WRITE_OPTIMIZED)==UCL_SUCCESS);
   alloc(success);
@@ -92,7 +92,7 @@ void Neighbor::alloc(bool &success) {
   dev_nbor.clear();
   host_acc.clear();
   int nt=_max_atoms+_max_host;
-  if (_use_packing==false || _gpu_nbor) 
+  if (_use_packing==false || _gpu_nbor>0) 
     success=success && (dev_nbor.alloc((_max_nbors+2)*_max_atoms,*dev,
                                        UCL_READ_ONLY)==UCL_SUCCESS);
   else 
@@ -181,7 +181,7 @@ void Neighbor::clear() {
 }
 
 double Neighbor::host_memory_usage() const {
-  if (_gpu_nbor) {
+  if (_gpu_nbor>0) {
     if (_gpu_host)
       return host_nbor.row_bytes()*host_nbor.rows()+host_ilist.row_bytes()+
              host_jlist.row_bytes();
diff --git a/lib/gpu/neighbor.h b/lib/gpu/neighbor.h
index 2676a941ba..f5e67b8385 100644
--- a/lib/gpu/neighbor.h
+++ b/lib/gpu/neighbor.h
@@ -52,7 +52,9 @@ class Neighbor {
   /** \param inum Initial number of particles whose neighbors stored on device
     * \param host_inum Initial number of particles whose nbors copied to host
     * \param max_nbors Initial number of rows in the neighbor matrix
-    * \param gpu_nbor True if device will perform neighboring
+    * \param gpu_nbor 0 if neighboring will be performed on host,
+    *                 1 if neighboring will be performed on device,
+    *                 2 if binning on host and neighboring on device
     * \param gpu_host 0 if host will not perform force calculations,
     *                 1 if gpu_nbor is true, and host needs a half nbor list,
     *                 2 if gpu_nbor is true, and host needs a full nbor list
@@ -60,7 +62,7 @@ class Neighbor {
     *                than the force kernel **/
   bool init(NeighborShared *shared, const int inum, const int host_inum,
             const int max_nbors, const int maxspecial, UCL_Device &dev,
-            const bool gpu_nbor, const int gpu_host, const bool pre_cut,
+            const int gpu_nbor, const int gpu_host, const bool pre_cut,
             const int block_cell_2d, const int block_cell_id, 
             const int block_nbor_build);
 
@@ -108,8 +110,11 @@ class Neighbor {
   /// Total host memory used by class
   double host_memory_usage() const;
   
-  /// True if neighboring performed on GPU
-  inline bool gpu_nbor() const { return _gpu_nbor; }
+  /// Returns the type of neighboring:
+  /** - 0 if neighboring will be performed on host
+    * - 1 if neighboring will be performed on device
+    * - 2 if binning on host and neighboring on device **/
+  inline int gpu_nbor() const { return _gpu_nbor; }
   
   /// Make a copy of unpacked nbor lists in the packed storage area (for gb)
   inline void copy_unpacked(const int inum, const int maxj) 
@@ -146,7 +151,7 @@ class Neighbor {
   /// Return the number of bytes used on device
   inline double gpu_bytes() {
     double res = _gpu_bytes + _c_bytes + _cell_bytes;
-    if (_gpu_nbor==false)
+    if (_gpu_nbor==0)
       res += 2*IJ_SIZE*sizeof(int);
 
     return res;
@@ -191,8 +196,8 @@ class Neighbor {
   NeighborShared *_shared;
   UCL_Device *dev;
   bool _allocated, _use_packing;
-  int _max_atoms, _max_nbors, _max_host, _nbor_pitch, _maxspecial;
-  bool _gpu_nbor, _gpu_host, _alloc_packed;
+  int _gpu_nbor, _max_atoms, _max_nbors, _max_host, _nbor_pitch, _maxspecial;
+  bool _gpu_host, _alloc_packed;
   double _cell_size;
 
   double _gpu_bytes, _c_bytes, _cell_bytes;
diff --git a/lib/gpu/neighbor_shared.cpp b/lib/gpu/neighbor_shared.cpp
index 63619fb8fe..8152295dc5 100644
--- a/lib/gpu/neighbor_shared.cpp
+++ b/lib/gpu/neighbor_shared.cpp
@@ -27,9 +27,11 @@ using namespace LAMMPS_AL;
 
 void NeighborShared::clear() {
   if (_compiled) {
-    if (_gpu_nbor) {
-      k_cell_id.clear();
-      k_cell_counts.clear();
+    if (_gpu_nbor>0) {
+      if (_gpu_nbor==1) {
+        k_cell_id.clear();
+        k_cell_counts.clear();
+      }
       k_build_nbor.clear();
       k_transpose.clear();
       k_special.clear();
@@ -42,7 +44,7 @@ void NeighborShared::clear() {
   }
 }
 
-void NeighborShared::compile_kernels(UCL_Device &dev, const bool gpu_nbor) {
+void NeighborShared::compile_kernels(UCL_Device &dev, const int gpu_nbor) {
   if (_compiled)
   	return;
   	
@@ -50,20 +52,24 @@ void NeighborShared::compile_kernels(UCL_Device &dev, const bool gpu_nbor) {
   std::string flags="-cl-fast-relaxed-math -cl-mad-enable -D"+
                     std::string(OCL_VENDOR);
 
-  if (gpu_nbor==false) {
+  if (gpu_nbor==0) {
     nbor_program=new UCL_Program(dev);
     nbor_program->load_string(neighbor_cpu,flags.c_str());
     k_nbor.set_function(*nbor_program,"kernel_unpack");
   } else {
     build_program=new UCL_Program(dev);
     #ifdef USE_OPENCL
-    std::cerr << "CANNOT CURRENTLY USE GPU NEIGHBORING WITH OPENCL\n";
-    exit(1);
+    if (gpu_nbor==1) {
+      std::cerr << "CANNOT CURRENTLY USE GPU NEIGHBORING WITH OPENCL\n";
+      exit(1);
+    }
     #else
     build_program->load_string(neighbor_gpu,flags.c_str());
     #endif
-    k_cell_id.set_function(*build_program,"calc_cell_id");
-    k_cell_counts.set_function(*build_program,"kernel_calc_cell_counts");
+    if (gpu_nbor==1) {
+      k_cell_id.set_function(*build_program,"calc_cell_id");
+      k_cell_counts.set_function(*build_program,"kernel_calc_cell_counts");
+    }
     k_build_nbor.set_function(*build_program,"calc_neigh_list_cell");
     k_transpose.set_function(*build_program,"transpose");
     k_special.set_function(*build_program,"kernel_special");
diff --git a/lib/gpu/neighbor_shared.h b/lib/gpu/neighbor_shared.h
index feaff4638a..b579e0d600 100644
--- a/lib/gpu/neighbor_shared.h
+++ b/lib/gpu/neighbor_shared.h
@@ -44,7 +44,7 @@ class NeighborShared {
   UCL_Texture neigh_tex;
 
   /// Compile kernels for neighbor lists
-  void compile_kernels(UCL_Device &dev, const bool gpu_nbor);
+  void compile_kernels(UCL_Device &dev, const int gpu_nbor);
 
   // ----------------------------- Kernels
   UCL_Program *nbor_program, *build_program;
@@ -52,7 +52,8 @@ class NeighborShared {
   UCL_Kernel k_transpose, k_special;
 
  private:
-  bool _compiled, _gpu_nbor;
+  bool _compiled;
+  int _gpu_nbor;
 };
 
 }
diff --git a/src/GPU/fix_gpu.cpp b/src/GPU/fix_gpu.cpp
index cdcaaa8458..ba7952c4a5 100644
--- a/src/GPU/fix_gpu.cpp
+++ b/src/GPU/fix_gpu.cpp
@@ -28,7 +28,7 @@
 
 using namespace LAMMPS_NS;
 
-enum{GPU_FORCE, GPU_NEIGH};
+enum{GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH};
 
 extern int lmp_init_device(MPI_Comm world, MPI_Comm replica,
                            const int first_gpu, const int last_gpu,
@@ -56,6 +56,10 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :
     _gpu_mode = GPU_NEIGH;
     if (domain->triclinic)
       error->all("Cannot use force/neigh with triclinic box");
+  } else if (strcmp(arg[3],"force/hybrid_neigh") == 0) {
+    _gpu_mode = GPU_HYB_NEIGH;
+    if (domain->triclinic)
+      error->all("Cannot use force/hybrid_neigh with triclinic box");
   } else
     error->all("Illegal fix GPU command");
 
@@ -117,7 +121,7 @@ void FixGPU::init()
   if ((void*)modify->fix[0] != (void*)this)
     error->all("GPU is not the first fix for this run");
   // Hybrid cannot be used with force/neigh option
-  if (_gpu_mode == GPU_NEIGH)
+  if (_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH)
     if (force->pair_match("hybrid",1) != NULL ||
 	force->pair_match("hybrid/overlay",1) != NULL)
       error->all("Cannot use pair hybrid with GPU neighbor builds");
diff --git a/src/GPU/pair_cg_cmm_coul_long_gpu.cpp b/src/GPU/pair_cg_cmm_coul_long_gpu.cpp
index 8c6e8ef931..f964a2b43e 100644
--- a/src/GPU/pair_cg_cmm_coul_long_gpu.cpp
+++ b/src/GPU/pair_cg_cmm_coul_long_gpu.cpp
@@ -79,7 +79,7 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairCGCMMCoulLongGPU::PairCGCMMCoulLongGPU(LAMMPS *lmp) : PairCGCMMCoulLong(lmp), gpu_mode(GPU_PAIR)
+PairCGCMMCoulLongGPU::PairCGCMMCoulLongGPU(LAMMPS *lmp) : PairCGCMMCoulLong(lmp), gpu_mode(GPU_FORCE)
 {
   respa_enable = 0;
   cpu_time = 0.0;
@@ -106,7 +106,7 @@ void PairCGCMMCoulLongGPU::compute(int eflag, int vflag)
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;    
-  if (gpu_mode == GPU_NEIGH) {
+  if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = cmml_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
 				    atom->type, domain->sublo, domain->subhi,
@@ -186,7 +186,7 @@ void PairCGCMMCoulLongGPU::init_style()
 			      force->qqrd2e, g_ewald);
   GPU_EXTRA::check_flag(success,error,world);
 
-  if (gpu_mode != GPU_NEIGH) {
+  if (gpu_mode == GPU_FORCE) {
     int irequest = neighbor->request(this);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
diff --git a/src/GPU/pair_cg_cmm_coul_long_gpu.h b/src/GPU/pair_cg_cmm_coul_long_gpu.h
index 19dd11c66d..6ce9d635ab 100644
--- a/src/GPU/pair_cg_cmm_coul_long_gpu.h
+++ b/src/GPU/pair_cg_cmm_coul_long_gpu.h
@@ -33,7 +33,7 @@ class PairCGCMMCoulLongGPU : public PairCGCMMCoulLong {
   void init_style();
   double memory_usage();
 
- enum { GPU_PAIR, GPU_NEIGH };
+ enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;
diff --git a/src/GPU/pair_cg_cmm_coul_msm_gpu.cpp b/src/GPU/pair_cg_cmm_coul_msm_gpu.cpp
index d946b739d5..467691c84d 100644
--- a/src/GPU/pair_cg_cmm_coul_msm_gpu.cpp
+++ b/src/GPU/pair_cg_cmm_coul_msm_gpu.cpp
@@ -72,7 +72,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairCGCMMCoulMSMGPU::PairCGCMMCoulMSMGPU(LAMMPS *lmp) : PairCGCMMCoulMSM(lmp),
-                                                        gpu_mode(GPU_PAIR)
+                                                        gpu_mode(GPU_FORCE)
 {
   respa_enable = 0;
   cpu_time = 0.0;
@@ -99,7 +99,7 @@ void PairCGCMMCoulMSMGPU::compute(int eflag, int vflag)
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
-  if (gpu_mode == GPU_NEIGH) {
+  if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = cmmm_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
 				    atom->type, domain->sublo, domain->subhi,
@@ -166,7 +166,7 @@ void PairCGCMMCoulMSMGPU::init_style()
 			      force->qqrd2e,_smooth);
   GPU_EXTRA::check_flag(success,error,world);
 
-  if (gpu_mode != GPU_NEIGH) {
+  if (gpu_mode == GPU_FORCE) {
     int irequest = neighbor->request(this);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
diff --git a/src/GPU/pair_cg_cmm_coul_msm_gpu.h b/src/GPU/pair_cg_cmm_coul_msm_gpu.h
index cfb36e0cff..a7746f488c 100644
--- a/src/GPU/pair_cg_cmm_coul_msm_gpu.h
+++ b/src/GPU/pair_cg_cmm_coul_msm_gpu.h
@@ -33,7 +33,7 @@ class PairCGCMMCoulMSMGPU : public PairCGCMMCoulMSM {
   void init_style();
   double memory_usage();
 
- enum { GPU_PAIR, GPU_NEIGH };
+ enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;
diff --git a/src/GPU/pair_cg_cmm_gpu.cpp b/src/GPU/pair_cg_cmm_gpu.cpp
index e756d7d0e5..58d72708bb 100644
--- a/src/GPU/pair_cg_cmm_gpu.cpp
+++ b/src/GPU/pair_cg_cmm_gpu.cpp
@@ -66,7 +66,7 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairCGCMMGPU::PairCGCMMGPU(LAMMPS *lmp) : PairCGCMM(lmp), gpu_mode(GPU_PAIR)
+PairCGCMMGPU::PairCGCMMGPU(LAMMPS *lmp) : PairCGCMM(lmp), gpu_mode(GPU_FORCE)
 {
   respa_enable = 0;
   cpu_time = 0.0;
@@ -93,7 +93,7 @@ void PairCGCMMGPU::compute(int eflag, int vflag)
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;    
-  if (gpu_mode == GPU_NEIGH) {
+  if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = cmm_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
 				   atom->type, domain->sublo, domain->subhi,
@@ -157,7 +157,7 @@ void PairCGCMMGPU::init_style()
 			     cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
-  if (gpu_mode != GPU_NEIGH) {
+  if (gpu_mode == GPU_FORCE) {
     int irequest = neighbor->request(this);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
diff --git a/src/GPU/pair_cg_cmm_gpu.h b/src/GPU/pair_cg_cmm_gpu.h
index d4cead54b6..481e01af21 100644
--- a/src/GPU/pair_cg_cmm_gpu.h
+++ b/src/GPU/pair_cg_cmm_gpu.h
@@ -33,7 +33,7 @@ class PairCGCMMGPU : public PairCGCMM {
   void init_style();
   double memory_usage();
 
- enum { GPU_PAIR, GPU_NEIGH };
+ enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;
diff --git a/src/GPU/pair_coul_long_gpu.cpp b/src/GPU/pair_coul_long_gpu.cpp
index 468821dcec..82c0ccba0e 100644
--- a/src/GPU/pair_coul_long_gpu.cpp
+++ b/src/GPU/pair_coul_long_gpu.cpp
@@ -76,7 +76,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairCoulLongGPU::PairCoulLongGPU(LAMMPS *lmp) : 
-  PairCoulLong(lmp), gpu_mode(GPU_PAIR)
+  PairCoulLong(lmp), gpu_mode(GPU_FORCE)
 {
   respa_enable = 0;
   cpu_time = 0.0;
@@ -103,7 +103,7 @@ void PairCoulLongGPU::compute(int eflag, int vflag)
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;    
-  if (gpu_mode == GPU_NEIGH) {
+  if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = cl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
 				  atom->type, domain->sublo, domain->subhi,
@@ -169,7 +169,7 @@ void PairCoulLongGPU::init_style()
 
   GPU_EXTRA::check_flag(success,error,world);
 
-  if (gpu_mode != GPU_NEIGH) {
+  if (gpu_mode == GPU_FORCE) {
     int irequest = neighbor->request(this);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
diff --git a/src/GPU/pair_coul_long_gpu.h b/src/GPU/pair_coul_long_gpu.h
index a84b281447..d381c83a04 100644
--- a/src/GPU/pair_coul_long_gpu.h
+++ b/src/GPU/pair_coul_long_gpu.h
@@ -33,7 +33,7 @@ class PairCoulLongGPU : public PairCoulLong {
   void init_style();
   double memory_usage();
 
- enum { GPU_PAIR, GPU_NEIGH };
+ enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;
diff --git a/src/GPU/pair_gayberne_gpu.cpp b/src/GPU/pair_gayberne_gpu.cpp
index c137f0f67c..b3224f7e02 100644
--- a/src/GPU/pair_gayberne_gpu.cpp
+++ b/src/GPU/pair_gayberne_gpu.cpp
@@ -73,7 +73,7 @@ enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE};
 /* ---------------------------------------------------------------------- */
 
 PairGayBerneGPU::PairGayBerneGPU(LAMMPS *lmp) : PairGayBerne(lmp),
-                                                gpu_mode(GPU_PAIR)
+                                                gpu_mode(GPU_FORCE)
 {
   avec = (AtomVecEllipsoid *) atom->style_match("ellipsoid");
   if (!avec) 
@@ -122,7 +122,7 @@ void PairGayBerneGPU::compute(int eflag, int vflag)
     }
   }
 
-  if (gpu_mode == GPU_NEIGH) {
+  if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = gb_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
 				  atom->type, domain->sublo, domain->subhi,
@@ -204,7 +204,7 @@ void PairGayBerneGPU::init_style()
 			    maxspecial, cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
-  if (gpu_mode != GPU_NEIGH) {
+  if (gpu_mode == GPU_FORCE) {
     int irequest = neighbor->request(this);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
diff --git a/src/GPU/pair_gayberne_gpu.h b/src/GPU/pair_gayberne_gpu.h
index 9d59f0d7ff..656ca5eed0 100644
--- a/src/GPU/pair_gayberne_gpu.h
+++ b/src/GPU/pair_gayberne_gpu.h
@@ -33,7 +33,7 @@ class PairGayBerneGPU : public PairGayBerne {
   void init_style();
   double memory_usage();
 
-  enum { GPU_PAIR, GPU_NEIGH };
+  enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;
diff --git a/src/GPU/pair_lj96_cut_gpu.cpp b/src/GPU/pair_lj96_cut_gpu.cpp
index ce51b12a7b..4e4417692e 100644
--- a/src/GPU/pair_lj96_cut_gpu.cpp
+++ b/src/GPU/pair_lj96_cut_gpu.cpp
@@ -65,7 +65,7 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJ96CutGPU::PairLJ96CutGPU(LAMMPS *lmp) : PairLJ96Cut(lmp), gpu_mode(GPU_PAIR)
+PairLJ96CutGPU::PairLJ96CutGPU(LAMMPS *lmp) : PairLJ96Cut(lmp), gpu_mode(GPU_FORCE)
 {
   respa_enable = 0;
   cpu_time = 0.0;
@@ -92,7 +92,7 @@ void PairLJ96CutGPU::compute(int eflag, int vflag)
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;  
-  if (gpu_mode == GPU_NEIGH) {
+  if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = lj96_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
 				    atom->type, domain->sublo, domain->subhi,
@@ -156,7 +156,7 @@ void PairLJ96CutGPU::init_style()
 			      cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
-  if (gpu_mode != GPU_NEIGH) {
+  if (gpu_mode == GPU_FORCE) {
     int irequest = neighbor->request(this);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
diff --git a/src/GPU/pair_lj96_cut_gpu.h b/src/GPU/pair_lj96_cut_gpu.h
index b4baa68828..e7916fbe46 100644
--- a/src/GPU/pair_lj96_cut_gpu.h
+++ b/src/GPU/pair_lj96_cut_gpu.h
@@ -33,7 +33,7 @@ class PairLJ96CutGPU : public PairLJ96Cut {
   void init_style();
   double memory_usage();
 
- enum { GPU_PAIR, GPU_NEIGH };
+ enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;
diff --git a/src/GPU/pair_lj_charmm_coul_long_gpu.cpp b/src/GPU/pair_lj_charmm_coul_long_gpu.cpp
index fa48db0d9a..9be332ee5f 100644
--- a/src/GPU/pair_lj_charmm_coul_long_gpu.cpp
+++ b/src/GPU/pair_lj_charmm_coul_long_gpu.cpp
@@ -82,7 +82,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJCharmmCoulLongGPU::PairLJCharmmCoulLongGPU(LAMMPS *lmp) : 
-  PairLJCharmmCoulLong(lmp), gpu_mode(GPU_PAIR)
+  PairLJCharmmCoulLong(lmp), gpu_mode(GPU_FORCE)
 {
   respa_enable = 0;
   cpu_time = 0.0;
@@ -109,7 +109,7 @@ void PairLJCharmmCoulLongGPU::compute(int eflag, int vflag)
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;    
-  if (gpu_mode == GPU_NEIGH) {
+  if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = crml_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
 				    atom->type, domain->sublo, domain->subhi,
@@ -202,7 +202,7 @@ void PairLJCharmmCoulLongGPU::init_style()
 			      arithmetic);
   GPU_EXTRA::check_flag(success,error,world);
 
-  if (gpu_mode != GPU_NEIGH) {
+  if (gpu_mode == GPU_FORCE) {
     int irequest = neighbor->request(this);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
diff --git a/src/GPU/pair_lj_charmm_coul_long_gpu.h b/src/GPU/pair_lj_charmm_coul_long_gpu.h
index 0205b93860..f51f9d4fce 100644
--- a/src/GPU/pair_lj_charmm_coul_long_gpu.h
+++ b/src/GPU/pair_lj_charmm_coul_long_gpu.h
@@ -33,7 +33,7 @@ class PairLJCharmmCoulLongGPU : public PairLJCharmmCoulLong {
   void init_style();
   double memory_usage();
 
- enum { GPU_PAIR, GPU_NEIGH };
+ enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;
diff --git a/src/GPU/pair_lj_class2_coul_long_gpu.cpp b/src/GPU/pair_lj_class2_coul_long_gpu.cpp
index 2c7ea41531..a33723599b 100644
--- a/src/GPU/pair_lj_class2_coul_long_gpu.cpp
+++ b/src/GPU/pair_lj_class2_coul_long_gpu.cpp
@@ -80,7 +80,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJClass2CoulLongGPU::PairLJClass2CoulLongGPU(LAMMPS *lmp) : 
-  PairLJClass2CoulLong(lmp), gpu_mode(GPU_PAIR)
+  PairLJClass2CoulLong(lmp), gpu_mode(GPU_FORCE)
 {
   cpu_time = 0.0;
 }
@@ -106,7 +106,7 @@ void PairLJClass2CoulLongGPU::compute(int eflag, int vflag)
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;    
-  if (gpu_mode == GPU_NEIGH) {
+  if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = c2cl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
 				    atom->type, domain->sublo, domain->subhi,
@@ -181,7 +181,7 @@ void PairLJClass2CoulLongGPU::init_style()
                               force->special_coul, force->qqrd2e, g_ewald);
   GPU_EXTRA::check_flag(success,error,world);
 
-  if (gpu_mode != GPU_NEIGH) {
+  if (gpu_mode == GPU_FORCE) {
     int irequest = neighbor->request(this);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
diff --git a/src/GPU/pair_lj_class2_coul_long_gpu.h b/src/GPU/pair_lj_class2_coul_long_gpu.h
index 9dbd0ff146..ea0ab27c57 100644
--- a/src/GPU/pair_lj_class2_coul_long_gpu.h
+++ b/src/GPU/pair_lj_class2_coul_long_gpu.h
@@ -33,7 +33,7 @@ class PairLJClass2CoulLongGPU : public PairLJClass2CoulLong {
   void init_style();
   double memory_usage();
 
- enum { GPU_PAIR, GPU_NEIGH };
+ enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;
diff --git a/src/GPU/pair_lj_class2_gpu.cpp b/src/GPU/pair_lj_class2_gpu.cpp
index c45fb5bfdb..0115ed956d 100644
--- a/src/GPU/pair_lj_class2_gpu.cpp
+++ b/src/GPU/pair_lj_class2_gpu.cpp
@@ -65,7 +65,7 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJClass2GPU::PairLJClass2GPU(LAMMPS *lmp) : PairLJClass2(lmp), gpu_mode(GPU_PAIR)
+PairLJClass2GPU::PairLJClass2GPU(LAMMPS *lmp) : PairLJClass2(lmp), gpu_mode(GPU_FORCE)
 {
   cpu_time = 0.0;
 }
@@ -91,7 +91,7 @@ void PairLJClass2GPU::compute(int eflag, int vflag)
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;  
-  if (gpu_mode == GPU_NEIGH) {
+  if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = lj96_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
 				    atom->type, domain->sublo, domain->subhi,
@@ -153,7 +153,7 @@ void PairLJClass2GPU::init_style()
 			      cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
-  if (gpu_mode != GPU_NEIGH) {
+  if (gpu_mode == GPU_FORCE) {
     int irequest = neighbor->request(this);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
diff --git a/src/GPU/pair_lj_class2_gpu.h b/src/GPU/pair_lj_class2_gpu.h
index 7a2835e830..ea03bd74af 100644
--- a/src/GPU/pair_lj_class2_gpu.h
+++ b/src/GPU/pair_lj_class2_gpu.h
@@ -33,7 +33,7 @@ class PairLJClass2GPU : public PairLJClass2 {
   void init_style();
   double memory_usage();
 
- enum { GPU_PAIR, GPU_NEIGH };
+ enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;
diff --git a/src/GPU/pair_lj_cut_coul_cut_gpu.cpp b/src/GPU/pair_lj_cut_coul_cut_gpu.cpp
index cd19e81ca2..77f49055a4 100644
--- a/src/GPU/pair_lj_cut_coul_cut_gpu.cpp
+++ b/src/GPU/pair_lj_cut_coul_cut_gpu.cpp
@@ -70,7 +70,7 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutCoulCutGPU::PairLJCutCoulCutGPU(LAMMPS *lmp) : PairLJCutCoulCut(lmp), gpu_mode(GPU_PAIR)
+PairLJCutCoulCutGPU::PairLJCutCoulCutGPU(LAMMPS *lmp) : PairLJCutCoulCut(lmp), gpu_mode(GPU_FORCE)
 {
   respa_enable = 0;
   cpu_time = 0.0;
@@ -97,7 +97,7 @@ void PairLJCutCoulCutGPU::compute(int eflag, int vflag)
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;  
-  if (gpu_mode == GPU_NEIGH) {
+  if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = ljc_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
 				   atom->type, domain->sublo, domain->subhi,
@@ -165,7 +165,7 @@ void PairLJCutCoulCutGPU::init_style()
 			     force->special_coul, force->qqrd2e);
   GPU_EXTRA::check_flag(success,error,world);
 
-  if (gpu_mode != GPU_NEIGH) {
+  if (gpu_mode == GPU_FORCE) {
     int irequest = neighbor->request(this);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
diff --git a/src/GPU/pair_lj_cut_coul_cut_gpu.h b/src/GPU/pair_lj_cut_coul_cut_gpu.h
index 17bd4aa852..12108e8e9b 100644
--- a/src/GPU/pair_lj_cut_coul_cut_gpu.h
+++ b/src/GPU/pair_lj_cut_coul_cut_gpu.h
@@ -33,7 +33,7 @@ class PairLJCutCoulCutGPU : public PairLJCutCoulCut {
   void init_style();
   double memory_usage();
 
- enum { GPU_PAIR, GPU_NEIGH };
+ enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;
diff --git a/src/GPU/pair_lj_cut_coul_long_gpu.cpp b/src/GPU/pair_lj_cut_coul_long_gpu.cpp
index 2b3a915f0e..f24eb5a372 100644
--- a/src/GPU/pair_lj_cut_coul_long_gpu.cpp
+++ b/src/GPU/pair_lj_cut_coul_long_gpu.cpp
@@ -80,7 +80,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulLongGPU::PairLJCutCoulLongGPU(LAMMPS *lmp) : 
-  PairLJCutCoulLong(lmp), gpu_mode(GPU_PAIR)
+  PairLJCutCoulLong(lmp), gpu_mode(GPU_FORCE)
 {
   respa_enable = 0;
   cpu_time = 0.0;
@@ -107,7 +107,7 @@ void PairLJCutCoulLongGPU::compute(int eflag, int vflag)
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;    
-  if (gpu_mode == GPU_NEIGH) {
+  if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = ljcl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
 				    atom->type, domain->sublo, domain->subhi,
@@ -188,7 +188,7 @@ void PairLJCutCoulLongGPU::init_style()
                               force->special_coul, force->qqrd2e, g_ewald);
   GPU_EXTRA::check_flag(success,error,world);
 
-  if (gpu_mode != GPU_NEIGH) {
+  if (gpu_mode == GPU_FORCE) {
     int irequest = neighbor->request(this);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
diff --git a/src/GPU/pair_lj_cut_coul_long_gpu.h b/src/GPU/pair_lj_cut_coul_long_gpu.h
index 70fbf82a71..f30d8a2154 100644
--- a/src/GPU/pair_lj_cut_coul_long_gpu.h
+++ b/src/GPU/pair_lj_cut_coul_long_gpu.h
@@ -33,7 +33,7 @@ class PairLJCutCoulLongGPU : public PairLJCutCoulLong {
   void init_style();
   double memory_usage();
 
- enum { GPU_PAIR, GPU_NEIGH };
+ enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;
diff --git a/src/GPU/pair_lj_cut_gpu.cpp b/src/GPU/pair_lj_cut_gpu.cpp
index 63a908d3e7..79a1bbdc3f 100644
--- a/src/GPU/pair_lj_cut_gpu.cpp
+++ b/src/GPU/pair_lj_cut_gpu.cpp
@@ -65,7 +65,7 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutGPU::PairLJCutGPU(LAMMPS *lmp) : PairLJCut(lmp), gpu_mode(GPU_PAIR)
+PairLJCutGPU::PairLJCutGPU(LAMMPS *lmp) : PairLJCut(lmp), gpu_mode(GPU_FORCE)
 {
   respa_enable = 0;
   cpu_time = 0.0;
@@ -92,7 +92,7 @@ void PairLJCutGPU::compute(int eflag, int vflag)
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
-  if (gpu_mode == GPU_NEIGH) {
+  if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = ljl_gpu_compute_n(neighbor->ago, inum, nall,
 				   atom->x, atom->type, domain->sublo,
@@ -156,7 +156,7 @@ void PairLJCutGPU::init_style()
 			     cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
-  if (gpu_mode != GPU_NEIGH) {
+  if (gpu_mode == GPU_FORCE) {
     int irequest = neighbor->request(this);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
diff --git a/src/GPU/pair_lj_cut_gpu.h b/src/GPU/pair_lj_cut_gpu.h
index 1a8c1db46f..20b1bf3aee 100644
--- a/src/GPU/pair_lj_cut_gpu.h
+++ b/src/GPU/pair_lj_cut_gpu.h
@@ -33,7 +33,7 @@ class PairLJCutGPU : public PairLJCut {
   void init_style();
   double memory_usage();
 
- enum { GPU_PAIR, GPU_NEIGH };
+ enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;
diff --git a/src/GPU/pair_lj_cut_tgpu.cpp b/src/GPU/pair_lj_cut_tgpu.cpp
index 043cf0e5bb..718b03d3f6 100644
--- a/src/GPU/pair_lj_cut_tgpu.cpp
+++ b/src/GPU/pair_lj_cut_tgpu.cpp
@@ -64,7 +64,7 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutTGPU::PairLJCutTGPU(LAMMPS *lmp) : PairLJCut(lmp), gpu_mode(GPU_PAIR)
+PairLJCutTGPU::PairLJCutTGPU(LAMMPS *lmp) : PairLJCut(lmp), gpu_mode(GPU_FORCE)
 {
   respa_enable = 0;
   cpu_time = 0.0;
@@ -97,7 +97,7 @@ void PairLJCutTGPU::compute(int eflag, int vflag)
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;    
-  if (gpu_mode == GPU_NEIGH) {
+  if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = ljl_gpu_compute_n(neighbor->ago, inum, nall,
 				   atom->x, atom->type, domain->sublo,
@@ -168,7 +168,7 @@ void PairLJCutTGPU::init_style()
                               cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
-  if (gpu_mode != GPU_NEIGH) {
+  if (gpu_mode == GPU_FORCE) {
     int irequest = neighbor->request(this);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
diff --git a/src/GPU/pair_lj_cut_tgpu.h b/src/GPU/pair_lj_cut_tgpu.h
index 3393bd06b5..b9640929ce 100644
--- a/src/GPU/pair_lj_cut_tgpu.h
+++ b/src/GPU/pair_lj_cut_tgpu.h
@@ -34,7 +34,7 @@ class PairLJCutTGPU : public PairLJCut {
   void init_style();
   double memory_usage();
 
- enum { GPU_PAIR, GPU_NEIGH };
+ enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;
diff --git a/src/GPU/pair_lj_expand_gpu.cpp b/src/GPU/pair_lj_expand_gpu.cpp
index f45429a9db..dc8bff6c3d 100644
--- a/src/GPU/pair_lj_expand_gpu.cpp
+++ b/src/GPU/pair_lj_expand_gpu.cpp
@@ -66,7 +66,7 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJExpandGPU::PairLJExpandGPU(LAMMPS *lmp) : PairLJExpand(lmp), gpu_mode(GPU_PAIR)
+PairLJExpandGPU::PairLJExpandGPU(LAMMPS *lmp) : PairLJExpand(lmp), gpu_mode(GPU_FORCE)
 {
   respa_enable = 0;
   cpu_time = 0.0;
@@ -93,7 +93,7 @@ void PairLJExpandGPU::compute(int eflag, int vflag)
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;    
-  if (gpu_mode == GPU_NEIGH) {
+  if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = lje_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
 				   atom->type, domain->sublo, domain->subhi,
@@ -155,7 +155,7 @@ void PairLJExpandGPU::init_style()
 			     cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
-  if (gpu_mode != GPU_NEIGH) {
+  if (gpu_mode == GPU_FORCE) {
     int irequest = neighbor->request(this);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
diff --git a/src/GPU/pair_lj_expand_gpu.h b/src/GPU/pair_lj_expand_gpu.h
index 96619034c7..f02d10a4c4 100644
--- a/src/GPU/pair_lj_expand_gpu.h
+++ b/src/GPU/pair_lj_expand_gpu.h
@@ -33,7 +33,7 @@ class PairLJExpandGPU : public PairLJExpand {
   void init_style();
   double memory_usage();
 
- enum { GPU_PAIR, GPU_NEIGH };
+ enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;
diff --git a/src/GPU/pair_morse_gpu.cpp b/src/GPU/pair_morse_gpu.cpp
index ae5fcdfe64..9bee6b7316 100644
--- a/src/GPU/pair_morse_gpu.cpp
+++ b/src/GPU/pair_morse_gpu.cpp
@@ -65,7 +65,7 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairMorseGPU::PairMorseGPU(LAMMPS *lmp) : PairMorse(lmp), gpu_mode(GPU_PAIR)
+PairMorseGPU::PairMorseGPU(LAMMPS *lmp) : PairMorse(lmp), gpu_mode(GPU_FORCE)
 {
   cpu_time = 0.0;
 }
@@ -91,7 +91,7 @@ void PairMorseGPU::compute(int eflag, int vflag)
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;    
-  if (gpu_mode == GPU_NEIGH) {
+  if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = mor_gpu_compute_n(neighbor->ago, inum, nall,
 				   atom->x, atom->type, domain->sublo,
@@ -153,7 +153,7 @@ void PairMorseGPU::init_style()
 			     cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
-  if (gpu_mode != GPU_NEIGH) {
+  if (gpu_mode == GPU_FORCE) {
     int irequest = neighbor->request(this);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
diff --git a/src/GPU/pair_morse_gpu.h b/src/GPU/pair_morse_gpu.h
index ee9ba576d5..7023828192 100644
--- a/src/GPU/pair_morse_gpu.h
+++ b/src/GPU/pair_morse_gpu.h
@@ -33,7 +33,7 @@ class PairMorseGPU : public PairMorse {
   void init_style();
   double memory_usage();
 
- enum { GPU_PAIR, GPU_NEIGH };
+ enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;
diff --git a/src/GPU/pair_resquared_gpu.cpp b/src/GPU/pair_resquared_gpu.cpp
index 69b6060394..6d3d21bc9d 100644
--- a/src/GPU/pair_resquared_gpu.cpp
+++ b/src/GPU/pair_resquared_gpu.cpp
@@ -72,7 +72,7 @@ enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE};
 /* ---------------------------------------------------------------------- */
 
 PairRESquaredGPU::PairRESquaredGPU(LAMMPS *lmp) : PairRESquared(lmp),
-                                                gpu_mode(GPU_PAIR)
+                                                gpu_mode(GPU_FORCE)
 {
   avec = (AtomVecEllipsoid *) atom->style_match("ellipsoid");
   if (!avec) 
@@ -121,7 +121,7 @@ void PairRESquaredGPU::compute(int eflag, int vflag)
     }
   }
 
-  if (gpu_mode == GPU_NEIGH) {
+  if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = re_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
 				  atom->type, domain->sublo, domain->subhi,
@@ -202,7 +202,7 @@ void PairRESquaredGPU::init_style()
 			    cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
-  if (gpu_mode != GPU_NEIGH) {
+  if (gpu_mode == GPU_FORCE) {
     int irequest = neighbor->request(this);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
diff --git a/src/GPU/pair_resquared_gpu.h b/src/GPU/pair_resquared_gpu.h
index f16812f784..54d5513a02 100644
--- a/src/GPU/pair_resquared_gpu.h
+++ b/src/GPU/pair_resquared_gpu.h
@@ -33,7 +33,7 @@ class PairRESquaredGPU : public PairRESquared {
   void init_style();
   double memory_usage();
 
-  enum { GPU_PAIR, GPU_NEIGH };
+  enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;