diff --git a/src/USER-CUDA/atom_vec_atomic_cuda.cpp b/src/USER-CUDA/atom_vec_atomic_cuda.cpp
index b6bb14422c..f6b2d76b85 100644
--- a/src/USER-CUDA/atom_vec_atomic_cuda.cpp
+++ b/src/USER-CUDA/atom_vec_atomic_cuda.cpp
@@ -286,14 +286,14 @@ int AtomVecAtomicCuda::pack_exchange(int dim, double *buf)
   }
   
   if(max_nsend==0) grow_copylist(200);
-  
+
   int nsend_atoms = Cuda_AtomVecAtomicCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
   
   if(nsend_atoms>max_nsend) {grow_copylist(nsend_atoms+100);}
   if(nsend_atoms*NCUDAEXCHANGE>*maxsend) 
   {
   	grow_send((int) (nsend_atoms+100)*NCUDAEXCHANGE,buf_pointer,0);
-  	Cuda_AtomVecAtomicCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
+   	Cuda_AtomVecAtomicCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
   }
   
   int nlocal=atom->nlocal-nsend_atoms;
@@ -395,6 +395,7 @@ int AtomVecAtomicCuda::unpack_exchange(double *buf)
     }
   }
   cuda->shared_data.atom.nlocal=nlocal;
+  if(atom->nlocal!=nlocal)  // unbraced: guards only the update_nlocal assignment on the next line; atom->nlocal is assigned regardless
   cuda->shared_data.atom.update_nlocal=2;
   atom->nlocal=nlocal;
   mfirst+=m;
diff --git a/src/USER-CUDA/cuda.cpp b/src/USER-CUDA/cuda.cpp
index 819357bc16..438312ee81 100644
--- a/src/USER-CUDA/cuda.cpp
+++ b/src/USER-CUDA/cuda.cpp
@@ -130,11 +130,11 @@ Cuda::Cuda(LAMMPS *lmp) : Pointers(lmp)
 	downloadtime=0;
 	dotiming=false;
 
-    dotestatom = false;
-    testatom = 0;	
+  dotestatom = false;
+  testatom = 0;
 	oncpu = true;
 
-    self_comm = 0;
+  self_comm = 0;
 	MYDBG( printf("# CUDA: Cuda::Cuda Done...\n");)
 	//cCudaData<double, float, yx >  
 }
@@ -267,10 +267,10 @@ void Cuda::accelerator(int narg, char** arg)
 	cu_virial     = 0;
 	cu_eatom      = 0;
 	cu_vatom      = 0;
-	cu_radius	  = 0;
+	cu_radius	    = 0;
 	cu_density	  = 0;
-	cu_omega	  = 0;
-	cu_torque	  = 0;
+	cu_omega	    = 0;
+	cu_torque	    = 0;
 	
 	cu_special 	  = 0;
 	cu_nspecial   = 0;
@@ -299,8 +299,11 @@ void Cuda::setSharedDataZero()
 	shared_data.atom.q_flag = 0;
 	shared_data.atom.need_eatom = 0;
 	shared_data.atom.need_vatom = 0;
+  shared_data.atom.update_nmax = 1;  // the three update_* fields start at 1; VerletCuda::run decrements each one while it is > 0
+  shared_data.atom.update_nlocal = 1;
+  shared_data.atom.update_neigh = 1;
 	
-    shared_data.pair.cudable_force = 0;
+  shared_data.pair.cudable_force = 0;
 	shared_data.pair.collect_forces_later = 0;
 	shared_data.pair.use_block_per_atom = 0;
 	shared_data.pair.override_block_per_atom = -1;
@@ -429,14 +432,6 @@ void Cuda::checkResize()
 		if(cu_atom->q_flag)
 			{delete cu_q;          cu_q         = new cCudaData<double, F_FLOAT, x > ((double*)atom->q, & cu_atom->q         , atom->nmax  );}// cu_q->set_buffer(&(copy_buffer),&(copy_buffersize),true);}
 
-/*
-		if(force->pair)
-		if(force->pair->eatom)
-			{delete cu_eatom;          cu_eatom         = new cCudaData<double, ENERGY_FLOAT, x > (force->pair->eatom, & cu_atom->eatom         , atom->nmax  );}// cu_eatom->set_buffer(&(copy_buffer),&(copy_buffersize),true);}
-		if(force->pair)
-		if(force->pair->vatom)
-			{delete cu_vatom;          cu_vatom         = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)force->pair->vatom, & cu_atom->vatom         , atom->nmax,6  );}// cu_vatom->set_buffer(&(copy_buffer),&(copy_buffersize),true);}
-*/
 		if(atom->radius)
 		{
 			delete cu_radius;     cu_radius    = new cCudaData<double, X_FLOAT, x > (atom->radius    , & cu_atom->radius     , atom->nmax  );
@@ -444,11 +439,6 @@ void Cuda::checkResize()
 		    delete cu_omega_rmass;   cu_omega_rmass  = new cCudaData<V_FLOAT, V_FLOAT, x> (omega_rmass , & cu_atom->omega_rmass      , atom->nmax*4);
 		}
 
-		/*		
-		if(atom->density)
-			{delete cu_density;    cu_density   = new cCudaData<double, F_FLOAT, x > (atom->density   , & cu_atom->density     , atom->nmax  );}
-		*/
-
 		if(atom->omega)
 			{delete cu_omega;      cu_omega     = new cCudaData<double, V_FLOAT, yx > (((double*) atom->omega)    , & cu_atom->omega     , atom->nmax,3  );}
 
@@ -464,12 +454,10 @@ void Cuda::checkResize()
 		shared_data.atom.special_flag = neighbor->special_flag;
 		shared_data.atom.molecular = atom->molecular;
 		   
-  	    cu_atom->update_nmax = 2;
-	    cu_atom->nmax        = atom->nmax;
+    cu_atom->update_nmax = 2;
+    cu_atom->nmax        = atom->nmax;
 	    
-	    //delete [] x_type; 			x_type 		= new X_FLOAT4[atom->nmax];
 		delete cu_x_type;           cu_x_type   = new cCudaData<X_FLOAT, X_FLOAT, x> (x_type , & cu_atom->x_type      , atom->nmax*4);
-	   // shared_data.buffer_new = 2;
 	}
 
 	if(((cu_xhold==NULL)||(cu_xhold->get_dim()[0]<neighbor->maxhold))&&neighbor->xhold)
@@ -488,6 +476,12 @@ void Cuda::checkResize()
 	  {
 	  	cu_map_array   = new cCudaData<int, int, x > (atom->get_map_array()   , & cu_atom->map_array     , atom->get_map_size()  );
 	  }
+	  else
+	  if(cu_map_array->dev_size()/sizeof(int)<atom->get_map_size())  // existing device map holds fewer ints than atom->get_map_size(): replaced below; assumes dev_size() is in bytes - TODO confirm
+	  {
+	    delete cu_map_array;
+      cu_map_array   = new cCudaData<int, int, x > (atom->get_map_array()   , & cu_atom->map_array     , atom->get_map_size()  );
+	  }
 	}
 	
 	
@@ -512,11 +506,6 @@ void Cuda::checkResize()
 	if(atom->radius)
 	if(cu_radius->get_host_data() != atom->radius) cu_radius->set_host_data((double*) (atom->radius));
 
-	/*
-	if(atom->density)
-	if(cu_density->get_host_data() != atom->density) cu_density->set_host_data((double*) (atom->density));
-	*/
-
 	if(atom->omega)
 	if(cu_omega->get_host_data() != atom->omega) cu_omega->set_host_data((double*) (atom->omega));
 
@@ -558,7 +547,7 @@ void Cuda::evsetup_eatom_vatom(int eflag_atom,int vflag_atom)
     	if(not cu_vatom) 
     		cu_vatom         = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)force->pair->vatom, & (shared_data.atom.vatom)         , atom->nmax ,6 );// cu_vatom->set_buffer(&(copy_buffer),&(copy_buffersize),true);}
     	cu_vatom->set_host_data((double*)force->pair->vatom); 
-		cu_vatom->memset_device(0);
+		  cu_vatom->memset_device(0);
     }
 }
 
@@ -579,16 +568,9 @@ void Cuda::uploadAll()
 	cu_image->upload();
 	if(shared_data.atom.q_flag) cu_q    ->upload();
 	
-	//printf("A3\n");
-	//if(shared_data.atom.need_eatom) cu_eatom->upload();
-	//printf("A4\n");
-	//if(shared_data.atom.need_vatom) cu_vatom->upload();
-	//printf("A5\n");
-	
 	if(atom->rmass)             cu_rmass->upload();
 
 	if(atom->radius)            cu_radius->upload();
-	//	if(atom->density)           cu_density->upload();
 	if(atom->omega)             cu_omega->upload();
 	if(atom->torque)            cu_torque->upload();
 	if(atom->special)           cu_special->upload();
@@ -631,7 +613,6 @@ void Cuda::downloadAll()
 	if(atom->rmass)             cu_rmass->download();
 	
 	if(atom->radius)            cu_radius->download();
-	//	if(atom->density)           cu_density->download();
 	if(atom->omega)             cu_omega->download();
 	if(atom->torque)            cu_torque->download();
 	if(atom->special)           cu_special->download();
@@ -747,13 +728,13 @@ void Cuda::setTimingsZero()
 	shared_data.cuda_timings.neigh_special = 0;
 	
 	//PPPM
- 	shared_data.cuda_timings.pppm_particle_map; 
-    shared_data.cuda_timings.pppm_make_rho; 
-    shared_data.cuda_timings.pppm_brick2fft; 
-    shared_data.cuda_timings.pppm_poisson; 
-    shared_data.cuda_timings.pppm_fillbrick; 
-    shared_data.cuda_timings.pppm_fieldforce; 
-    shared_data.cuda_timings.pppm_compute; 
+ 	shared_data.cuda_timings.pppm_particle_map = 0;
+  shared_data.cuda_timings.pppm_make_rho = 0;
+  shared_data.cuda_timings.pppm_brick2fft = 0;
+  shared_data.cuda_timings.pppm_poisson = 0;
+  shared_data.cuda_timings.pppm_fillbrick = 0;
+  shared_data.cuda_timings.pppm_fieldforce = 0;
+  shared_data.cuda_timings.pppm_compute = 0;
 	
 	CudaWrapper_CheckUploadTime(true);
 	CudaWrapper_CheckDownloadTime(true);
@@ -789,8 +770,8 @@ void Cuda::print_timings()
 	printf(" Exchange MPI            \t %lf \n",shared_data.cuda_timings.comm_exchange_mpi);
 	printf(" Exchange Kernel Pack    \t %lf \n",shared_data.cuda_timings.comm_exchange_kernel_pack);
 	printf(" Exchange Kernel Unpack  \t %lf \n",shared_data.cuda_timings.comm_exchange_kernel_unpack);
-    printf(" Exchange Kernel Fill    \t %lf \n",shared_data.cuda_timings.comm_exchange_kernel_fill);
-    printf(" Exchange CPU Pack	     \t %lf \n",shared_data.cuda_timings.comm_exchange_cpu_pack);
+  printf(" Exchange Kernel Fill    \t %lf \n",shared_data.cuda_timings.comm_exchange_kernel_fill);
+  printf(" Exchange CPU Pack	     \t %lf \n",shared_data.cuda_timings.comm_exchange_cpu_pack);
 	printf(" Exchange Upload         \t %lf \n",shared_data.cuda_timings.comm_exchange_upload);
 	printf(" Exchange Download       \t %lf \n",shared_data.cuda_timings.comm_exchange_download);
 	printf("\n");
diff --git a/src/USER-CUDA/cuda_neigh_list.cpp b/src/USER-CUDA/cuda_neigh_list.cpp
index ef9edf5ef3..01f8e0c6a8 100644
--- a/src/USER-CUDA/cuda_neigh_list.cpp
+++ b/src/USER-CUDA/cuda_neigh_list.cpp
@@ -111,6 +111,7 @@ void CudaNeighList::dev_alloc()
 	neighbors_inner = new int[sneighlist.maxlocal*sneighlist.maxneighbors];
 	cu_neighbors_inner = new cCudaData<int, int, x> (neighbors_inner	 , & sneighlist.neighbors_inner , sneighlist.maxlocal*sneighlist.maxneighbors );
 	}
+	cuda->shared_data.atom.update_neigh=2;  // set at the end of dev_alloc(), after the allocations above; VerletCuda::run decrements it while > 0
 	MYDBG( printf("# CUDA: CudaNeighList::dev_alloc() ... end\n"); )
 }
 
diff --git a/src/USER-CUDA/neigh_full_cuda.cpp b/src/USER-CUDA/neigh_full_cuda.cpp
index 61c9897f4a..14fe153ec9 100644
--- a/src/USER-CUDA/neigh_full_cuda.cpp
+++ b/src/USER-CUDA/neigh_full_cuda.cpp
@@ -250,6 +250,7 @@ void NeighborCuda::full_bin_cuda(NeighList *list)
   }*/
   list->cuda_list->cu_numneigh->download();
   list->cuda_list->cu_ilist->download();
+  cuda->shared_data.atom.update_neigh=2;  // set after the cu_numneigh / cu_ilist download() calls above
 	//printf("Done\n");
   
   MYDBG(printf(" # CUDA::NeighFullBinCuda ... end\n");)
diff --git a/src/USER-CUDA/verlet_cuda.cpp b/src/USER-CUDA/verlet_cuda.cpp
index fbaa1800a5..0a3ba3ff40 100644
--- a/src/USER-CUDA/verlet_cuda.cpp
+++ b/src/USER-CUDA/verlet_cuda.cpp
@@ -564,6 +564,7 @@ void VerletCuda::run(int n)
   cuda->shared_data.atom.reneigh_flag=0;
   cuda->shared_data.atom.update_nlocal=1;
   cuda->shared_data.atom.update_nmax=1;
+  cuda->shared_data.atom.update_neigh=1;
   cuda->shared_data.domain.update=1;
   cuda->shared_data.buffer_new=1;
   cuda->uploadtime=0;
@@ -627,14 +628,12 @@ void VerletCuda::run(int n)
 		          
 		          //start force calculation asynchronus
 			      cuda->shared_data.comm.comm_phase=1;
-			    //  printf("Pre Force Compute\n");
 		          force->pair->compute(eflag, vflag);
 			      timer->stamp(TIME_PAIR);
                   //CudaWrapper_Sync();
 				
 				  //download comm buffers from GPU, perform MPI communication and upload buffers again
 				  clock_gettime(CLOCK_REALTIME,&starttime);
-			   //   printf("Pre forward_comm(2)\n");
 				  comm->forward_comm(2);
  				  clock_gettime(CLOCK_REALTIME,&endtime);
 				  cuda->shared_data.cuda_timings.comm_forward_total+=
@@ -642,16 +641,13 @@ void VerletCuda::run(int n)
  				  timer->stamp(TIME_COMM);
  				  
  				  //wait for force calculation
-			      //printf("Pre Synch\n");
 				  CudaWrapper_Sync();
 				  timer->stamp(TIME_PAIR);			
 				
 				  //unpack communication buffers
 				  clock_gettime(CLOCK_REALTIME,&starttime);
-			    //  printf("Pre forward_comm(3)\n");
 				  comm->forward_comm(3);
 				  clock_gettime(CLOCK_REALTIME,&endtime);
-			  //    printf("Post forward_comm(3)\n");
 				  cuda->shared_data.cuda_timings.comm_forward_total+=
  						endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000;
 				
@@ -663,11 +659,9 @@ void VerletCuda::run(int n)
 			    else
 			    {
 			  	  //perform standard forward communication
-				//printf("Forward_comm\n");
 				  clock_gettime(CLOCK_REALTIME,&starttime);
 				  comm->forward_comm();
 				  clock_gettime(CLOCK_REALTIME,&endtime);
-				//printf("Forward_comm_done\n");
 				  cuda->shared_data.cuda_timings.comm_forward_total+=
  					endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000;
  				  timer->stamp(TIME_COMM);
@@ -677,13 +671,13 @@ void VerletCuda::run(int n)
 			else
 			{
  				int nlocalold=cuda->shared_data.atom.nlocal;
- 				//if(firstreneigh)
+ 				if(firstreneigh)  // the guarded block resets update_nlocal/update_nmax to 1 and then clears firstreneigh itself
  				{
  				  cuda->shared_data.atom.update_nlocal=1; 
-  				  cuda->shared_data.atom.update_nmax=1;
+  				cuda->shared_data.atom.update_nmax=1;
  				  firstreneigh=0;
  				}
- 				  cuda->shared_data.buffer_new=1;
+ 				cuda->shared_data.buffer_new=1;
 				MYDBG( printf("# CUDA VerletCuda::iterate: neighbor\n"); )
  				cuda->setDomainParams();
 				if(n_pre_exchange) modify->pre_exchange();
@@ -759,10 +753,10 @@ void VerletCuda::run(int n)
 				cuda->shared_data.cuda_timings.test2+=
  					endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000;
 			    
-			    //rebuild neighbor list
-			    test_atom(testatom,"Pre Neighbor");
+			  //rebuild neighbor list
+			  test_atom(testatom,"Pre Neighbor");
 				neighbor->build();
-				timer->stamp(TIME_NEIGHBOR);
+  			timer->stamp(TIME_NEIGHBOR);
 				MYDBG( printf("# CUDA VerletCuda::iterate: neighbor done\n"); )
 				
 				//if bonded interactions are used (in this case collect_forces_later is true), transfer data which only changes upon exchange/border routines from GPU to CPU 
@@ -772,7 +766,7 @@ void VerletCuda::run(int n)
 					cuda->cu_tag->download();
 					cuda->cu_type->download();
 					cuda->cu_mask->download();
-		      		if(cuda->cu_q) cuda->cu_q->download();
+		   		if(cuda->cu_q) cuda->cu_q->download();
 				}
 				cuda->shared_data.comm.comm_phase=3;
 			}
@@ -969,14 +963,16 @@ void VerletCuda::run(int n)
 			test_atom(testatom,"post output");
 			
 			if(cuda->shared_data.atom.update_nlocal>0)
-			cuda->shared_data.atom.update_nlocal--;
-  			if(cuda->shared_data.atom.update_nmax>0)
-  			cuda->shared_data.atom.update_nmax--;
-  			if(cuda->shared_data.domain.update>0)
+			  cuda->shared_data.atom.update_nlocal--;
+  		if(cuda->shared_data.atom.update_nmax>0)
+  		  cuda->shared_data.atom.update_nmax--;
+      if(cuda->shared_data.atom.update_neigh>0)  // same decrement-while-positive handling as update_nlocal / update_nmax above
+        cuda->shared_data.atom.update_neigh--;
+  		if(cuda->shared_data.domain.update>0)
   			cuda->shared_data.domain.update--;
-  			if(cuda->shared_data.buffer_new>0)
+  		if(cuda->shared_data.buffer_new>0)
   			cuda->shared_data.buffer_new--;
-    		cuda->shared_data.atom.reneigh_flag=0;
+    	cuda->shared_data.atom.reneigh_flag=0;
 		}
 
 
@@ -984,6 +980,7 @@ void VerletCuda::run(int n)
  		cuda->downloadAllNeighborLists();
   		cuda->shared_data.atom.update_nlocal=1;
   		cuda->shared_data.atom.update_nmax=1;
+      cuda->shared_data.atom.update_neigh=1;
   		cuda->shared_data.buffer_new=1;
   		cuda->shared_data.domain.update=1;
   		cuda->oncpu = true;