diff --git a/lib/gpu/lal_amoeba.cpp b/lib/gpu/lal_amoeba.cpp
index 8d9af4706e..917166c423 100644
--- a/lib/gpu/lal_amoeba.cpp
+++ b/lib/gpu/lal_amoeba.cpp
@@ -140,7 +140,7 @@ void AmoebaT::clear() {
   coeff_amclass.clear();
   sp_polar.clear();
   sp_nonpolar.clear();
-  
+
   this->clear_atomic();
 }
 
@@ -169,7 +169,7 @@ int AmoebaT::multipole_real(const int eflag, const int vflag) {
 
   // Build the short neighbor list for the cutoff off2_mpole,
   //   at this point mpole is the first kernel in a time step
-  
+
   this->k_short_nbor.set_size(GX,BX);
   this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
                          &this->_nbor_data->begin(),
@@ -194,7 +194,7 @@ int AmoebaT::multipole_real(const int eflag, const int vflag) {
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
 int AmoebaT::udirect2b(const int eflag, const int vflag) {
-  int ainum=this->ans->inum(); 
+  int ainum=this->ans->inum();
   if (ainum == 0)
     return 0;
 
@@ -216,7 +216,7 @@ int AmoebaT::udirect2b(const int eflag, const int vflag) {
                            &nbor_pitch, &this->_threads_per_atom);
     this->short_nbor_polar_avail = true;
   }
-  
+
   this->k_udirect2b.set_size(GX,BX);
   this->k_udirect2b.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_polar,
                         &this->nbor->dev_nbor, &this->_nbor_data->begin(),
diff --git a/lib/gpu/lal_amoeba.cu b/lib/gpu/lal_amoeba.cu
index 1deb3e3bb5..fdb959f3e2 100644
--- a/lib/gpu/lal_amoeba.cu
+++ b/lib/gpu/lal_amoeba.cu
@@ -492,7 +492,7 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
 
       numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
       //int jtype=jx.w;
- 
+
       // Compute r12
       numtyp xr = jx.x - ix.x;
       numtyp yr = jx.y - ix.y;
@@ -500,7 +500,7 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
       numtyp r2 = xr*xr + yr*yr + zr*zr;
 
       //if (r2>off2) continue;
-  
+
       numtyp r = ucl_sqrt(r2);
       const numtyp4 pol1j = polar1[j];
       numtyp ck  = pol1j.x;  // rpole[j][0];
@@ -533,12 +533,12 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
       numtyp qky = qkxy*xr + qkyy*yr + qkyz*zr;
       numtyp qkz = qkxz*xr + qkyz*yr + qkzz*zr;
       numtyp qkr = qkx*xr + qky*yr + qkz*zr;
-      
+
       numtyp dik = dix*dkx + diy*dky + diz*dkz;
       numtyp qik = qix*qkx + qiy*qky + qiz*qkz;
       numtyp diqk = dix*qkx + diy*qky + diz*qkz;
       numtyp dkqi = dkx*qix + dky*qiy + dkz*qiz;
-      numtyp qiqk = (numtyp)2.0*(qixy*qkxy+qixz*qkxz+qiyz*qkyz) + 
+      numtyp qiqk = (numtyp)2.0*(qixy*qkxy+qixz*qkxz+qiyz*qkyz) +
         qixx*qkxx + qiyy*qkyy + qizz*qkzz;
 
       // additional intermediates involving moments and distance
@@ -585,11 +585,11 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
       numtyp dkqirx = dkqiz*yr - dkqiy*zr;
       numtyp dkqiry = dkqix*zr - dkqiz*xr;
       numtyp dkqirz = dkqiy*xr - dkqix*yr;
-      numtyp dqikx = diy*qkz - diz*qky + dky*qiz - dkz*qiy - 
+      numtyp dqikx = diy*qkz - diz*qky + dky*qiz - dkz*qiy -
         (numtyp)2.0*(qixy*qkxz+qiyy*qkyz+qiyz*qkzz - qixz*qkxy-qiyz*qkyy-qizz*qkyz);
-      numtyp dqiky = diz*qkx - dix*qkz + dkz*qix - dkx*qiz - 
+      numtyp dqiky = diz*qkx - dix*qkz + dkz*qix - dkx*qiz -
         (numtyp)2.0*(qixz*qkxx+qiyz*qkxy+qizz*qkxz - qixx*qkxz-qixy*qkyz-qixz*qkzz);
-      numtyp dqikz = dix*qky - diy*qkx + dkx*qiy - dky*qix - 
+      numtyp dqikz = dix*qky - diy*qkx + dkx*qiy - dky*qix -
         (numtyp)2.0*(qixx*qkxy+qixy*qkyy+qixz*qkyz - qixy*qkxx-qiyy*qkxy-qiyz*qkxz);
 
       // get reciprocal distance terms for this interaction
@@ -650,20 +650,20 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
 
       // compute the force components for this interaction
 
-      numtyp frcx = de*xr + term1*dix + term2*dkx + term3*(diqkx-dkqix) + 
+      numtyp frcx = de*xr + term1*dix + term2*dkx + term3*(diqkx-dkqix) +
         term4*qix + term5*qkx + term6*(qixk+qkxi);
-      numtyp frcy = de*yr + term1*diy + term2*dky + term3*(diqky-dkqiy) + 
+      numtyp frcy = de*yr + term1*diy + term2*dky + term3*(diqky-dkqiy) +
         term4*qiy + term5*qky + term6*(qiyk+qkyi);
-      numtyp frcz = de*zr + term1*diz + term2*dkz + term3*(diqkz-dkqiz) + 
+      numtyp frcz = de*zr + term1*diz + term2*dkz + term3*(diqkz-dkqiz) +
         term4*qiz + term5*qkz + term6*(qizk+qkzi);
 
       // compute the torque components for this interaction
 
-      numtyp ttmix = -rr3*dikx + term1*dirx + term3*(dqikx+dkqirx) - 
+      numtyp ttmix = -rr3*dikx + term1*dirx + term3*(dqikx+dkqirx) -
         term4*qirx - term6*(qikrx+qikx);
-      numtyp ttmiy = -rr3*diky + term1*diry + term3*(dqiky+dkqiry) - 
+      numtyp ttmiy = -rr3*diky + term1*diry + term3*(dqiky+dkqiry) -
         term4*qiry - term6*(qikry+qiky);
-      numtyp ttmiz = -rr3*dikz + term1*dirz + term3*(dqikz+dkqirz) - 
+      numtyp ttmiz = -rr3*dikz + term1*dirz + term3*(dqikz+dkqirz) -
         term4*qirz - term6*(qikrz+qikz);
 
       // increment force-based gradient and torque on first site
@@ -691,12 +691,12 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
         virial[5] += vyz;
       }
     } // nbor
-    
+
   } // ii<inum
 
   // accumulate tq
   store_answers_amoeba_tq(tq,ii,inum,tid,t_per_atom,offset,i,tep);
-  
+
   // accumate force, energy and virial: use _acc if not the first kernel
   store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
      offset,eflag,vflag,ans,engv);
@@ -771,7 +771,7 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
     numtyp qizz = pol3i.y;   // rpole[i][12];
     int itype  = pol3i.z;    // amtype[i];
     int igroup = pol3i.w;    // amgroup[i];
-    
+
     // debug:
     // xi__ = ix; xi__.w = itype;
 
@@ -790,7 +790,7 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
 
       numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
       //int jtype=jx.w;
- 
+
       // Compute r12
       numtyp xr = jx.x - ix.x;
       numtyp yr = jx.y - ix.y;
@@ -798,7 +798,7 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
       numtyp r2 = xr*xr + yr*yr + zr*zr;
 
       //if (r2>off2) continue;
-      
+
       numtyp r = ucl_sqrt(r2);
       numtyp rinv = ucl_recip(r);
       numtyp r2inv = rinv*rinv;
@@ -898,7 +898,7 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
       fid[0] = -xr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dkx + (numtyp)2.0*bcn[1]*qkx;
       fid[1] = -yr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dky + (numtyp)2.0*bcn[1]*qky;
       fid[2] = -zr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dkz + (numtyp)2.0*bcn[1]*qkz;
-        
+
       scalek = factor_pscale;
       bcn[0] = bn[1] - ((numtyp)1.0-scalek*scale3)*rr3;
       bcn[1] = bn[2] - ((numtyp)1.0-scalek*scale5)*rr5;
@@ -918,7 +918,7 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
   } // ii<inum
 
   // accumulate field and fieldp
-  
+
   store_answers_fieldp(_fieldp,ii,inum,tid,t_per_atom,offset,i,fieldp);
 }
 
@@ -977,10 +977,10 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
     int itype,igroup;
     numtyp bn[4],bcn[3];
     numtyp fid[3],fip[3];
-    
+
     itype  = polar3[i].z; // amtype[i];
     igroup = polar3[i].w; // amgroup[i];
-    
+
     numtyp pdi = coeff[itype].x;
     numtyp pti = coeff[itype].y;
 
@@ -995,7 +995,7 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
 
       numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
       //int jtype=jx.w;
- 
+
       // Compute r12
       numtyp xr = jx.x - ix.x;
       numtyp yr = jx.y - ix.y;
@@ -1003,7 +1003,7 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
       numtyp r2 = xr*xr + yr*yr + zr*zr;
 
       //if (r2>off2) continue;
-  
+
       numtyp r = ucl_sqrt(r2);
       numtyp rinv = ucl_recip(r);
       numtyp r2inv = rinv*rinv;
@@ -1044,7 +1044,7 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
       }
 
       // find terms needed later to compute mutual polarization
-      // if (poltyp != DIRECT) 
+      // if (poltyp != DIRECT)
       numtyp scale3 = (numtyp)1.0;
       numtyp scale5 = (numtyp)1.0;
       numtyp damp = pdi * coeff[jtype].x; // pdamp[jtype]
@@ -1056,7 +1056,7 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
           scale3 = (numtyp)1.0 - expdamp;
           scale5 = (numtyp)1.0 - expdamp*((numtyp)1.0+damp);
         }
-        
+
       } else { // damp == 0: ???
       }
 
@@ -1071,17 +1071,17 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
       tdipdip[3] = -bcn[0] + bcn[1]*yr*yr;
       tdipdip[4] = bcn[1]*yr*zr;
       tdipdip[5] = -bcn[0] + bcn[1]*zr*zr;
-      //if (i==0 && j == 10) 
+      //if (i==0 && j == 10)
       //  printf("i = %d: j = %d: tdipdip %f %f %f %f %f %f\n",
       //    i, j,tdipdip[0],tdipdip[1],tdipdip[2],tdipdip[3],tdipdip[4],tdipdip[5]);
       fid[0] = tdipdip[0]*ukx + tdipdip[1]*uky + tdipdip[2]*ukz;
       fid[1] = tdipdip[1]*ukx + tdipdip[3]*uky + tdipdip[4]*ukz;
       fid[2] = tdipdip[2]*ukx + tdipdip[4]*uky + tdipdip[5]*ukz;
-      
+
       fip[0] = tdipdip[0]*ukxp + tdipdip[1]*ukyp + tdipdip[2]*ukzp;
       fip[1] = tdipdip[1]*ukxp + tdipdip[3]*ukyp + tdipdip[4]*ukzp;
       fip[2] = tdipdip[2]*ukxp + tdipdip[4]*ukyp + tdipdip[5]*ukzp;
-      
+
       _fieldp[0] += fid[0];
       _fieldp[1] += fid[1];
       _fieldp[2] += fid[2];
@@ -1093,7 +1093,7 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
   } // ii<inum
 
   // accumulate field and fieldp
-  
+
   store_answers_fieldp(_fieldp,ii,inum,tid,t_per_atom,offset,i,fieldp);
 }
 
@@ -1221,7 +1221,7 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
 
       numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
       //int jtype=jx.w;
- 
+
       // Compute r12
       numtyp xr = jx.x - ix.x;
       numtyp yr = jx.y - ix.y;
@@ -1229,9 +1229,9 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
       numtyp r2 = xr*xr + yr*yr + zr*zr;
 
       //if (r2>off2) continue;
-  
+
       numtyp r = ucl_sqrt(r2);
-      
+
       const numtyp4 pol1j = polar1[j];
       numtyp ck = pol1j.x;   // rpole[j][0];
       numtyp dkx = pol1j.y;  // rpole[j][1];
@@ -1383,7 +1383,7 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
       numtyp tiy3 = psr3*uky + dsr3*ukyp;
       numtyp tiz3 = psr3*ukz + dsr3*ukzp;
       numtyp tuir = -psr5*ukr - dsr5*ukrp;
-      
+
       ufld[0] += tix3 + xr*tuir;
       ufld[1] += tiy3 + yr*tuir;
       ufld[2] += tiz3 + zr*tuir;
@@ -1394,14 +1394,14 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
       numtyp tiy5 = (numtyp)2.0 * (psr5*uky+dsr5*ukyp);
       numtyp tiz5 = (numtyp)2.0 * (psr5*ukz+dsr5*ukzp);
       tuir = -psr7*ukr - dsr7*ukrp;
-      
+
       dufld[0] += xr*tix5 + xr*xr*tuir;
       dufld[1] += xr*tiy5 + yr*tix5 + (numtyp)2.0*xr*yr*tuir;
       dufld[2] += yr*tiy5 + yr*yr*tuir;
       dufld[3] += xr*tiz5 + zr*tix5 + (numtyp)2.0*xr*zr*tuir;
       dufld[4] += yr*tiz5 + zr*tiy5 + (numtyp)2.0*yr*zr*tuir;
       dufld[5] += zr*tiz5 + zr*zr*tuir;
-      
+
       // get the dEd/dR terms used for direct polarization force
 
       term1 = bn[2] - dsc3*rr5;
@@ -1473,7 +1473,7 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
       numtyp frcz = depz;
 
       // get the dEp/dR terms used for direct polarization force
-      
+
       // tixx and tkxx
       term1 = bn[2] - psc3*rr5;
       term2 = bn[3] - psc5*rr7;
@@ -1550,7 +1550,7 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
 
       // get the dtau/dr terms used for mutual polarization force
       // poltyp == MUTUAL  && amoeba
-          
+
       term1 = bn[2] - usc3*rr5;
       term2 = bn[3] - usc5*rr7;
       term3 = usr5 + term1;
@@ -1617,7 +1617,7 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
         virial[5] += vyz;
       }
     } // nbor
-    
+
   } // ii<inum
 
   // accumulate ufld and dufld to compute tep
@@ -1648,7 +1648,7 @@ __kernel void k_special15(__global int * dev_nbor,
   atom_info(t_per_atom,ii,tid,offset);
 
   if (ii<inum) {
-  
+
     int numj, nbor, nbor_end;
     nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
               n_stride,nbor_end,nbor);
diff --git a/lib/gpu/lal_amoeba.h b/lib/gpu/lal_amoeba.h
index 04eb6e4aa9..df72435b81 100644
--- a/lib/gpu/lal_amoeba.h
+++ b/lib/gpu/lal_amoeba.h
@@ -70,13 +70,13 @@ class Amoeba : public BaseAmoeba<numtyp, acctyp> {
   UCL_D_Vec<numtyp4> coeff_amtype;
   /// csix = coeff_amclass.x; adisp = coeff_amclass.y;
   UCL_D_Vec<numtyp4> coeff_amclass;
-  /// Special polar values [0-4]: 
+  /// Special polar values [0-4]:
   ///   sp_polar.x = special_polar_wscale
   ///   sp_polar.y special_polar_pscale,
   ///   sp_polar.z = special_polar_piscale
   ///   sp_polar.w = special_mpole
   UCL_D_Vec<numtyp4> sp_polar;
-  /// Special nonpolar values [0-4]: 
+  /// Special nonpolar values [0-4]:
   ///   sp_nonpolar.x = special_hal
   ///   sp_nonpolar.y special_repel
   ///   sp_nonpolar.z = special_disp
@@ -97,7 +97,7 @@ class Amoeba : public BaseAmoeba<numtyp, acctyp> {
   int udirect2b(const int eflag, const int vflag);
   int umutual2b(const int eflag, const int vflag);
   int polar_real(const int eflag, const int vflag);
-  
+
 };
 
 }
diff --git a/lib/gpu/lal_base_amoeba.cpp b/lib/gpu/lal_base_amoeba.cpp
index c4fdb8c9e5..3728fbe85e 100644
--- a/lib/gpu/lal_base_amoeba.cpp
+++ b/lib/gpu/lal_base_amoeba.cpp
@@ -106,7 +106,7 @@ int BaseAmoebaT::init_atomic(const int nlocal, const int nall,
                               _threads_per_atom);
   if (success!=0)
     return success;
-                              
+
   // Initialize host-device load balancer
   hd_balancer.init(device,gpu_nbor,gpu_split);
 
@@ -121,7 +121,7 @@ int BaseAmoebaT::init_atomic(const int nlocal, const int nall,
   _maxspecial=maxspecial;
   _maxspecial15=maxspecial15;
 
-  // allocate per-atom array tep 
+  // allocate per-atom array tep
 
   int ef_nall=nlocal; //nall;
   if (ef_nall==0)
@@ -250,7 +250,7 @@ void BaseAmoebaT::compute_polar_real_host_nbor(const int f_ago, const int inum_f
                           const bool eflag_in, const bool vflag_in,
                           const bool eatom, const bool vatom,
                           int &host_start, const double cpu_time,
-                          bool &success, const double aewald, const double felec, 
+                          bool &success, const double aewald, const double felec,
                           const double off2_polar, double *host_q, const int nlocal,
                           double *boxlo, double *prd, void **tep_ptr) {
   acc_timers();
@@ -280,7 +280,7 @@ void BaseAmoebaT::compute_polar_real_host_nbor(const int f_ago, const int inum_f
     dev_special15_t.clear();
     dev_nspecial15.alloc(nall,*(this->ucl_device),UCL_READ_ONLY);
     dev_special15.alloc(_maxspecial15*nall,*(this->ucl_device),UCL_READ_ONLY);
-    dev_special15_t.alloc(nall*_maxspecial15,*(this->ucl_device),UCL_READ_ONLY);   
+    dev_special15_t.alloc(nall*_maxspecial15,*(this->ucl_device),UCL_READ_ONLY);
   }
 
   *tep_ptr=_tep.host.begin();
@@ -320,7 +320,7 @@ void BaseAmoebaT::compute_polar_real_host_nbor(const int f_ago, const int inum_f
   _off2_polar = off2_polar;
   _felec = felec;
   const int red_blocks=polar_real(eflag,vflag);
-  
+
   ans->copy_answers(eflag_in,vflag_in,eatom,vatom,ilist,red_blocks);
   device->add_ans_object(ans);
   hd_balancer.stop_timer();
@@ -375,7 +375,7 @@ int** BaseAmoebaT::precompute(const int ago, const int inum_full, const int nall
     dev_special15_t.clear();
     dev_nspecial15.alloc(nall,*(this->ucl_device),UCL_READ_ONLY);
     dev_special15.alloc(_maxspecial15*nall,*(this->ucl_device),UCL_READ_ONLY);
-    dev_special15_t.alloc(nall*_maxspecial15,*(this->ucl_device),UCL_READ_ONLY);   
+    dev_special15_t.alloc(nall*_maxspecial15,*(this->ucl_device),UCL_READ_ONLY);
   }
 
   if (inum_full==0) {
@@ -462,7 +462,7 @@ int** BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full,
 
   // reallocate per-atom arrays, transfer data from the host
   //   and build the neighbor lists if needed
-  // NOTE: 
+  // NOTE:
   //   For now we invoke precompute() again here,
   //     to be able to turn on/off the udirect2b kernel (which comes before this)
   //   Once all the kernels are ready, precompute() is needed only once
@@ -509,7 +509,7 @@ int** BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full,
     numtyp4* p = (numtyp4*)(&this->_tep[4*i]);
     printf("i = %d; tep = %f %f %f\n", i, p->x, p->y, p->z);
   }
-*/  
+*/
   return firstneigh; // nbor->host_jlist.begin()-host_start;
 }
 
@@ -560,7 +560,7 @@ int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full,
                           eflag_in, vflag_in, eatom, vatom,
                           host_start, ilist, jnum, cpu_time,
                           success, host_q, boxlo, prd);
-                         
+
   // ------------------- Resize _fieldp array ------------------------
 
   if (inum_full>_max_fieldp_size) {
@@ -698,7 +698,7 @@ int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full,
 
   // reallocate per-atom arrays, transfer data from the host
   //   and build the neighbor lists if needed
-  // NOTE: 
+  // NOTE:
   //   For now we invoke precompute() again here,
   //     to be able to turn on/off the udirect2b kernel (which comes before this)
   //   Once all the kernels are ready, precompute() is needed only once
@@ -745,7 +745,7 @@ int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full,
     numtyp4* p = (numtyp4*)(&this->_tep[4*i]);
     printf("i = %d; tep = %f %f %f\n", i, p->x, p->y, p->z);
   }
-*/  
+*/
   return firstneigh; // nbor->host_jlist.begin()-host_start;
 }
 
@@ -809,7 +809,7 @@ void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
       int idx = n+i*nstride;
       pextra[idx]   = uinp[i][0];
       pextra[idx+1] = uinp[i][1];
-      pextra[idx+2] = uinp[i][2];    
+      pextra[idx+2] = uinp[i][2];
     }
   }
 
@@ -818,7 +818,7 @@ void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
 
     for (int i = 0; i < _nall; i++) {
       int idx = n+i*nstride;
-      pextra[idx]   = pval[i]; 
+      pextra[idx]   = pval[i];
     }
   }
 }
@@ -846,7 +846,7 @@ void BaseAmoebaT::compile_kernels(UCL_Device &dev, const void *pair_str,
   k_special15.set_function(*pair_program,"k_special15");
   pos_tex.get_texture(*pair_program,"pos_tex");
   q_tex.get_texture(*pair_program,"q_tex");
-  
+
   _compiled=true;
 
   #if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
@@ -874,13 +874,13 @@ int BaseAmoebaT::add_onefive_neighbors() {
   int _nall=atom->nall();
   int ainum=ans->inum();
   int nbor_pitch=nbor->nbor_pitch();
-  
+
   k_special15.set_size(GX,BX);
   k_special15.run(&nbor->dev_nbor, &_nbor_data->begin(),
                     &atom->dev_tag, &dev_nspecial15, &dev_special15,
                     &ainum, &_nall, &nbor_pitch,
                     &_threads_per_atom);
-  
+
   return GX;
 }
 
diff --git a/lib/gpu/lal_base_amoeba.h b/lib/gpu/lal_base_amoeba.h
index fc665ec731..bd30fc3fbb 100644
--- a/lib/gpu/lal_base_amoeba.h
+++ b/lib/gpu/lal_base_amoeba.h
@@ -287,7 +287,7 @@ class BaseAmoeba {
   virtual int udirect2b(const int eflag, const int vflag) = 0;
   virtual int umutual2b(const int eflag, const int vflag) = 0;
   virtual int polar_real(const int eflag, const int vflag) = 0;
-  
+
 };
 
 }
diff --git a/lib/gpu/lal_hippo.cpp b/lib/gpu/lal_hippo.cpp
index caf910863f..ac221f8376 100644
--- a/lib/gpu/lal_hippo.cpp
+++ b/lib/gpu/lal_hippo.cpp
@@ -145,7 +145,7 @@ void HippoT::clear() {
   coeff_amclass.clear();
   sp_polar.clear();
   sp_nonpolar.clear();
-  
+
   this->clear_atomic();
 }
 
@@ -199,7 +199,7 @@ int** HippoT::precompute(const int ago, const int inum_full, const int nall,
     this->dev_special15_t.clear();
     this->dev_nspecial15.alloc(nall,*(this->ucl_device),UCL_READ_ONLY);
     this->dev_special15.alloc(this->_maxspecial15*nall,*(this->ucl_device),UCL_READ_ONLY);
-    this->dev_special15_t.alloc(nall*this->_maxspecial15,*(this->ucl_device),UCL_READ_ONLY);   
+    this->dev_special15_t.alloc(nall*this->_maxspecial15,*(this->ucl_device),UCL_READ_ONLY);
   }
 
   if (inum_full==0) {
@@ -286,7 +286,7 @@ int** HippoT::compute_dispersion_real(const int ago, const int inum_full,
 
   // reallocate per-atom arrays, transfer data from the host
   //   and build the neighbor lists if needed
-  // NOTE: 
+  // NOTE:
   //   For now we invoke precompute() again here,
   //     to be able to turn on/off the udirect2b kernel (which comes before this)
   //   Once all the kernels are ready, precompute() is needed only once
@@ -339,7 +339,7 @@ int HippoT::dispersion_real(const int eflag, const int vflag) {
 
   // Build the short neighbor list for the cutoff off2_disp,
   //   at this point mpole is the first kernel in a time step
-  
+
   this->k_short_nbor.set_size(GX,BX);
   this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
                          &this->_nbor_data->begin(),
@@ -397,7 +397,7 @@ int** HippoT::compute_multipole_real(const int ago, const int inum_full,
 
   // reallocate per-atom arrays, transfer data from the host
   //   and build the neighbor lists if needed
-  // NOTE: 
+  // NOTE:
   //   For now we invoke precompute() again here,
   //     to be able to turn on/off the udirect2b kernel (which comes before this)
   //   Once all the kernels are ready, precompute() is needed only once
@@ -468,7 +468,7 @@ int HippoT::multipole_real(const int eflag, const int vflag) {
 
   // Build the short neighbor list for the cutoff off2_mpole,
   //   at this point mpole is the first kernel in a time step
-  
+
   this->k_short_nbor.set_size(GX,BX);
   this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
                          &this->_nbor_data->begin(),
@@ -537,7 +537,7 @@ int** HippoT::compute_udirect2b(const int ago, const int inum_full,
                           eflag_in, vflag_in, eatom, vatom,
                           host_start, ilist, jnum, cpu_time,
                           success, host_q, boxlo, prd);
-                         
+
   // ------------------- Resize _fieldp array ------------------------
 
   if (inum_full>this->_max_fieldp_size) {
@@ -569,7 +569,7 @@ int** HippoT::compute_udirect2b(const int ago, const int inum_full,
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
 int HippoT::udirect2b(const int eflag, const int vflag) {
-  int ainum=this->ans->inum(); 
+  int ainum=this->ans->inum();
   if (ainum == 0)
     return 0;
 
@@ -591,7 +591,7 @@ int HippoT::udirect2b(const int eflag, const int vflag) {
                            &nbor_pitch, &this->_threads_per_atom);
     this->short_nbor_polar_avail = true;
   }
-  
+
   this->k_udirect2b.set_size(GX,BX);
   this->k_udirect2b.run(&this->atom->x, &this->atom->extra,
                         &coeff_amtype, &coeff_amclass, &sp_polar,
@@ -756,7 +756,7 @@ int** HippoT::compute_polar_real(const int ago, const int inum_full,
 
   // reallocate per-atom arrays, transfer data from the host
   //   and build the neighbor lists if needed
-  // NOTE: 
+  // NOTE:
   //   For now we invoke precompute() again here,
   //     to be able to turn on/off the udirect2b kernel (which comes before this)
   //   Once all the kernels are ready, precompute() is needed only once
@@ -803,7 +803,7 @@ int** HippoT::compute_polar_real(const int ago, const int inum_full,
     numtyp4* p = (numtyp4*)(&this->_tep[4*i]);
     printf("i = %d; tep = %f %f %f\n", i, p->x, p->y, p->z);
   }
-*/  
+*/
   return firstneigh; // nbor->host_jlist.begin()-host_start;
 }
 
diff --git a/lib/gpu/lal_hippo.cu b/lib/gpu/lal_hippo.cu
index f643f2b994..b282586efb 100644
--- a/lib/gpu/lal_hippo.cu
+++ b/lib/gpu/lal_hippo.cu
@@ -491,7 +491,7 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
 
       numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
       //int jtype=jx.w;
- 
+
       // Compute r12
       numtyp xr = ix.x - jx.x;
       numtyp yr = ix.y - jx.y;
@@ -499,7 +499,7 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
       numtyp r2 = xr*xr + yr*yr + zr*zr;
 
       if (r2>off2) continue;
-  
+
       const numtyp4 pol1j = polar1[j];
       numtyp ck  = pol1j.x;  // rpole[j][0];
       numtyp dkx = pol1j.y;  // rpole[j][1];
@@ -514,7 +514,7 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
       numtyp qkyz = pol3j.x; // rpole[j][9];
       numtyp qkzz = pol3j.y; // rpole[j][12];
       int jtype = pol3j.z; // amtype[j];
-      
+
       numtyp sizk = coeff[jtype].x; // sizpr[jtype];
       numtyp dmpk = coeff[jtype].y; // dmppr[jtype];
       numtyp valk = coeff[jtype].z; // elepr[jtype];
@@ -534,12 +534,12 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
       numtyp qky = qkxy*xr + qkyy*yr + qkyz*zr;
       numtyp qkz = qkxz*xr + qkyz*yr + qkzz*zr;
       numtyp qkr = qkx*xr + qky*yr + qkz*zr;
-      
+
       numtyp dik = dix*dkx + diy*dky + diz*dkz;
       numtyp qik = qix*qkx + qiy*qky + qiz*qkz;
       numtyp diqk = dix*qkx + diy*qky + diz*qkz;
       numtyp dkqi = dkx*qix + dky*qiy + dkz*qiz;
-      numtyp qiqk = (numtyp)2.0*(qixy*qkxy+qixz*qkxz+qiyz*qkyz) + 
+      numtyp qiqk = (numtyp)2.0*(qixy*qkxy+qixz*qkxz+qiyz*qkyz) +
         qixx*qkxx + qiyy*qkyy + qizz*qkzz;
 
       // additional intermediates involving moments and distance
@@ -586,11 +586,11 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
       numtyp dkqirx = dkqiz*yr - dkqiy*zr;
       numtyp dkqiry = dkqix*zr - dkqiz*xr;
       numtyp dkqirz = dkqiy*xr - dkqix*yr;
-      numtyp dqikx = diy*qkz - diz*qky + dky*qiz - dkz*qiy - 
+      numtyp dqikx = diy*qkz - diz*qky + dky*qiz - dkz*qiy -
         (numtyp)2.0*(qixy*qkxz+qiyy*qkyz+qiyz*qkzz - qixz*qkxy-qiyz*qkyy-qizz*qkyz);
-      numtyp dqiky = diz*qkx - dix*qkz + dkz*qix - dkx*qiz - 
+      numtyp dqiky = diz*qkx - dix*qkz + dkz*qix - dkx*qiz -
         (numtyp)2.0*(qixz*qkxx+qiyz*qkxy+qizz*qkxz - qixx*qkxz-qixy*qkyz-qixz*qkzz);
-      numtyp dqikz = dix*qky - diy*qkx + dkx*qiy - dky*qix - 
+      numtyp dqikz = dix*qky - diy*qkx + dkx*qiy - dky*qix -
         (numtyp)2.0*(qixx*qkxy+qixy*qkyy+qixz*qkyz - qixy*qkxx-qiyy*qkxy-qiyz*qkxz);
 
       // get reciprocal distance terms for this interaction
@@ -616,7 +616,7 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
       numtyp term3 = vali*qkr + valk*qir - dir*dkr + (numtyp)2.0*(dkqi-diqk+qiqk);
       numtyp term4 = dir*qkr - dkr*qir - 4.0*qik;
       numtyp term5 = qir*qkr;
-      numtyp eterm = term1*dmpik[0] + term2*dmpik[2] + 
+      numtyp eterm = term1*dmpik[0] + term2*dmpik[2] +
         term3*dmpik[4] + term4*dmpik[6] + term5*dmpik[8];
 
       // compute the Pauli repulsion energy for this interaction
@@ -626,7 +626,7 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
 
       // calculate intermediate terms for force and torque
 
-      numtyp de = term1*dmpik[2] + term2*dmpik[4] + term3*dmpik[6] + 
+      numtyp de = term1*dmpik[2] + term2*dmpik[4] + term3*dmpik[6] +
         term4*dmpik[8] + term5*dmpik[10];
       term1 = -valk*dmpik[2] + dkr*dmpik[4] - qkr*dmpik[6];
       term2 = vali*dmpik[2] + dir*dmpik[4] + qir*dmpik[6];
@@ -637,23 +637,23 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
 
       // compute the force components for this interaction
 
-      numtyp frcx = de*xr + term1*dix + term2*dkx + term3*(diqkx-dkqix) + 
+      numtyp frcx = de*xr + term1*dix + term2*dkx + term3*(diqkx-dkqix) +
         term4*qix + term5*qkx + term6*(qixk+qkxi);
-      numtyp frcy = de*yr + term1*diy + term2*dky + term3*(diqky-dkqiy) + 
+      numtyp frcy = de*yr + term1*diy + term2*dky + term3*(diqky-dkqiy) +
         term4*qiy + term5*qky + term6*(qiyk+qkyi);
-      numtyp frcz = de*zr + term1*diz + term2*dkz + term3*(diqkz-dkqiz) + 
+      numtyp frcz = de*zr + term1*diz + term2*dkz + term3*(diqkz-dkqiz) +
         term4*qiz + term5*qkz + term6*(qizk+qkzi);
       frcx = frcx*rr1 + eterm*rr3*xr;
       frcy = frcy*rr1 + eterm*rr3*yr;
       frcz = frcz*rr1 + eterm*rr3*zr;
 
       // compute the torque components for this interaction
-      
-      numtyp ttmix = -dmpik[2]*dikx + term1*dirx + term3*(dqikx+dkqirx) - 
+
+      numtyp ttmix = -dmpik[2]*dikx + term1*dirx + term3*(dqikx+dkqirx) -
         term4*qirx - term6*(qikrx+qikx);
-      numtyp ttmiy = -dmpik[2]*diky + term1*diry + term3*(dqiky+dkqiry) - 
+      numtyp ttmiy = -dmpik[2]*diky + term1*diry + term3*(dqiky+dkqiry) -
         term4*qiry - term6*(qikry+qiky);
-      numtyp ttmiz = -dmpik[2]*dikz + term1*dirz + term3*(dqikz+dkqirz) - 
+      numtyp ttmiz = -dmpik[2]*dikz + term1*dirz + term3*(dqikz+dkqirz) -
         term4*qirz - term6*(qikrz+qikz);
       ttmix = sizik * ttmix * rr1;
       ttmiy = sizik * ttmiy * rr1;
@@ -706,7 +706,7 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
         virial[5] += vyz;
       }
     } // nbor
-    
+
   } // ii<inum
 
   // accumulate tq
@@ -786,7 +786,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
 
       numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
       //int jtype=jx.w;
- 
+
       // Compute r12
       numtyp xr = ix.x - jx.x;
       numtyp yr = ix.y - jx.y;
@@ -794,7 +794,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
       numtyp r2 = xr*xr + yr*yr + zr*zr;
 
       //if (r2>off2) continue;
-  
+
       int jtype =   polar3[j].z; // amtype[j];
       int jclass = coeff_amtype[jtype].w;  // amtype2class[jtype];
       numtyp ck = coeff_amclass[jclass].x;    // csix[jclass];
@@ -816,7 +816,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
       numtyp dk = ak * r;
       numtyp expi = ucl_exp(-di);
       numtyp expk = ucl_exp(-dk);
-     
+
       numtyp ai2,ak2;
       numtyp di4,di5;
       numtyp dk2,dk3;
@@ -844,7 +844,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
           - tk2*((numtyp)1.0 + dk + (numtyp)0.5*dk2 + dk3/(numtyp)6.0) * expk
           - (numtyp)2.0*ti2*tk*((numtyp)1.0 + di + di2/(numtyp)3.0) * expi
           - (numtyp)2.0*tk2*ti*((numtyp)1.0 + dk + dk2/(numtyp)3.0) * expk;
-        ddamp = (numtyp)0.25 * di2 * ti2 * ai * expi * (r*ai+(numtyp)4.0*tk - (numtyp)1.0) + 
+        ddamp = (numtyp)0.25 * di2 * ti2 * ai * expi * (r*ai+(numtyp)4.0*tk - (numtyp)1.0) +
           (numtyp)0.25 * dk2 * tk2 * ak * expk * (r*ak+(numtyp)4.0*ti-(numtyp)1.0);
 
       } else {
@@ -856,7 +856,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
       }
 
       numtyp damp = (numtyp)1.5*damp5 - (numtyp)0.5*damp3;
-      
+
       // apply damping and scaling factors for this interaction
 
       numtyp scale = factor_disp * damp*damp;
@@ -892,7 +892,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
       virial[4] += vzx;
       virial[5] += vzy;
     } // nbor
-    
+
   } // ii<inum
 
   // accumate force, energy and virial
@@ -997,7 +997,7 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
 
       numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
       //int jtype=jx.w;
- 
+
       // Compute r12
       numtyp xr = jx.x - ix.x;
       numtyp yr = jx.y - ix.y;
@@ -1005,7 +1005,7 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
       numtyp r2 = xr*xr + yr*yr + zr*zr;
 
       //if (r2>off2) continue;
-  
+
       numtyp r = ucl_sqrt(r2);
       const numtyp4 pol1j = polar1[j];
       numtyp ck  = pol1j.x;  // rpole[j][0];
@@ -1043,12 +1043,12 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
       numtyp qky = qkxy*xr + qkyy*yr + qkyz*zr;
       numtyp qkz = qkxz*xr + qkyz*yr + qkzz*zr;
       numtyp qkr = qkx*xr + qky*yr + qkz*zr;
-      
+
       numtyp dik = dix*dkx + diy*dky + diz*dkz;
       numtyp qik = qix*qkx + qiy*qky + qiz*qkz;
       numtyp diqk = dix*qkx + diy*qky + diz*qkz;
       numtyp dkqi = dkx*qix + dky*qiy + dkz*qiz;
-      numtyp qiqk = (numtyp)2.0*(qixy*qkxy+qixz*qkxz+qiyz*qkyz) + 
+      numtyp qiqk = (numtyp)2.0*(qixy*qkxy+qixz*qkxz+qiyz*qkyz) +
         qixx*qkxx + qiyy*qkyy + qizz*qkzz;
 
       // additional intermediates involving moments and distance
@@ -1095,11 +1095,11 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
       numtyp dkqirx = dkqiz*yr - dkqiy*zr;
       numtyp dkqiry = dkqix*zr - dkqiz*xr;
       numtyp dkqirz = dkqiy*xr - dkqix*yr;
-      numtyp dqikx = diy*qkz - diz*qky + dky*qiz - dkz*qiy - 
+      numtyp dqikx = diy*qkz - diz*qky + dky*qiz - dkz*qiy -
         (numtyp)2.0*(qixy*qkxz+qiyy*qkyz+qiyz*qkzz - qixz*qkxy-qiyz*qkyy-qizz*qkyz);
-      numtyp dqiky = diz*qkx - dix*qkz + dkz*qix - dkx*qiz - 
+      numtyp dqiky = diz*qkx - dix*qkz + dkz*qix - dkx*qiz -
         (numtyp)2.0*(qixz*qkxx+qiyz*qkxy+qizz*qkxz - qixx*qkxz-qixy*qkyz-qixz*qkzz);
-      numtyp dqikz = dix*qky - diy*qkx + dkx*qiy - dky*qix - 
+      numtyp dqikz = dix*qky - diy*qkx + dkx*qiy - dky*qix -
         (numtyp)2.0*(qixx*qkxy+qixy*qkyy+qixz*qkyz - qixy*qkxx-qiyy*qkxy-qiyz*qkxz);
 
       // get reciprocal distance terms for this interaction
@@ -1164,16 +1164,16 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
       numtyp rr11ik = bn[5] - ((numtyp)1.0-scalek*dmpij[10])*rr11;
       rr1 = bn[0] - ((numtyp)1.0-scalek)*rr1;
       rr3 = bn[1] - ((numtyp)1.0-scalek)*rr3;
-      numtyp e = term1*rr1 + term4ik*rr7ik + term5ik*rr9ik + 
-        term1i*rr1i + term1k*rr1k + term1ik*rr1ik + 
-        term2i*rr3i + term2k*rr3k + term2ik*rr3ik + 
+      numtyp e = term1*rr1 + term4ik*rr7ik + term5ik*rr9ik +
+        term1i*rr1i + term1k*rr1k + term1ik*rr1ik +
+        term2i*rr3i + term2k*rr3k + term2ik*rr3ik +
         term3i*rr5i + term3k*rr5k + term3ik*rr5ik;
 
       // find damped multipole intermediates for force and torque
 
-      numtyp de = term1*rr3 + term4ik*rr9ik + term5ik*rr11ik + 
-        term1i*rr3i + term1k*rr3k + term1ik*rr3ik + 
-        term2i*rr5i + term2k*rr5k + term2ik*rr5ik + 
+      numtyp de = term1*rr3 + term4ik*rr9ik + term5ik*rr11ik +
+        term1i*rr3i + term1k*rr3k + term1ik*rr3ik +
+        term2i*rr5i + term2k*rr5k + term2ik*rr5ik +
         term3i*rr7i + term3k*rr7k + term3ik*rr7ik;
       term1 = -corek*rr3i - valk*rr3ik + dkr*rr5ik - qkr*rr7ik;
       term2 = corei*rr3k + vali*rr3ik + dir*rr5ik + qir*rr7ik;
@@ -1187,20 +1187,20 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
 
       // compute the force components for this interaction
 
-      numtyp frcx = de*xr + term1*dix + term2*dkx + term3*(diqkx-dkqix) + 
+      numtyp frcx = de*xr + term1*dix + term2*dkx + term3*(diqkx-dkqix) +
         term4*qix + term5*qkx + term6*(qixk+qkxi);
-      numtyp frcy = de*yr + term1*diy + term2*dky + term3*(diqky-dkqiy) + 
+      numtyp frcy = de*yr + term1*diy + term2*dky + term3*(diqky-dkqiy) +
         term4*qiy + term5*qky + term6*(qiyk+qkyi);
-      numtyp frcz = de*zr + term1*diz + term2*dkz + term3*(diqkz-dkqiz) + 
+      numtyp frcz = de*zr + term1*diz + term2*dkz + term3*(diqkz-dkqiz) +
         term4*qiz + term5*qkz + term6*(qizk+qkzi);
 
       // compute the torque components for this interaction
 
-      numtyp ttmix = -rr3*dikx + term1*dirx + term3*(dqikx+dkqirx) - 
+      numtyp ttmix = -rr3*dikx + term1*dirx + term3*(dqikx+dkqirx) -
         term4*qirx - term6*(qikrx+qikx);
-      numtyp ttmiy = -rr3*diky + term1*diry + term3*(dqiky+dkqiry) - 
+      numtyp ttmiy = -rr3*diky + term1*diry + term3*(dqiky+dkqiry) -
         term4*qiry - term6*(qikry+qiky);
-      numtyp ttmiz = -rr3*dikz + term1*dirz + term3*(dqikz+dkqirz) - 
+      numtyp ttmiz = -rr3*dikz + term1*dirz + term3*(dqikz+dkqirz) -
         term4*qirz - term6*(qikrz+qikz);
 
       // increment force-based gradient and torque on first site
@@ -1228,12 +1228,12 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
         virial[5] += vyz;
       }
     } // nbor
-    
+
   } // ii<inum
 
   // accumulate tq
   store_answers_hippo_tq(tq,ii,inum,tid,t_per_atom,offset,i,tep);
-  
+
   // accumate force, energy and virial: use _acc if not the first kernel
   //store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
      //offset,eflag,vflag,ans,engv);
@@ -1327,7 +1327,7 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
 
       numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
       //int jtype=jx.w;
- 
+
       // Compute r12
       numtyp xr = jx.x - ix.x;
       numtyp yr = jx.y - ix.y;
@@ -1335,7 +1335,7 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
       numtyp r2 = xr*xr + yr*yr + zr*zr;
 
       //if (r2>off2) continue;
-      
+
       numtyp r = ucl_sqrt(r2);
       numtyp rinv = ucl_recip(r);
       numtyp r2inv = rinv*rinv;
@@ -1408,7 +1408,7 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
       // find the field components for charge penetration damping
       numtyp dmpi[7],dmpk[7];
       dampdir(r,alphai,alphak,dmpi,dmpk);
-          
+
       numtyp scalek = factor_dscale;
       numtyp rr3i = bn[1] - ((numtyp)1.0-scalek*dmpi[2])*rr3;
       numtyp rr5i = bn[2] - ((numtyp)1.0-scalek*dmpi[4])*rr5;
@@ -1439,7 +1439,7 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
         rr3k*dky + (numtyp)2.0*rr5k*qky;
       fip[2] = -zr*(rr3*corek + rr3k*valk - rr5k*dkr + rr7k*qkr) -
         rr3k*dkz + (numtyp)2.0*rr5k*qkz;
-          
+
       // find terms needed later to compute mutual polarization
 
       _fieldp[0] += fid[0];
@@ -1453,7 +1453,7 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
   } // ii<inum
 
   // accumulate field and fieldp
-  
+
   store_answers_fieldp(_fieldp,ii,inum,tid,t_per_atom,offset,i,fieldp);
 }
 
@@ -1514,7 +1514,7 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
     int itype,igroup;
     numtyp bn[4],bcn[3];
     numtyp fid[3],fip[3];
-    
+
     itype  = polar3[i].z; // amtype[i];
     igroup = polar3[i].w; // amgroup[i];
 
@@ -1534,7 +1534,7 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
 
       numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
       //int jtype=jx.w;
- 
+
       // Compute r12
       numtyp xr = jx.x - ix.x;
       numtyp yr = jx.y - ix.y;
@@ -1542,7 +1542,7 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
       numtyp r2 = xr*xr + yr*yr + zr*zr;
 
       //if (r2>off2) continue;
-  
+
       numtyp r = ucl_sqrt(r2);
       numtyp rinv = ucl_recip(r);
       numtyp r2inv = rinv*rinv;
@@ -1595,7 +1595,7 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
       }
 
       // find terms needed later to compute mutual polarization
-      // if (poltyp != DIRECT) 
+      // if (poltyp != DIRECT)
       numtyp dmpik[5];
       dampmut(r,alphai,alphak,dmpik);
       numtyp scalek = factor_wscale;
@@ -1610,17 +1610,17 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
       tdipdip[3] = -rr3ik + rr5ik*yr*yr;
       tdipdip[4] = rr5ik*yr*zr;
       tdipdip[5] = -rr3ik + rr5ik*zr*zr;
-      //if (i==0 && j == 10) 
+      //if (i==0 && j == 10)
       //  printf("i = %d: j = %d: tdipdip %f %f %f %f %f %f\n",
       //    i, j,tdipdip[0],tdipdip[1],tdipdip[2],tdipdip[3],tdipdip[4],tdipdip[5]);
       fid[0] = tdipdip[0]*ukx + tdipdip[1]*uky + tdipdip[2]*ukz;
       fid[1] = tdipdip[1]*ukx + tdipdip[3]*uky + tdipdip[4]*ukz;
       fid[2] = tdipdip[2]*ukx + tdipdip[4]*uky + tdipdip[5]*ukz;
-      
+
       fip[0] = tdipdip[0]*ukxp + tdipdip[1]*ukyp + tdipdip[2]*ukzp;
       fip[1] = tdipdip[1]*ukxp + tdipdip[3]*ukyp + tdipdip[4]*ukzp;
       fip[2] = tdipdip[2]*ukxp + tdipdip[4]*ukyp + tdipdip[5]*ukzp;
-      
+
       _fieldp[0] += fid[0];
       _fieldp[1] += fid[1];
       _fieldp[2] += fid[2];
@@ -1632,7 +1632,7 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
   } // ii<inum
 
   // accumulate field and fieldp
-  
+
   store_answers_fieldp(_fieldp,ii,inum,tid,t_per_atom,offset,i,fieldp);
 }
 
@@ -1754,7 +1754,7 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
 
       numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
       //int jtype=jx.w;
- 
+
       // Compute r12
       numtyp xr = jx.x - ix.x;
       numtyp yr = jx.y - ix.y;
@@ -1762,7 +1762,7 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
       numtyp r2 = xr*xr + yr*yr + zr*zr;
 
       //if (r2>off2) continue;
-  
+
       numtyp r = ucl_sqrt(r2);
 
       const numtyp4 pol1j = polar1[j];
@@ -1905,7 +1905,7 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
 
 
       // get the field gradient for direct polarization force
-      
+
       numtyp term1i,term2i,term3i,term4i,term5i,term6i,term7i,term8i;
       numtyp term1k,term2k,term3k,term4k,term5k,term6k,term7k,term8k;
       numtyp term1core;
@@ -1987,7 +1987,7 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
         dir*term4i - qixy*term5i + qiy*term6i + qix*term7i - qir*term8i;
       tkxy = -valk*term1k - corek*term1core - dky*term2k - dkx*term3k +
         dkr*term4k - qkxy*term5k + qky*term6k + qkx*term7k - qkr*term8k;
-      
+
       term2i = rr5i*xr;
       term1i = zr * term2i;
       term1core = rr5core*xr*zr;
@@ -2039,7 +2039,7 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
       numtyp frcx = (numtyp)-2.0 * depx;
       numtyp frcy = (numtyp)-2.0 * depy;
       numtyp frcz = (numtyp)-2.0 * depz;
-       
+
       // get the dEp/dR terms used for direct polarization force
       // poltyp == MUTUAL && hippo
       // tixx and tkxx
@@ -2108,7 +2108,7 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
         virial[5] += vyz;
       }
     } // nbor
-    
+
   } // ii<inum
 
   // accumulate ufld and dufld to compute tep
@@ -2139,7 +2139,7 @@ __kernel void k_special15(__global int * dev_nbor,
   atom_info(t_per_atom,ii,tid,offset);
 
   if (ii<inum) {
-  
+
     int numj, nbor, nbor_end;
     nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
               n_stride,nbor_end,nbor);
diff --git a/lib/gpu/lal_hippo.h b/lib/gpu/lal_hippo.h
index a6742eee28..289f769813 100644
--- a/lib/gpu/lal_hippo.h
+++ b/lib/gpu/lal_hippo.h
@@ -154,13 +154,13 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
   UCL_D_Vec<numtyp4> coeff_amtype;
   /// csix = coeff_amclass.x; adisp = coeff_amclass.y;
   UCL_D_Vec<numtyp4> coeff_amclass;
-  /// Special polar values [0-4]: 
+  /// Special polar values [0-4]:
   ///   sp_polar.x = special_polar_wscale
   ///   sp_polar.y special_polar_pscale,
   ///   sp_polar.z = special_polar_piscale
   ///   sp_polar.w = special_mpole
   UCL_D_Vec<numtyp4> sp_polar;
-  /// Special nonpolar values [0-4]: 
+  /// Special nonpolar values [0-4]:
   ///   sp_nonpolar.x = special_hal
   ///   sp_nonpolar.y special_repel
   ///   sp_nonpolar.z = special_disp
@@ -184,7 +184,7 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
   int udirect2b(const int eflag, const int vflag);
   int umutual2b(const int eflag, const int vflag);
   int polar_real(const int eflag, const int vflag);
-  
+
 };
 
 }
diff --git a/lib/gpu/lal_hippo_ext.cpp b/lib/gpu/lal_hippo_ext.cpp
index 16b697d88f..982cf894a6 100644
--- a/lib/gpu/lal_hippo_ext.cpp
+++ b/lib/gpu/lal_hippo_ext.cpp
@@ -129,7 +129,7 @@ int** hippo_gpu_compute_dispersion_real(const int ago, const int inum_full,
                            const bool vatom, int &host_start,
                            int **ilist, int **jnum, const double cpu_time,
                            bool &success, const double aewald, const double off2,
-                           double *host_q, double *boxlo, double *prd) {                             
+                           double *host_q, double *boxlo, double *prd) {
   return HIPPOMF.compute_dispersion_real(ago, inum_full, nall, host_x, host_type,
                           host_amtype, host_amgroup, host_rpole, sublo, subhi,
                           tag, nspecial, special, nspecial15, special15,
@@ -175,7 +175,7 @@ int** hippo_gpu_compute_udirect2b(const int ago, const int inum_full,
 int** hippo_gpu_compute_umutual2b(const int ago, const int inum_full,
                            const int nall, double **host_x, int *host_type,
                            int *host_amtype, int *host_amgroup, double **host_rpole,
-                           double **host_uind, double **host_uinp, double *host_pval, 
+                           double **host_uind, double **host_uinp, double *host_pval,
                            double *sublo, double *subhi, tagint *tag, int **nspecial,
                            tagint **special, int *nspecial15, tagint** special15,
                            const bool eflag, const bool vflag,
diff --git a/lib/gpu/lal_hippo_extra.h b/lib/gpu/lal_hippo_extra.h
index 61bfebc17f..0b8f96f69b 100644
--- a/lib/gpu/lal_hippo_extra.h
+++ b/lib/gpu/lal_hippo_extra.h
@@ -116,46 +116,46 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
     tmp = (numtyp)4.0 * dmpi2 * dmpk2 / term;
     s = (dampi-tmp)*expk + (dampk+tmp)*expi;
 
-    ds = (dmpi2*dmpk2*r2 - (numtyp)4.0*dmpi2*dmpk22*r/term - 
-          (numtyp)4.0*dmpi2*dmpk2/term) * expk + 
+    ds = (dmpi2*dmpk2*r2 - (numtyp)4.0*dmpi2*dmpk22*r/term -
+          (numtyp)4.0*dmpi2*dmpk2/term) * expk +
       (dmpi2*dmpk2*r2 + (numtyp)4.0*dmpi22*dmpk2*r/term + (numtyp)4.0*dmpi2*dmpk2/term) * expi;
-    d2s = (dmpi2*dmpk2*r2/3.0 + dmpi2*dmpk22*r3/(numtyp)3.0 - 
-           ((numtyp)4.0/(numtyp)3.0)*dmpi2*dmpk23*r2/term - (numtyp)4.0*dmpi2*dmpk22*r/term - 
-           (numtyp)4.0*dmpi2*dmpk2/term) * expk + 
-      (dmpi2*dmpk2*r2/(numtyp)3.0 + dmpi22*dmpk2*r3/(numtyp)3.0 + 
-       ((numtyp)4.0/(numtyp)3.0)*dmpi23*dmpk2*r2/term + (numtyp)4.0*dmpi22*dmpk2*r/term + 
+    d2s = (dmpi2*dmpk2*r2/3.0 + dmpi2*dmpk22*r3/(numtyp)3.0 -
+           ((numtyp)4.0/(numtyp)3.0)*dmpi2*dmpk23*r2/term - (numtyp)4.0*dmpi2*dmpk22*r/term -
+           (numtyp)4.0*dmpi2*dmpk2/term) * expk +
+      (dmpi2*dmpk2*r2/(numtyp)3.0 + dmpi22*dmpk2*r3/(numtyp)3.0 +
+       ((numtyp)4.0/(numtyp)3.0)*dmpi23*dmpk2*r2/term + (numtyp)4.0*dmpi22*dmpk2*r/term +
        (numtyp)4.0*dmpi2*dmpk2/term) * expi;
-    d3s = (dmpi2*dmpk23*r4/15.0 + dmpi2*dmpk22*r3/(numtyp)5.0 + dmpi2*dmpk2*r2/(numtyp)5.0 - 
-           (4.0/15.0)*dmpi2*dmpk24*r3/term - ((numtyp)8.0/(numtyp)5.0)*dmpi2*dmpk23*r2/term - 
-           (numtyp)4.0*dmpi2*dmpk22*r/term - 4.0/term*dmpi2*dmpk2) * expk + 
-      (dmpi23*dmpk2*r4/(numtyp)15.0 + dmpi22*dmpk2*r3/(numtyp)5.0 + dmpi2*dmpk2*r2/(numtyp)5.0 + 
-       ((numtyp)4.0/(numtyp)15.0)*dmpi24*dmpk2*r3/term + ((numtyp)8.0/(numtyp)5.0)*dmpi23*dmpk2*r2/term + 
+    d3s = (dmpi2*dmpk23*r4/15.0 + dmpi2*dmpk22*r3/(numtyp)5.0 + dmpi2*dmpk2*r2/(numtyp)5.0 -
+           (4.0/15.0)*dmpi2*dmpk24*r3/term - ((numtyp)8.0/(numtyp)5.0)*dmpi2*dmpk23*r2/term -
+           (numtyp)4.0*dmpi2*dmpk22*r/term - 4.0/term*dmpi2*dmpk2) * expk +
+      (dmpi23*dmpk2*r4/(numtyp)15.0 + dmpi22*dmpk2*r3/(numtyp)5.0 + dmpi2*dmpk2*r2/(numtyp)5.0 +
+       ((numtyp)4.0/(numtyp)15.0)*dmpi24*dmpk2*r3/term + ((numtyp)8.0/(numtyp)5.0)*dmpi23*dmpk2*r2/term +
        (numtyp)4.0*dmpi22*dmpk2*r/term + (numtyp)4.0/term*dmpi2*dmpk2) * expi;
-    d4s = (dmpi2*dmpk24*r5/(numtyp)105.0 + ((numtyp)2.0/(numtyp)35.0)*dmpi2*dmpk23*r4 + 
-           dmpi2*dmpk22*r3/(numtyp)7.0 + dmpi2*dmpk2*r2/(numtyp)7.0 - 
-           ((numtyp)4.0/(numtyp)105.0)*dmpi2*dmpk25*r4/term - ((numtyp)8.0/21.0)*dmpi2*dmpk24*r3/term - 
-           ((numtyp)12.0/(numtyp)7.0)*dmpi2*dmpk23*r2/term - (numtyp)4.0*dmpi2*dmpk22*r/term - 
-           (numtyp)4.0*dmpi2*dmpk2/term) * expk + 
-      (dmpi24*dmpk2*r5/(numtyp)105.0 + (2.0/(numtyp)35.0)*dmpi23*dmpk2*r4 + 
-       dmpi22*dmpk2*r3/(numtyp)7.0 + dmpi2*dmpk2*r2/(numtyp)7.0 + 
-       ((numtyp)4.0/(numtyp)105.0)*dmpi25*dmpk2*r4/term + ((numtyp)8.0/(numtyp)21.0)*dmpi24*dmpk2*r3/term + 
-       ((numtyp)12.0/(numtyp)7.0)*dmpi23*dmpk2*r2/term + (numtyp)4.0*dmpi22*dmpk2*r/term + 
+    d4s = (dmpi2*dmpk24*r5/(numtyp)105.0 + ((numtyp)2.0/(numtyp)35.0)*dmpi2*dmpk23*r4 +
+           dmpi2*dmpk22*r3/(numtyp)7.0 + dmpi2*dmpk2*r2/(numtyp)7.0 -
+           ((numtyp)4.0/(numtyp)105.0)*dmpi2*dmpk25*r4/term - ((numtyp)8.0/21.0)*dmpi2*dmpk24*r3/term -
+           ((numtyp)12.0/(numtyp)7.0)*dmpi2*dmpk23*r2/term - (numtyp)4.0*dmpi2*dmpk22*r/term -
+           (numtyp)4.0*dmpi2*dmpk2/term) * expk +
+      (dmpi24*dmpk2*r5/(numtyp)105.0 + (2.0/(numtyp)35.0)*dmpi23*dmpk2*r4 +
+       dmpi22*dmpk2*r3/(numtyp)7.0 + dmpi2*dmpk2*r2/(numtyp)7.0 +
+       ((numtyp)4.0/(numtyp)105.0)*dmpi25*dmpk2*r4/term + ((numtyp)8.0/(numtyp)21.0)*dmpi24*dmpk2*r3/term +
+       ((numtyp)12.0/(numtyp)7.0)*dmpi23*dmpk2*r2/term + (numtyp)4.0*dmpi22*dmpk2*r/term +
        (numtyp)4.0*dmpi2*dmpk2/term) * expi;
-    
+
     if (rorder >= 11) {
       r6 = r5 * r;
       dmpi26 = dmpi25 * dmpi2;
       dmpk26 = dmpk25 * dmpk2;
-      d5s = (dmpi2*dmpk25*r6/945.0 + (2.0/189.0)*dmpi2*dmpk24*r5 + 
-             dmpi2*dmpk23*r4/21.0 + dmpi2*dmpk22*r3/9.0 + dmpi2*dmpk2*r2/9.0 - 
-             (4.0/945.0)*dmpi2*dmpk26*r5/term - 
-             (4.0/63.0)*dmpi2*dmpk25*r4/term - (4.0/9.0)*dmpi2*dmpk24*r3/term - 
-             (16.0/9.0)*dmpi2*dmpk23*r2/term - 4.0*dmpi2*dmpk22*r/term - 
-             4.0*dmpi2*dmpk2/term) * expk + 
-        (dmpi25*dmpk2*r6/945.0 + (2.0/189.0)*dmpi24*dmpk2*r5 + 
-         dmpi23*dmpk2*r4/21.0 + dmpi22*dmpk2*r3/9.0 + dmpi2*dmpk2*r2/9.0 + 
-         (4.0/945.0)*dmpi26*dmpk2*r5/term + (4.0/63.0)*dmpi25*dmpk2*r4/term + 
-         (4.0/9.0)*dmpi24*dmpk2*r3/term + (16.0/9.0)*dmpi23*dmpk2*r2/term + 
+      d5s = (dmpi2*dmpk25*r6/945.0 + (2.0/189.0)*dmpi2*dmpk24*r5 +
+             dmpi2*dmpk23*r4/21.0 + dmpi2*dmpk22*r3/9.0 + dmpi2*dmpk2*r2/9.0 -
+             (4.0/945.0)*dmpi2*dmpk26*r5/term -
+             (4.0/63.0)*dmpi2*dmpk25*r4/term - (4.0/9.0)*dmpi2*dmpk24*r3/term -
+             (16.0/9.0)*dmpi2*dmpk23*r2/term - 4.0*dmpi2*dmpk22*r/term -
+             4.0*dmpi2*dmpk2/term) * expk +
+        (dmpi25*dmpk2*r6/945.0 + (2.0/189.0)*dmpi24*dmpk2*r5 +
+         dmpi23*dmpk2*r4/21.0 + dmpi22*dmpk2*r3/9.0 + dmpi2*dmpk2*r2/9.0 +
+         (4.0/945.0)*dmpi26*dmpk2*r5/term + (4.0/63.0)*dmpi25*dmpk2*r4/term +
+         (4.0/9.0)*dmpi24*dmpk2*r3/term + (16.0/9.0)*dmpi23*dmpk2*r2/term +
          4.0*dmpi22*dmpk2*r/term + 4.0*dmpi2*dmpk2/term) * expi;
     }
   }
@@ -214,7 +214,7 @@ ucl_inline void damppole(const numtyp r, const int rorder,
   // compute tolerance and exponential damping factors
 
   eps = (numtyp)0.001;
-  diff = alphai-alphak; 
+  diff = alphai-alphak;
   if (diff < (numtyp)0) diff = -diff;
   dampi = alphai * r;
   dampk = alphak * r;
@@ -231,7 +231,7 @@ ucl_inline void damppole(const numtyp r, const int rorder,
   dmpi[2] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2)*expi;
   dmpi[4] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0)*expi;
   dmpi[6] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 + dampi4/(numtyp)30.0)*expi;
-  dmpi[8] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 + 
+  dmpi[8] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
                    (numtyp)4.0*dampi4/(numtyp)105.0 + dampi5/(numtyp)210.0)*expi;
   if (diff < eps) {
     dmpk[0] = dmpi[0];
@@ -248,7 +248,7 @@ ucl_inline void damppole(const numtyp r, const int rorder,
     dmpk[2] = (numtyp)1.0 - ((numtyp)1.0 + dampk + (numtyp)0.5*dampk2)*expk;
     dmpk[4] = (numtyp)1.0 - ((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0)*expk;
     dmpk[6] = (numtyp)1.0 - ((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0 + dampk4/(numtyp)30.0)*expk;
-    dmpk[8] = (numtyp)1.0 - ((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0 + 
+    dmpk[8] = (numtyp)1.0 - ((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0 +
                      (numtyp)4.0*dampk4/(numtyp)105.0 + dampk5/(numtyp)210.0)*expk;
   }
 
@@ -257,21 +257,21 @@ ucl_inline void damppole(const numtyp r, const int rorder,
   if (diff < eps) {
     dampi6 = dampi3 * dampi3;
     dampi7 = dampi3 * dampi4;
-    dmpik[0] = (numtyp)1.0 - ((numtyp)1.0 + (numtyp)11.0*dampi/(numtyp)16.0 + (numtyp)3.0*dampi2/(numtyp)16.0 + 
+    dmpik[0] = (numtyp)1.0 - ((numtyp)1.0 + (numtyp)11.0*dampi/(numtyp)16.0 + (numtyp)3.0*dampi2/(numtyp)16.0 +
                       dampi3/(numtyp)48.0)*expi;
-    dmpik[2] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + 
+    dmpik[2] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 +
                       (numtyp)7.0*dampi3/(numtyp)48.0 + dampi4/(numtyp)48.0)*expi;
-    dmpik[4] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 + 
+    dmpik[4] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
                       dampi4/(numtyp)24.0 + dampi5/(numtyp)144.0)*expi;
-    dmpik[6] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 + 
+    dmpik[6] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
                       dampi4/(numtyp)24.0 + dampi5/(numtyp)120.0 + dampi6/(numtyp)720.0)*expi;
-    dmpik[8] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 + 
-                      dampi4/(numtyp)24.0 + dampi5/(numtyp)120.0 + dampi6/(numtyp)720.0 + 
+    dmpik[8] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
+                      dampi4/(numtyp)24.0 + dampi5/(numtyp)120.0 + dampi6/(numtyp)720.0 +
                       dampi7/(numtyp)5040.0)*expi;
     if (rorder >= 11) {
       dampi8 = dampi4 * dampi4;
       dmpik[10] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
-                         dampi4/(numtyp)24.0 + dampi5/(numtyp)120.0 + dampi6/(numtyp)720.0 + 
+                         dampi4/(numtyp)24.0 + dampi5/(numtyp)120.0 + dampi6/(numtyp)720.0 +
                          dampi7/(numtyp)5040.0 + dampi8/(numtyp)45360.0)*expi;
     }
 
@@ -282,41 +282,41 @@ ucl_inline void damppole(const numtyp r, const int rorder,
     termk = alphai2 / (alphai2-alphak2);
     termi2 = termi * termi;
     termk2 = termk * termk;
-    dmpik[0] = (numtyp)1.0 - termi2*(1.0 + (numtyp)2.0*termk + (numtyp)0.5*dampi)*expi - 
+    dmpik[0] = (numtyp)1.0 - termi2*(1.0 + (numtyp)2.0*termk + (numtyp)0.5*dampi)*expi -
       termk2*((numtyp)1.0 + (numtyp)2.0*termi + (numtyp)0.5*dampk)*expk;
     dmpik[2] = (numtyp)1.0 - termi2*((numtyp)1.0+dampi+(numtyp)0.5*dampi2)*expi -
       termk2*((numtyp)1.0+dampk+(numtyp)0.5*dampk2)*expk -
       (numtyp)2.0*termi2*termk*((numtyp)1.0+dampi)*expi -
       (numtyp)2.0*termk2*termi*((numtyp)1.0+dampk)*expk;
-    dmpik[4] = (numtyp)1.0 - termi2*((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0)*expi - 
-      termk2*(1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0)*expk - 
-      (numtyp)2.0*termi2*termk*((numtyp)1.0 + dampi + dampi2/(numtyp)3.0)*expi - 
+    dmpik[4] = (numtyp)1.0 - termi2*((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0)*expi -
+      termk2*(1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0)*expk -
+      (numtyp)2.0*termi2*termk*((numtyp)1.0 + dampi + dampi2/(numtyp)3.0)*expi -
       (numtyp)2.0*termk2*termi*((numtyp)1.0 + dampk + dampk2/(numtyp)3.0)*expk;
-    dmpik[6] = (numtyp)1.0 - termi2*((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + 
-                             dampi3/(numtyp)6.0 + dampi4/(numtyp)30.0)*expi - 
-      termk2*((numtyp)1.0 + dampk + 0.5*dampk2 + dampk3/(numtyp)6.0 + dampk4/(numtyp)30.0)*expk - 
-      (numtyp)2.0*termi2*termk*((numtyp)1.0 + dampi + (numtyp)2.0*dampi2/(numtyp)5.0 + dampi3/(numtyp)15.0)*expi - 
+    dmpik[6] = (numtyp)1.0 - termi2*((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 +
+                             dampi3/(numtyp)6.0 + dampi4/(numtyp)30.0)*expi -
+      termk2*((numtyp)1.0 + dampk + 0.5*dampk2 + dampk3/(numtyp)6.0 + dampk4/(numtyp)30.0)*expk -
+      (numtyp)2.0*termi2*termk*((numtyp)1.0 + dampi + (numtyp)2.0*dampi2/(numtyp)5.0 + dampi3/(numtyp)15.0)*expi -
       (numtyp)2.0*termk2*termi*((numtyp)1.0 + dampk + (numtyp)2.0*dampk2/(numtyp)5.0 + dampk3/(numtyp)15.0)*expk;
-    dmpik[8] = (numtyp)1.0 - termi2*((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 + 
-                             (numtyp)4.0*dampi4/(numtyp)105.0 + dampi5/(numtyp)210.0)*expi - 
-      termk2*((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0 + 
-              (numtyp)4.0*dampk4/105.0 + dampk5/(numtyp)210.0)*expk - 
-      (numtyp)2.0*termi2*termk*((numtyp)1.0 + dampi + (numtyp)3.0*dampi2/(numtyp)7.0 + 
-                        (numtyp)2.0*dampi3/(numtyp)21.0 + dampi4/(numtyp)105.0)*expi - 
-      (numtyp)2.0*termk2*termi*((numtyp)1.0 + dampk + (numtyp)3.0*dampk2/(numtyp)7.0 + 
+    dmpik[8] = (numtyp)1.0 - termi2*((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
+                             (numtyp)4.0*dampi4/(numtyp)105.0 + dampi5/(numtyp)210.0)*expi -
+      termk2*((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0 +
+              (numtyp)4.0*dampk4/105.0 + dampk5/(numtyp)210.0)*expk -
+      (numtyp)2.0*termi2*termk*((numtyp)1.0 + dampi + (numtyp)3.0*dampi2/(numtyp)7.0 +
+                        (numtyp)2.0*dampi3/(numtyp)21.0 + dampi4/(numtyp)105.0)*expi -
+      (numtyp)2.0*termk2*termi*((numtyp)1.0 + dampk + (numtyp)3.0*dampk2/(numtyp)7.0 +
                         (numtyp)2.0*dampk3/(numtyp)21.0 + dampk4/(numtyp)105.0)*expk;
-    
+
     if (rorder >= 11) {
       dampi6 = dampi3 * dampi3;
       dampk6 = dampk3 * dampk3;
-      dmpik[10] = (numtyp)1.0 - termi2*((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 + 
-                                (numtyp)5.0*dampi4/(numtyp)126.0 + (numtyp)2.0*dampi5/(numtyp)315.0 + 
-                                dampi6/(numtyp)1890.0)*expi - 
-        termk2*((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0 + (numtyp)5.0*dampk4/(numtyp)126.0 + 
-                (numtyp)2.0*dampk5/(numtyp)315.0 + dampk6/(numtyp)1890.0)*expk - 
-        (numtyp)2.0*termi2*termk*((numtyp)1.0 + dampi + (numtyp)4.0*dampi2/(numtyp)9.0 + dampi3/(numtyp)9.0 + 
-                          dampi4/(numtyp)63.0 + dampi5/(numtyp)945.0)*expi - 
-        (numtyp)2.0*termk2*termi*((numtyp)1.0 + dampk + 4.0*dampk2/(numtyp)9.0 + dampk3/(numtyp)9.0 + 
+      dmpik[10] = (numtyp)1.0 - termi2*((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
+                                (numtyp)5.0*dampi4/(numtyp)126.0 + (numtyp)2.0*dampi5/(numtyp)315.0 +
+                                dampi6/(numtyp)1890.0)*expi -
+        termk2*((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0 + (numtyp)5.0*dampk4/(numtyp)126.0 +
+                (numtyp)2.0*dampk5/(numtyp)315.0 + dampk6/(numtyp)1890.0)*expk -
+        (numtyp)2.0*termi2*termk*((numtyp)1.0 + dampi + (numtyp)4.0*dampi2/(numtyp)9.0 + dampi3/(numtyp)9.0 +
+                          dampi4/(numtyp)63.0 + dampi5/(numtyp)945.0)*expi -
+        (numtyp)2.0*termk2*termi*((numtyp)1.0 + dampk + 4.0*dampk2/(numtyp)9.0 + dampk3/(numtyp)9.0 +
                           dampk4/(numtyp)63.0 + dampk5/(numtyp)945.0)*expk;
     }
   }
@@ -404,9 +404,9 @@ ucl_inline void dampmut(numtyp r, numtyp alphai, numtyp alphak, numtyp dmpik[5])
   if (diff < eps) {
     dampi4 = dampi2 * dampi2;
     dampi5 = dampi2 * dampi3;
-    dmpik[2] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + 
+    dmpik[2] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 +
                       7.0*dampi3/(numtyp)48.0 + dampi4/48.0)*expi;
-    dmpik[4] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 + 
+    dmpik[4] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
                       dampi4/(numtyp)24.0 + dampi5/(numtyp)144.0)*expi;
   } else {
     dampk2 = dampk * dampk;
@@ -417,12 +417,12 @@ ucl_inline void dampmut(numtyp r, numtyp alphai, numtyp alphak, numtyp dmpik[5])
     termk = alphai2 / (alphai2-alphak2);
     termi2 = termi * termi;
     termk2 = termk * termk;
-    dmpik[2] = (numtyp)1.0 - termi2*((numtyp)1.0+dampi+(numtyp)0.5*dampi2)*expi - 
-      termk2*((numtyp)1.0+dampk+(numtyp)0.5*dampk2)*expk - 
+    dmpik[2] = (numtyp)1.0 - termi2*((numtyp)1.0+dampi+(numtyp)0.5*dampi2)*expi -
+      termk2*((numtyp)1.0+dampk+(numtyp)0.5*dampk2)*expk -
       (numtyp)2.0*termi2*termk*((numtyp)1.0+dampi)*expi - (numtyp)2.0*termk2*termi*((numtyp)1.0+dampk)*expk;
-    dmpik[4] = (numtyp)1.0 - termi2*((numtyp)1.0+dampi+(numtyp)0.5*dampi2 + dampi3/(numtyp)6.0)*expi - 
-      termk2*((numtyp)1.0+dampk+(numtyp)0.5*dampk2 + dampk3/(numtyp)6.00)*expk - 
-      (numtyp)2.0*termi2*termk *((numtyp)1.0+dampi+dampi2/(numtyp)3.0)*expi - 
+    dmpik[4] = (numtyp)1.0 - termi2*((numtyp)1.0+dampi+(numtyp)0.5*dampi2 + dampi3/(numtyp)6.0)*expi -
+      termk2*((numtyp)1.0+dampk+(numtyp)0.5*dampk2 + dampk3/(numtyp)6.00)*expk -
+      (numtyp)2.0*termi2*termk *((numtyp)1.0+dampi+dampi2/(numtyp)3.0)*expi -
       (numtyp)2.0*termk2*termi *((numtyp)1.0+dampk+dampk2/(numtyp)3.0)*expk;
   }
 }
diff --git a/src/GPU/Install.sh b/src/GPU/Install.sh
index 9e231663c0..9da06cf636 100755
--- a/src/GPU/Install.sh
+++ b/src/GPU/Install.sh
@@ -91,6 +91,8 @@ action pair_gauss_gpu.cpp pair_gauss.cpp
 action pair_gauss_gpu.h pair_gauss.h
 action pair_gayberne_gpu.cpp pair_gayberne.cpp
 action pair_gayberne_gpu.h pair_gayberne.cpp
+action pair_hippo_gpu.cpp pair_hippo.cpp
+action pair_hippo_gpu.h pair_hippo.cpp
 action pair_lj96_cut_gpu.cpp pair_lj96_cut.cpp
 action pair_lj96_cut_gpu.h pair_lj96_cut.h
 action pair_lj_charmm_coul_long_gpu.cpp pair_lj_charmm_coul_long.cpp
diff --git a/src/GPU/pair_amoeba_gpu.cpp b/src/GPU/pair_amoeba_gpu.cpp
index 91bc679447..e1fe1f1097 100644
--- a/src/GPU/pair_amoeba_gpu.cpp
+++ b/src/GPU/pair_amoeba_gpu.cpp
@@ -76,7 +76,7 @@ int ** amoeba_gpu_compute_multipole_real(const int ago, const int inum, const in
 
 int ** amoeba_gpu_compute_udirect2b(const int ago, const int inum, const int nall,
               double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
-              double **host_rpole, double **host_uind, double **host_uinp, 
+              double **host_rpole, double **host_uind, double **host_uinp,
               double *sublo, double *subhi, tagint *tag, int **nspecial,
               tagint **special, int* nspecial15, tagint** special15,
               const bool eflag, const bool vflag, const bool eatom, const bool vatom,
@@ -86,7 +86,7 @@ int ** amoeba_gpu_compute_udirect2b(const int ago, const int inum, const int nal
 
 int ** amoeba_gpu_compute_umutual2b(const int ago, const int inum, const int nall,
               double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
-              double **host_rpole, double **host_uind, double **host_uinp, 
+              double **host_rpole, double **host_uind, double **host_uinp,
               double *sublo, double *subhi, tagint *tag, int **nspecial,
               tagint **special, int* nspecial15, tagint** special15,
               const bool eflag, const bool vflag, const bool eatom, const bool vatom,
@@ -170,7 +170,7 @@ void PairAmoebaGPU::init_style()
     maxspecial=atom->maxspecial;
     maxspecial15=atom->maxspecial15;
   }
-    
+
   int tq_size;
   int mnf = 5e-2 * neighbor->oneatom;
   int success = amoeba_gpu_init(atom->ntypes+1, max_amtype, max_amclass,
@@ -207,7 +207,7 @@ void PairAmoebaGPU::multipole_real()
 
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
-  
+
   double sublo[3],subhi[3];
   if (domain->triclinic == 0) {
     sublo[0] = domain->sublo[0];
@@ -239,7 +239,7 @@ void PairAmoebaGPU::multipole_real()
                                                  host_start, &ilist, &numneigh, cpu_time,
                                                  success, aewald, felec, off2, atom->q,
                                                  domain->boxlo, domain->prd, &tq_pinned);
-  
+
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
@@ -281,7 +281,7 @@ void PairAmoebaGPU::induce()
 
   // set cutoffs, taper coeffs, and PME params
   // create qfac here, free at end of polar()
-  
+
   if (use_ewald) {
     choose(POLAR_LONG);
     int nmine = p_kspace->nfft_owned;
@@ -317,7 +317,7 @@ void PairAmoebaGPU::induce()
   memory->create(usump,nlocal,3,"ameoba/induce:usump");
 
   // get the electrostatic field due to permanent multipoles
-  
+
   dfield0c(field,fieldp);
 
   // need reverse_comm_pair if dfield0c (i.e. udirect2b) is CPU-only
@@ -345,7 +345,7 @@ void PairAmoebaGPU::induce()
   for (i = 0; i < 10; i++) {
     printf("i = %d: udir = %f %f %f; udirp = %f %f %f\n",
       i, udir[i][0], udir[i][1], udir[i][2],
-      udirp[i][0], udirp[i][1], udirp[i][2]); 
+      udirp[i][0], udirp[i][1], udirp[i][2]);
   }
 */
   // get induced dipoles via the OPT extrapolation method
@@ -353,7 +353,7 @@ void PairAmoebaGPU::induce()
   //       uopt,uoptp with a optorder+1 dimension, just optorder ??
   //       since no need to store optorder+1 values after these loops
 
-  if (poltyp == OPT) { 
+  if (poltyp == OPT) {
     for (i = 0; i < nlocal; i++) {
       for (j = 0; j < 3; j++) {
         uopt[i][0][j] = udir[i][j];
@@ -460,7 +460,7 @@ void PairAmoebaGPU::induce()
       crstyle = FIELD;
       comm->reverse_comm_pair(this);
     }
-    
+
     //error->all(FLERR,"STOP GPU");
 
     // set initial conjugate gradient residual and conjugate vector
@@ -486,7 +486,7 @@ void PairAmoebaGPU::induce()
       cfstyle = RSD;
       comm->forward_comm_pair(this);
       uscale0b(BUILD,rsd,rsdp,zrsd,zrsdp);
-      uscale0b(APPLY,rsd,rsdp,zrsd,zrsdp); 
+      uscale0b(APPLY,rsd,rsdp,zrsd,zrsdp);
       crstyle = ZRSD;
       comm->reverse_comm_pair(this);
    }
@@ -574,7 +574,7 @@ void PairAmoebaGPU::induce()
       if (pcgprec) {
         cfstyle = RSD;
         comm->forward_comm_pair(this);
-        uscale0b(APPLY,rsd,rsdp,zrsd,zrsdp); 
+        uscale0b(APPLY,rsd,rsdp,zrsd,zrsdp);
         crstyle = ZRSD;
         comm->reverse_comm_pair(this);
       }
@@ -629,7 +629,7 @@ void PairAmoebaGPU::induce()
       if (iter >= politer) done = true;
 
       //  apply a "peek" iteration to the mutual induced dipoles
-     
+
       if (done) {
         for (i = 0; i < nlocal; i++) {
           term = pcgpeek * poli[i];
@@ -644,7 +644,7 @@ void PairAmoebaGPU::induce()
 
     // terminate the calculation if dipoles failed to converge
     // NOTE: could make this an error
-    
+
     if (iter >= maxiter || eps > epsold)
       if (me == 0)
 	      error->warning(FLERR,"AMOEBA induced dipoles did not converge");
@@ -652,7 +652,7 @@ void PairAmoebaGPU::induce()
 
   // DEBUG output to dump file
 
-  if (uind_flag) 
+  if (uind_flag)
     dump6(fp_uind,"id uindx uindy uindz uinpx uinpy uinpz",DEBYE,uind,uinp);
 
   // deallocation of arrays
@@ -700,7 +700,7 @@ void PairAmoebaGPU::udirect2b(double **field, double **fieldp)
     PairAmoeba::udirect2b(field, fieldp);
     return;
   }
-   
+
   int eflag=1, vflag=1;
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
@@ -753,7 +753,7 @@ void PairAmoebaGPU::udirect2b(double **field, double **fieldp)
     int idx = 4*i;
     field[i][0] += field_ptr[idx];
     field[i][1] += field_ptr[idx+1];
-    field[i][2] += field_ptr[idx+2]; 
+    field[i][2] += field_ptr[idx+2];
   }
 
   double* fieldp_ptr = (double *)fieldp_pinned;
@@ -764,7 +764,7 @@ void PairAmoebaGPU::udirect2b(double **field, double **fieldp)
     fieldp[i][1] += fieldp_ptr[idx+1];
     fieldp[i][2] += fieldp_ptr[idx+2];
   }
-  
+
 }
 
 /* ----------------------------------------------------------------------
@@ -802,7 +802,7 @@ void PairAmoebaGPU::udirect2b_cpu()
   firstneigh = list->firstneigh;
 
   // NOTE: doesn't this have a problem if aewald is tiny ??
-  
+
   aesq2 = 2.0 * aewald * aewald;
   aesq2n = 0.0;
   if (aewald > 0.0) aesq2n = 1.0 / (MY_PIS*aewald);
@@ -829,13 +829,13 @@ void PairAmoebaGPU::udirect2b_cpu()
     pdi = pdamp[itype];
     pti = thole[itype];
     ddi = dirdamp[itype];
-    
+
     // evaluate all sites within the cutoff distance
 
     for (jj = 0; jj < jnum; jj++) {
       jextra = jlist[jj];
       j = jextra & NEIGHMASK15;
-      
+
       xr = x[j][0] - x[i][0];
       yr = x[j][1] - x[i][1];
       zr = x[j][2] - x[i][2];
@@ -844,7 +844,7 @@ void PairAmoebaGPU::udirect2b_cpu()
 
       jtype = amtype[j];
       jgroup = amgroup[j];
-      
+
       factor_wscale = special_polar_wscale[sbmask15(jextra)];
       if (igroup == jgroup) {
         factor_pscale = special_polar_piscale[sbmask15(jextra)];
@@ -872,7 +872,7 @@ void PairAmoebaGPU::udirect2b_cpu()
         aefac = aesq2 * aefac;
         bn[m] = (bfac*bn[m-1]+aefac*exp2a) * rr2;
       }
-      
+
       // find terms needed later to compute mutual polarization
 
       if (poltyp != DIRECT) {
@@ -891,7 +891,7 @@ void PairAmoebaGPU::udirect2b_cpu()
         scalek = factor_uscale;
         bcn[0] = bn[1] - (1.0-scalek*scale3)*rr3;
         bcn[1] = bn[2] - (1.0-scalek*scale5)*rr5;
-        
+
         neighptr[n++] = j;
         tdipdip[ndip++] = -bcn[0] + bcn[1]*xr*xr;
         tdipdip[ndip++] = bcn[1]*xr*yr;
@@ -902,7 +902,7 @@ void PairAmoebaGPU::udirect2b_cpu()
       } else {
         if (comm->me == 0) printf("i = %d: j = %d: poltyp == DIRECT\n", i, j);
       }
-      
+
     } // jj
 
     firstneigh_dipole[i] = neighptr;
@@ -973,7 +973,7 @@ void PairAmoebaGPU::umutual2b(double **field, double **fieldp)
     int idx = 4*i;
     field[i][0] += field_ptr[idx];
     field[i][1] += field_ptr[idx+1];
-    field[i][2] += field_ptr[idx+2]; 
+    field[i][2] += field_ptr[idx+2];
   }
 
   double* fieldp_ptr = (double *)fieldp_pinned;
@@ -1001,7 +1001,7 @@ void PairAmoebaGPU::polar_real()
 
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
-  
+
   double sublo[3],subhi[3];
   if (domain->triclinic == 0) {
     sublo[0] = domain->sublo[0];
@@ -1033,7 +1033,7 @@ void PairAmoebaGPU::polar_real()
                                              host_start, &ilist, &numneigh, cpu_time,
                                              success, aewald, felec, off2, atom->q,
                                              domain->boxlo, domain->prd, &tq_pinned);
-  
+
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
@@ -1091,11 +1091,11 @@ void PairAmoebaGPU::compute_force_from_torque(const numtyp* tq_ptr,
     vxx = xix*fix[0] + xiy*fiy[0] + xiz*fiz[0];
     vyy = yix*fix[1] + yiy*fiy[1] + yiz*fiz[1];
     vzz = zix*fix[2] + ziy*fiy[2] + ziz*fiz[2];
-    vxy = 0.5 * (yix*fix[0] + yiy*fiy[0] + yiz*fiz[0] + 
+    vxy = 0.5 * (yix*fix[0] + yiy*fiy[0] + yiz*fiz[0] +
                  xix*fix[1] + xiy*fiy[1] + xiz*fiz[1]);
-    vxz = 0.5 * (zix*fix[0] + ziy*fiy[0] + ziz*fiz[0] + 
+    vxz = 0.5 * (zix*fix[0] + ziy*fiy[0] + ziz*fiz[0] +
                  xix*fix[2] + xiy*fiy[2] + xiz*fiz[2]);
-    vyz = 0.5 * (zix*fix[1] + ziy*fiy[1] + ziz*fiz[1] + 
+    vyz = 0.5 * (zix*fix[1] + ziy*fiy[1] + ziz*fiz[1] +
                  yix*fix[2] + yiy*fiy[2] + yiz*fiz[2]);
 
     virial_comp[0] += vxx;
diff --git a/src/GPU/pair_hippo_gpu.cpp b/src/GPU/pair_hippo_gpu.cpp
index f4cbf28561..fbc1b6b238 100644
--- a/src/GPU/pair_hippo_gpu.cpp
+++ b/src/GPU/pair_hippo_gpu.cpp
@@ -88,7 +88,7 @@ int ** hippo_gpu_compute_multipole_real(const int ago, const int inum, const int
 
 int ** hippo_gpu_compute_udirect2b(const int ago, const int inum, const int nall,
               double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
-              double **host_rpole, double **host_uind, double **host_uinp, 
+              double **host_rpole, double **host_uind, double **host_uinp,
               double *host_pval, double *sublo, double *subhi, tagint *tag, int **nspecial,
               tagint **special, int* nspecial15, tagint** special15,
               const bool eflag, const bool vflag, const bool eatom, const bool vatom,
@@ -185,7 +185,7 @@ void PairHippoGPU::init_style()
     maxspecial=atom->maxspecial;
     maxspecial15=atom->maxspecial15;
   }
-    
+
   int tq_size;
   int mnf = 5e-2 * neighbor->oneatom;
   int success = hippo_gpu_init(atom->ntypes+1, max_amtype, max_amclass,
@@ -222,7 +222,7 @@ void PairHippoGPU::dispersion_real()
 
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
-  
+
   double sublo[3],subhi[3];
   if (domain->triclinic == 0) {
     sublo[0] = domain->sublo[0];
@@ -250,7 +250,7 @@ void PairHippoGPU::dispersion_real()
                                                  host_start, &ilist, &numneigh, cpu_time,
                                                  success, aewald, off2, atom->q,
                                                  domain->boxlo, domain->prd);
-  
+
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 }
@@ -270,7 +270,7 @@ void PairHippoGPU::multipole_real()
 
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
-  
+
   double sublo[3],subhi[3];
   if (domain->triclinic == 0) {
     sublo[0] = domain->sublo[0];
@@ -302,7 +302,7 @@ void PairHippoGPU::multipole_real()
                                                 host_start, &ilist, &numneigh, cpu_time,
                                                 success, aewald, felec, off2, atom->q,
                                                 domain->boxlo, domain->prd, &tq_pinned);
-  
+
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
@@ -344,7 +344,7 @@ void PairHippoGPU::induce()
 
   // set cutoffs, taper coeffs, and PME params
   // create qfac here, free at end of polar()
-  
+
   if (use_ewald) {
     choose(POLAR_LONG);
     int nmine = p_kspace->nfft_owned;
@@ -380,7 +380,7 @@ void PairHippoGPU::induce()
   memory->create(usump,nlocal,3,"ameoba/induce:usump");
 
   // get the electrostatic field due to permanent multipoles
-  
+
   dfield0c(field,fieldp);
 
   // need reverse_comm_pair if dfield0c (i.e. udirect2b) is CPU-only
@@ -408,7 +408,7 @@ void PairHippoGPU::induce()
   for (i = 0; i < 10; i++) {
     printf("i = %d: udir = %f %f %f; udirp = %f %f %f\n",
       i, udir[i][0], udir[i][1], udir[i][2],
-      udirp[i][0], udirp[i][1], udirp[i][2]); 
+      udirp[i][0], udirp[i][1], udirp[i][2]);
   }
 */
   // get induced dipoles via the OPT extrapolation method
@@ -416,7 +416,7 @@ void PairHippoGPU::induce()
   //       uopt,uoptp with a optorder+1 dimension, just optorder ??
   //       since no need to store optorder+1 values after these loops
 
-  if (poltyp == OPT) { 
+  if (poltyp == OPT) {
     for (i = 0; i < nlocal; i++) {
       for (j = 0; j < 3; j++) {
         uopt[i][0][j] = udir[i][j];
@@ -523,7 +523,7 @@ void PairHippoGPU::induce()
       crstyle = FIELD;
       comm->reverse_comm_pair(this);
     }
-    
+
     //error->all(FLERR,"STOP GPU");
 
     // set initial conjugate gradient residual and conjugate vector
@@ -549,7 +549,7 @@ void PairHippoGPU::induce()
       cfstyle = RSD;
       comm->forward_comm_pair(this);
       uscale0b(BUILD,rsd,rsdp,zrsd,zrsdp);
-      uscale0b(APPLY,rsd,rsdp,zrsd,zrsdp); 
+      uscale0b(APPLY,rsd,rsdp,zrsd,zrsdp);
       crstyle = ZRSD;
       comm->reverse_comm_pair(this);
    }
@@ -637,7 +637,7 @@ void PairHippoGPU::induce()
       if (pcgprec) {
         cfstyle = RSD;
         comm->forward_comm_pair(this);
-        uscale0b(APPLY,rsd,rsdp,zrsd,zrsdp); 
+        uscale0b(APPLY,rsd,rsdp,zrsd,zrsdp);
         crstyle = ZRSD;
         comm->reverse_comm_pair(this);
       }
@@ -692,7 +692,7 @@ void PairHippoGPU::induce()
       if (iter >= politer) done = true;
 
       //  apply a "peek" iteration to the mutual induced dipoles
-     
+
       if (done) {
         for (i = 0; i < nlocal; i++) {
           term = pcgpeek * poli[i];
@@ -707,7 +707,7 @@ void PairHippoGPU::induce()
 
     // terminate the calculation if dipoles failed to converge
     // NOTE: could make this an error
-    
+
     if (iter >= maxiter || eps > epsold)
       if (me == 0)
 	      error->warning(FLERR,"hippo induced dipoles did not converge");
@@ -715,7 +715,7 @@ void PairHippoGPU::induce()
 
   // DEBUG output to dump file
 
-  if (uind_flag) 
+  if (uind_flag)
     dump6(fp_uind,"id uindx uindy uindz uinpx uinpy uinpz",DEBYE,uind,uinp);
 
   // deallocation of arrays
@@ -763,7 +763,7 @@ void PairHippoGPU::udirect2b(double **field, double **fieldp)
     PairAmoeba::udirect2b(field, fieldp);
     return;
   }
-   
+
   int eflag=1, vflag=1;
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
@@ -816,7 +816,7 @@ void PairHippoGPU::udirect2b(double **field, double **fieldp)
     int idx = 4*i;
     field[i][0] += field_ptr[idx];
     field[i][1] += field_ptr[idx+1];
-    field[i][2] += field_ptr[idx+2]; 
+    field[i][2] += field_ptr[idx+2];
   }
 
   double* fieldp_ptr = (double *)fieldp_pinned;
@@ -827,7 +827,7 @@ void PairHippoGPU::udirect2b(double **field, double **fieldp)
     fieldp[i][1] += fieldp_ptr[idx+1];
     fieldp[i][2] += fieldp_ptr[idx+2];
   }
-  
+
 }
 
 /* ----------------------------------------------------------------------
@@ -865,7 +865,7 @@ void PairHippoGPU::udirect2b_cpu()
   firstneigh = list->firstneigh;
 
   // NOTE: doesn't this have a problem if aewald is tiny ??
-  
+
   aesq2 = 2.0 * aewald * aewald;
   aesq2n = 0.0;
   if (aewald > 0.0) aesq2n = 1.0 / (MY_PIS*aewald);
@@ -892,13 +892,13 @@ void PairHippoGPU::udirect2b_cpu()
     pdi = pdamp[itype];
     pti = thole[itype];
     ddi = dirdamp[itype];
-    
+
     // evaluate all sites within the cutoff distance
 
     for (jj = 0; jj < jnum; jj++) {
       jextra = jlist[jj];
       j = jextra & NEIGHMASK15;
-      
+
       xr = x[j][0] - x[i][0];
       yr = x[j][1] - x[i][1];
       zr = x[j][2] - x[i][2];
@@ -907,7 +907,7 @@ void PairHippoGPU::udirect2b_cpu()
 
       jtype = amtype[j];
       jgroup = amgroup[j];
-      
+
       factor_wscale = special_polar_wscale[sbmask15(jextra)];
       if (igroup == jgroup) {
         factor_pscale = special_polar_piscale[sbmask15(jextra)];
@@ -935,7 +935,7 @@ void PairHippoGPU::udirect2b_cpu()
         aefac = aesq2 * aefac;
         bn[m] = (bfac*bn[m-1]+aefac*exp2a) * rr2;
       }
-      
+
       // find terms needed later to compute mutual polarization
 
       if (poltyp != DIRECT) {
@@ -954,7 +954,7 @@ void PairHippoGPU::udirect2b_cpu()
         scalek = factor_uscale;
         bcn[0] = bn[1] - (1.0-scalek*scale3)*rr3;
         bcn[1] = bn[2] - (1.0-scalek*scale5)*rr5;
-        
+
         neighptr[n++] = j;
         tdipdip[ndip++] = -bcn[0] + bcn[1]*xr*xr;
         tdipdip[ndip++] = bcn[1]*xr*yr;
@@ -965,7 +965,7 @@ void PairHippoGPU::udirect2b_cpu()
       } else {
         if (comm->me == 0) printf("i = %d: j = %d: poltyp == DIRECT\n", i, j);
       }
-      
+
     } // jj
 
     firstneigh_dipole[i] = neighptr;
@@ -1036,7 +1036,7 @@ void PairHippoGPU::umutual2b(double **field, double **fieldp)
     int idx = 4*i;
     field[i][0] += field_ptr[idx];
     field[i][1] += field_ptr[idx+1];
-    field[i][2] += field_ptr[idx+2]; 
+    field[i][2] += field_ptr[idx+2];
   }
 
   double* fieldp_ptr = (double *)fieldp_pinned;
@@ -1064,7 +1064,7 @@ void PairHippoGPU::polar_real()
 
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
-  
+
   double sublo[3],subhi[3];
   if (domain->triclinic == 0) {
     sublo[0] = domain->sublo[0];
@@ -1096,7 +1096,7 @@ void PairHippoGPU::polar_real()
                                              host_start, &ilist, &numneigh, cpu_time,
                                              success, aewald, felec, off2, atom->q,
                                              domain->boxlo, domain->prd, &tq_pinned);
-  
+
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
@@ -1156,11 +1156,11 @@ void PairHippoGPU::compute_force_from_torque(const numtyp* tq_ptr,
     vxx = xix*fix[0] + xiy*fiy[0] + xiz*fiz[0];
     vyy = yix*fix[1] + yiy*fiy[1] + yiz*fiz[1];
     vzz = zix*fix[2] + ziy*fiy[2] + ziz*fiz[2];
-    vxy = 0.5 * (yix*fix[0] + yiy*fiy[0] + yiz*fiz[0] + 
+    vxy = 0.5 * (yix*fix[0] + yiy*fiy[0] + yiz*fiz[0] +
                  xix*fix[1] + xiy*fiy[1] + xiz*fiz[1]);
-    vxz = 0.5 * (zix*fix[0] + ziy*fiy[0] + ziz*fiz[0] + 
+    vxz = 0.5 * (zix*fix[0] + ziy*fiy[0] + ziz*fiz[0] +
                  xix*fix[2] + xiy*fiy[2] + xiz*fiz[2]);
-    vyz = 0.5 * (zix*fix[1] + ziy*fiy[1] + ziz*fiz[1] + 
+    vyz = 0.5 * (zix*fix[1] + ziy*fiy[1] + ziz*fiz[1] +
                  yix*fix[2] + yiy*fiy[2] + yiz*fiz[2]);
 
     virial_comp[0] += vxx;