diff --git a/lib/gpu/lal_amoeba.cu b/lib/gpu/lal_amoeba.cu
index 6317ba8d94..f572d3ebd0 100644
--- a/lib/gpu/lal_amoeba.cu
+++ b/lib/gpu/lal_amoeba.cu
@@ -410,7 +410,7 @@ _texture( q_tex,int2);
 ------------------------------------------------------------------------- */
 
 __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
-                                 const __global numtyp *restrict extra,
+                                 const __global numtyp4 *restrict extra,
                                  const __global numtyp4 *restrict coeff,
                                  const __global numtyp4 *restrict sp_amoeba,
                                  const __global int *dev_nbor,
@@ -442,10 +442,10 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
 
   acctyp4 tq;
   tq.x=(acctyp)0; tq.y=(acctyp)0; tq.z=(acctyp)0;
-
-  numtyp4* polar1 = (numtyp4*)(&extra[0]);
-  numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
-  numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
+  
+  const __global numtyp4* polar1 = &extra[0];
+  const __global numtyp4* polar2 = &extra[nall];
+  const __global numtyp4* polar3 = &extra[2*nall];
 
   if (ii<inum) {
     int numj, nbor, nbor_end;
@@ -490,8 +490,6 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
       numtyp zr = jx.z - ix.z;
       numtyp r2 = xr*xr + yr*yr + zr*zr;
 
-      //if (r2>off2) continue;
-
       numtyp r = ucl_sqrt(r2);
       const numtyp4 pol1j = polar1[j];
       numtyp ck  = pol1j.x;  // rpole[j][0];
@@ -583,12 +581,12 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
       numtyp rr11 = (numtyp)9.0 * rr9 * r2inv;
 
       // calculate the real space Ewald error function terms
-      
+
       numtyp ralpha = aewald * r;
       numtyp exp2a = ucl_exp(-ralpha*ralpha);
       numtyp bn[6];
       bn[0] = ucl_erfc(ralpha) * rinv;
-      
+
       numtyp alsq2 = (numtyp)2.0 * aewald*aewald;
       numtyp alsq2n = (numtyp)0.0;
       if (aewald > (numtyp)0.0) alsq2n = (numtyp)1.0 / (MY_PIS*aewald);
@@ -691,7 +689,7 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
 ------------------------------------------------------------------------- */
 
 __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
-                                 const __global numtyp *restrict extra,
+                                 const __global numtyp4 *restrict extra,
                                  const __global numtyp4 *restrict coeff,
                                  const __global numtyp4 *restrict sp_amoeba,
                                  const __global int *dev_nbor,
@@ -707,14 +705,14 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
   atom_info(t_per_atom,ii,tid,offset);
 
   int n_stride;
-  //local_allocate_store_charge();
+  local_allocate_store_ufld();
 
   acctyp _fieldp[6];
   for (int l=0; l<6; l++) _fieldp[l]=(acctyp)0;
 
-  numtyp4* polar1 = (numtyp4*)(&extra[0]);
-  numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
-  numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
+  const __global numtyp4* polar1 = &extra[0];
+  const __global numtyp4* polar2 = &extra[nall];
+  const __global numtyp4* polar3 = &extra[2*nall];
 
   if (ii<inum) {
     int numj, nbor, nbor_end;
@@ -885,7 +883,7 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
 ------------------------------------------------------------------------- */
 
 __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
-                                 const __global numtyp *restrict extra,
+                                 const __global numtyp4 *restrict extra,
                                  const __global numtyp4 *restrict coeff,
                                  const __global numtyp4 *restrict sp_amoeba,
                                  const __global int *dev_nbor,
@@ -901,13 +899,14 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
   atom_info(t_per_atom,ii,tid,offset);
 
   int n_stride;
+  local_allocate_store_ufld();
 
   acctyp _fieldp[6];
   for (int l=0; l<6; l++) _fieldp[l]=(acctyp)0;
 
-  numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
-  numtyp4* polar4 = (numtyp4*)(&extra[12*nall]);
-  numtyp4* polar5 = (numtyp4*)(&extra[16*nall]);
+  const __global numtyp4* polar3 = &extra[2*nall];
+  const __global numtyp4* polar4 = &extra[3*nall];
+  const __global numtyp4* polar5 = &extra[4*nall];
 
   if (ii<inum) {
     int numj, nbor, nbor_end;
@@ -949,8 +948,6 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
       numtyp zr = jx.z - ix.z;
       numtyp r2 = xr*xr + yr*yr + zr*zr;
 
-      //if (r2>off2) continue;
-
       numtyp r = ucl_sqrt(r2);
       numtyp rinv = ucl_rsqrt(r2);
       numtyp r2inv = rinv*rinv;
@@ -1049,7 +1046,7 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
 ------------------------------------------------------------------------- */
 
 __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
-                             const __global numtyp *restrict extra,
+                             const __global numtyp4 *restrict extra,
                              const __global numtyp4 *restrict coeff,
                              const __global numtyp4 *restrict sp_amoeba,
                              const __global int *dev_nbor,
@@ -1068,7 +1065,6 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
   atom_info(t_per_atom,ii,tid,offset);
 
   int n_stride;
-  local_allocate_store_ufld();
   local_allocate_store_charge();
 
   acctyp4 f;
@@ -1086,11 +1082,12 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
   for (int l=0; l<6; l++) dufld[l]=(acctyp)0;
 
   numtyp dix,diy,diz,qixx,qixy,qixz,qiyy,qiyz,qizz;
-  numtyp4* polar1 = (numtyp4*)(&extra[0]);
-  numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
-  numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
-  numtyp4* polar4 = (numtyp4*)(&extra[12*nall]);
-  numtyp4* polar5 = (numtyp4*)(&extra[16*nall]);
+  
+  const __global numtyp4* polar1 = &extra[0];
+  const __global numtyp4* polar2 = &extra[nall];
+  const __global numtyp4* polar3 = &extra[2*nall];
+  const __global numtyp4* polar4 = &extra[3*nall];
+  const __global numtyp4* polar5 = &extra[4*nall];
 
   if (ii<inum) {
     int itype,igroup;
diff --git a/lib/gpu/lal_atom.cpp b/lib/gpu/lal_atom.cpp
index 853fdf216d..72cb59a912 100644
--- a/lib/gpu/lal_atom.cpp
+++ b/lib/gpu/lal_atom.cpp
@@ -49,7 +49,7 @@ int AtomT::bytes_per_atom() const {
   if (_vel)
     bytes+=4*sizeof(numtyp);
   if (_extra_fields>0)
-    bytes+=_extra_fields*sizeof(numtyp);
+    bytes+=_extra_fields*sizeof(numtyp4);
   return bytes;
 }
 
diff --git a/lib/gpu/lal_atom.h b/lib/gpu/lal_atom.h
index 4b29d76cb1..771c2a3571 100644
--- a/lib/gpu/lal_atom.h
+++ b/lib/gpu/lal_atom.h
@@ -516,7 +516,7 @@ class Atom {
   /// Velocities
   UCL_Vector<numtyp,numtyp> v;
   /// Extras
-  UCL_Vector<numtyp,numtyp> extra;
+  UCL_Vector<numtyp4,numtyp4> extra;
 
   #ifdef GPU_CAST
   UCL_Vector<numtyp,numtyp> x_cast;
diff --git a/lib/gpu/lal_base_amoeba.cpp b/lib/gpu/lal_base_amoeba.cpp
index e80fa01c2b..09d7386461 100644
--- a/lib/gpu/lal_base_amoeba.cpp
+++ b/lib/gpu/lal_base_amoeba.cpp
@@ -90,7 +90,7 @@ int BaseAmoebaT::init_atomic(const int nlocal, const int nall,
   bool vel = false;
   _extra_fields = 24; // round up to accomodate quadruples of numtyp values
                       // rpole 13; uind 3; uinp 3; amtype, amgroup; pval
-  int success=device->init(*ans,charge,rot,nlocal,nall,maxspecial,vel,_extra_fields);
+  int success=device->init(*ans,charge,rot,nlocal,nall,maxspecial,vel,_extra_fields/4);
   if (success!=0)
     return success;
 
@@ -820,35 +820,35 @@ void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
   atom->extra_data_unavail();
 
   int _nall=atom->nall();
-  numtyp *pextra=reinterpret_cast<numtyp*>(&(atom->extra[0]));
+  numtyp4 *pextra=reinterpret_cast<numtyp4*>(&(atom->extra[0]));
 
   int n = 0;
-  int nstride = 4;
+  int nstride = 1; //4;
   if (rpole) {
     for (int i = 0; i < _nall; i++) {
       int idx = n+i*nstride;
-      pextra[idx]   = rpole[i][0];
-      pextra[idx+1] = rpole[i][1];
-      pextra[idx+2] = rpole[i][2];
-      pextra[idx+3] = rpole[i][3];
+      pextra[idx].x = rpole[i][0];
+      pextra[idx].y = rpole[i][1];
+      pextra[idx].z = rpole[i][2];
+      pextra[idx].w = rpole[i][3];
     }
 
     n += nstride*_nall;
     for (int i = 0; i < _nall; i++) {
       int idx = n+i*nstride;
-      pextra[idx]   = rpole[i][4];
-      pextra[idx+1] = rpole[i][5];
-      pextra[idx+2] = rpole[i][6];
-      pextra[idx+3] = rpole[i][8];
+      pextra[idx].x = rpole[i][4];
+      pextra[idx].y = rpole[i][5];
+      pextra[idx].z = rpole[i][6];
+      pextra[idx].w = rpole[i][8];
     }
 
     n += nstride*_nall;
     for (int i = 0; i < _nall; i++) {
       int idx = n+i*nstride;
-      pextra[idx]   = rpole[i][9];
-      pextra[idx+1] = rpole[i][12];
-      pextra[idx+2] = (numtyp)amtype[i];
-      pextra[idx+3] = (numtyp)amgroup[i];
+      pextra[idx].x = rpole[i][9];
+      pextra[idx].y = rpole[i][12];
+      pextra[idx].z = (numtyp)amtype[i];
+      pextra[idx].w = (numtyp)amgroup[i];
     }
   } else {
     n += 2*nstride*_nall;
@@ -858,9 +858,10 @@ void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
   if (uind) {
     for (int i = 0; i < _nall; i++) {
       int idx = n+i*nstride;
-      pextra[idx]   = uind[i][0];
-      pextra[idx+1] = uind[i][1];
-      pextra[idx+2] = uind[i][2];
+      pextra[idx].x = uind[i][0];
+      pextra[idx].y = uind[i][1];
+      pextra[idx].z = uind[i][2];
+      pextra[idx].w = 0;
     }
   }
 
@@ -868,9 +869,10 @@ void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
   if (uinp) {
     for (int i = 0; i < _nall; i++) {
       int idx = n+i*nstride;
-      pextra[idx]   = uinp[i][0];
-      pextra[idx+1] = uinp[i][1];
-      pextra[idx+2] = uinp[i][2];
+      pextra[idx].x = uinp[i][0];
+      pextra[idx].y = uinp[i][1];
+      pextra[idx].z = uinp[i][2];
+      pextra[idx].w = 0;
     }
   }
 
@@ -878,7 +880,10 @@ void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
   if (pval) {
     for (int i = 0; i < _nall; i++) {
       int idx = n+i*nstride;
-      pextra[idx]   = pval[i];
+      pextra[idx].x = pval[i];
+      pextra[idx].y = 0;
+      pextra[idx].z = 0;
+      pextra[idx].w = 0;
     }
   }
 }
diff --git a/lib/gpu/lal_hippo.cu b/lib/gpu/lal_hippo.cu
index 1611e8aece..99e20db223 100644
--- a/lib/gpu/lal_hippo.cu
+++ b/lib/gpu/lal_hippo.cu
@@ -410,7 +410,7 @@ _texture( q_tex,int2);
 ------------------------------------------------------------------------- */
 
 __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
-                                const __global numtyp *restrict extra,
+                                const __global numtyp4 *restrict extra,
                                 const __global numtyp4 *restrict coeff_rep,
                                 const __global numtyp4 *restrict sp_nonpolar,
                                 const __global int *dev_nbor,
@@ -444,9 +444,9 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
   acctyp4 tq;
   tq.x=(acctyp)0; tq.y=(acctyp)0; tq.z=(acctyp)0;
 
-  numtyp4* polar1 = (numtyp4*)(&extra[0]);
-  numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
-  numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
+  const __global numtyp4* polar1 = &extra[0];
+  const __global numtyp4* polar2 = &extra[nall];
+  const __global numtyp4* polar3 = &extra[2*nall];
 
   if (ii<inum) {
     int numj, nbor, nbor_end;
@@ -495,8 +495,6 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
       numtyp zr = jx.z - ix.z;
       numtyp r2 = xr*xr + yr*yr + zr*zr;
 
-      if (r2>off2) continue;
-
       const numtyp4 pol1j = polar1[j];
       //numtyp ck  = pol1j.x;  // rpole[j][0];
       numtyp dkx = pol1j.y;    // rpole[j][1];
@@ -712,7 +710,7 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
 ------------------------------------------------------------------------- */
 
 __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
-                                 const __global numtyp *restrict extra,
+                                 const __global numtyp4 *restrict extra,
                                  const __global numtyp4 *restrict coeff_amtype,
                                  const __global numtyp4 *restrict coeff_amclass,
                                  const __global numtyp4 *restrict sp_nonpolar,
@@ -741,7 +739,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
     for (int l=0; l<6; l++) virial[l]=(acctyp)0;
   }
 
-  numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
+  const __global numtyp4* polar3 = &extra[2*nall];
 
   if (ii<inum) {
     int itype,iclass;
@@ -890,7 +888,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
 ------------------------------------------------------------------------- */
 
 __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
-                                const __global numtyp *restrict extra,
+                                const __global numtyp4 *restrict extra,
                                 const __global numtyp4 *restrict coeff_amtype,
                                 const __global numtyp4 *restrict coeff_amclass,
                                 const __global numtyp4 *restrict sp_polar,
@@ -924,15 +922,12 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
   acctyp4 tq;
   tq.x=(acctyp)0; tq.y=(acctyp)0; tq.z=(acctyp)0;
 
-  numtyp4* polar1 = (numtyp4*)(&extra[0]);
-  numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
-  numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
-  numtyp4* polar6 = (numtyp4*)(&extra[20*nall]);
+  const __global numtyp4* polar1 = &extra[0];
+  const __global numtyp4* polar2 = &extra[nall];
+  const __global numtyp4* polar3 = &extra[2*nall];
+  const __global numtyp4* polar6 = &extra[5*nall];
 
   if (ii<inum) {
-    int m;
-    int itype,iclass;
-
     int numj, nbor, nbor_end;
     const __global int* nbor_mem=dev_packed;
     nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
@@ -960,9 +955,8 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
     const numtyp4 pol3i = polar3[i];
     numtyp qiyz = pol3i.x;   // rpole[i][9];
     numtyp qizz = pol3i.y;   // rpole[i][12];
-    itype  = pol3i.z;        // amtype[i];
-
-    iclass = coeff_amtype[itype].w;  // amtype2class[itype];
+    int itype  = pol3i.z;        // amtype[i];
+    int iclass = coeff_amtype[itype].w;  // amtype2class[itype];
     numtyp corei = coeff_amclass[iclass].z;  // pcore[iclass];
     numtyp alphai = coeff_amclass[iclass].w; // palpha[iclass];
     numtyp vali = polar6[i].x;
@@ -1084,6 +1078,7 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
       numtyp alsq2n = (numtyp)0.0;
       if (aewald > (numtyp)0.0) alsq2n = (numtyp)1.0 / (MY_PIS*aewald);
 
+      int m;
       for (m = 1; m < 6; m++) {
         numtyp bfac = (numtyp) (m+m-1);
         alsq2n = alsq2 * alsq2n;
@@ -1208,32 +1203,32 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
 ------------------------------------------------------------------------- */
 
 __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
-                                const __global numtyp *restrict extra,
+                                const __global numtyp4 *restrict extra,
                                 const __global numtyp4 *restrict coeff_amtype,
                                 const __global numtyp4 *restrict coeff_amclass,
                                 const __global numtyp4 *restrict sp_polar,
                                 const __global int *dev_nbor,
-                                 const __global int *dev_packed,
-                                 const __global int *dev_short_nbor,
-                                 __global acctyp4 *restrict fieldp,
-                                 const int inum,  const int nall,
-                                 const int nbor_pitch, const int t_per_atom,
-                                 const numtyp aewald, const numtyp off2,
-                                 const numtyp polar_dscale, const numtyp polar_uscale)
+                                const __global int *dev_packed,
+                                const __global int *dev_short_nbor,
+                                __global acctyp4 *restrict fieldp,
+                                const int inum,  const int nall,
+                                const int nbor_pitch, const int t_per_atom,
+                                const numtyp aewald, const numtyp off2,
+                                const numtyp polar_dscale, const numtyp polar_uscale)
 {
   int tid, ii, offset, i;
   atom_info(t_per_atom,ii,tid,offset);
 
   int n_stride;
-  //local_allocate_store_charge();
+  local_allocate_store_charge();
 
   acctyp _fieldp[6];
   for (int l=0; l<6; l++) _fieldp[l]=(acctyp)0;
 
-  numtyp4* polar1 = (numtyp4*)(&extra[0]);
-  numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
-  numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
-  numtyp4* polar6 = (numtyp4*)(&extra[20*nall]);
+  const __global numtyp4* polar1 = &extra[0];
+  const __global numtyp4* polar2 = &extra[nall];
+  const __global numtyp4* polar3 = &extra[2*nall];
+  const __global numtyp4* polar6 = &extra[5*nall];
 
   if (ii<inum) {
     int numj, nbor, nbor_end;
@@ -1388,7 +1383,7 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
 ------------------------------------------------------------------------- */
 
 __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
-                                const __global numtyp *restrict extra,
+                                const __global numtyp4 *restrict extra,
                                 const __global numtyp4 *restrict coeff_amtype,
                                 const __global numtyp4 *restrict coeff_amclass,
                                 const __global numtyp4 *restrict sp_polar,
@@ -1405,14 +1400,14 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
   atom_info(t_per_atom,ii,tid,offset);
 
   int n_stride;
-  //local_allocate_store_charge();
+  local_allocate_store_charge();
 
   acctyp _fieldp[6];
   for (int l=0; l<6; l++) _fieldp[l]=(acctyp)0;
 
-  numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
-  numtyp4* polar4 = (numtyp4*)(&extra[12*nall]);
-  numtyp4* polar5 = (numtyp4*)(&extra[16*nall]);
+  const __global numtyp4* polar3 = &extra[2*nall];
+  const __global numtyp4* polar4 = &extra[3*nall];
+  const __global numtyp4* polar5 = &extra[4*nall];
 
   if (ii<inum) {
     int numj, nbor, nbor_end;
@@ -1539,27 +1534,26 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
 ------------------------------------------------------------------------- */
 
 __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
-                             const __global numtyp *restrict extra,
-                             const __global numtyp4 *restrict coeff_amtype,
-                             const __global numtyp4 *restrict coeff_amclass,
-                             const __global numtyp4 *restrict sp_polar,
-                             const __global int *dev_nbor,
-                             const __global int *dev_packed,
-                             const __global int *dev_short_nbor,
-                             __global acctyp4 *restrict ans,
-                             __global acctyp *restrict engv,
-                             __global acctyp4 *restrict tep,
-                             const int eflag, const int vflag, const int inum,
-                             const int nall, const int nbor_pitch, const int t_per_atom,
-                             const numtyp aewald, const numtyp felec,
-                             const numtyp off2, const numtyp polar_dscale,
-                             const numtyp polar_uscale)
+                            const __global numtyp4 *restrict extra,
+                            const __global numtyp4 *restrict coeff_amtype,
+                            const __global numtyp4 *restrict coeff_amclass,
+                            const __global numtyp4 *restrict sp_polar,
+                            const __global int *dev_nbor,
+                            const __global int *dev_packed,
+                            const __global int *dev_short_nbor,
+                            __global acctyp4 *restrict ans,
+                            __global acctyp *restrict engv,
+                            __global acctyp4 *restrict tep,
+                            const int eflag, const int vflag, const int inum,
+                            const int nall, const int nbor_pitch, const int t_per_atom,
+                            const numtyp aewald, const numtyp felec,
+                            const numtyp off2, const numtyp polar_dscale,
+                            const numtyp polar_uscale)
 {
   int tid, ii, offset, i;
   atom_info(t_per_atom,ii,tid,offset);
 
   int n_stride;
-  local_allocate_store_ufld();
   local_allocate_store_charge();
 
   acctyp4 f;
@@ -1577,14 +1571,13 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
   for (int l=0; l<6; l++) dufld[l]=(acctyp)0;
 
   numtyp dix,diy,diz,qixx,qixy,qixz,qiyy,qiyz,qizz;
-  numtyp4* polar1 = (numtyp4*)(&extra[0]);
-  numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
-  numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
-  numtyp4* polar4 = (numtyp4*)(&extra[12*nall]);
-  numtyp4* polar5 = (numtyp4*)(&extra[16*nall]);
-  numtyp4* polar6 = (numtyp4*)(&extra[20*nall]);
 
-  //numtyp4 xi__;
+  const __global numtyp4* polar1 = &extra[0];
+  const __global numtyp4* polar2 = &extra[nall];
+  const __global numtyp4* polar3 = &extra[2*nall];
+  const __global numtyp4* polar4 = &extra[3*nall];
+  const __global numtyp4* polar5 = &extra[4*nall];
+  const __global numtyp4* polar6 = &extra[5*nall];
 
   if (ii<inum) {
     int itype,igroup;
@@ -1644,7 +1637,6 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
       numtyp yr = jx.y - ix.y;
       numtyp zr = jx.z - ix.z;
       numtyp r2 = xr*xr + yr*yr + zr*zr;
-
       numtyp r = ucl_sqrt(r2);
 
       const numtyp4 pol1j = polar1[j];