diff --git a/src/USER-INTEL/dihedral_charmm_intel.cpp b/src/USER-INTEL/dihedral_charmm_intel.cpp
index 82c5bc77db..7e93e319d9 100644
--- a/src/USER-INTEL/dihedral_charmm_intel.cpp
+++ b/src/USER-INTEL/dihedral_charmm_intel.cpp
@@ -178,6 +178,11 @@ void DihedralCharmmIntel::eval(const int vflag,
       }
     }
 
+    #if defined(LMP_SIMD_COMPILER_TEST)
+    #pragma vector aligned
+    #pragma simd reduction(+:sedihedral, sevdwl, secoul, sv0, sv1, sv2, \
+                           sv3, sv4, sv5, spv0, spv1, spv2, spv3, spv4, spv5) 
+    #endif
     for (int n = nfrom; n < nto; n++) {
       const int i1 = dihedrallist[n].a;
       const int i2 = dihedrallist[n].b;
@@ -237,6 +242,7 @@ void DihedralCharmmIntel::eval(const int vflag,
       const flt_t s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z);
 
       // error check
+      #ifndef LMP_SIMD_COMPILER_TEST
       if (c > PTOLERANCE || c < MTOLERANCE) {
 	int me = comm->me;
 
@@ -258,6 +264,7 @@ void DihedralCharmmIntel::eval(const int vflag,
 		  me,x[i4].x,x[i4].y,x[i4].z);
 	}
       }
+      #endif
 
       if (c > (flt_t)1.0) c = (flt_t)1.0;
       if (c < (flt_t)-1.0) c = (flt_t)-1.0;
@@ -337,6 +344,9 @@ void DihedralCharmmIntel::eval(const int vflag,
       }
 
 
+      #if defined(LMP_SIMD_COMPILER_TEST)
+      #pragma simdoff
+      #endif
       {
         if (NEWTON_BOND || i2 < nlocal) {
 	  f[i2].x += f2x;
@@ -413,6 +423,9 @@ void DihedralCharmmIntel::eval(const int vflag,
       }
 
       // apply force to each of 4 atoms
+      #if defined(LMP_SIMD_COMPILER_TEST)
+      #pragma simdoff
+      #endif
       {
         if (NEWTON_BOND || i1 < nlocal) {
 	  f[i1].x += f1x;
@@ -668,7 +681,7 @@ void DihedralCharmmIntel::eval(const int vflag,
       const SIMD_flt_t tcos_shift = SIMD_gather(nmask, cos_shift, type);
       const SIMD_flt_t tsin_shift = SIMD_gather(nmask, sin_shift, type);
       const SIMD_flt_t tk = SIMD_gather(nmask, k, type);
-      const SIMD_int m = SIMD_gather(nmask, multiplicity, type);
+      const SIMD_int m = SIMD_gatherz_offset<flt_t>(nmask, multiplicity, type);
 
       SIMD_flt_t p(one);
       SIMD_flt_t ddf1(szero);
diff --git a/src/USER-INTEL/intel_simd.h b/src/USER-INTEL/intel_simd.h
index 3bc99c790f..ac13f1edfd 100644
--- a/src/USER-INTEL/intel_simd.h
+++ b/src/USER-INTEL/intel_simd.h
@@ -194,6 +194,37 @@ namespace ip_simd {
 				      _MM_SCALE_8);
   }
 
+  // Masked int gather whose index scale follows T (see specializations below).
+  // Declared only: float and double are the sole valid instantiations.
+  template <typename T> inline SIMD_int
+  SIMD_gatherz_offset(const SIMD_mask &m, const int *p, const SIMD_int &i);
+
+  template <>  // T = float: each index in i is scaled by 4 bytes
+  inline SIMD_int SIMD_gatherz_offset<float>(const SIMD_mask &m, const int *p,
+                                             const SIMD_int &i) {
+    const __m512i zero_fill = _mm512_set1_epi32(0);  // lanes with mask bit clear
+    return _mm512_mask_i32gather_epi32(zero_fill, m, i, p, _MM_SCALE_4);
+  }
+
+  template <>  // T = double: each index in i is scaled by 8 bytes
+  inline SIMD_int SIMD_gatherz_offset<double>(const SIMD_mask &m, const int *p,
+                                              const SIMD_int &i) {
+    const __m512i zero_fill = _mm512_set1_epi32(0);  // lanes with mask bit clear
+    return _mm512_mask_i32gather_epi32(zero_fill, m, i, p, _MM_SCALE_8);
+  }
+
+  inline SIMD_float SIMD_gatherz(const SIMD_mask &m, const float *p,
+                                 const SIMD_int &i) {  // masked float gather
+    const __m512 zero_fill = _mm512_set1_ps(0.0f);  // lanes with mask bit clear
+    return _mm512_mask_i32gather_ps(zero_fill, m, i, p, _MM_SCALE_4);
+  }
+
+  inline SIMD_double SIMD_gatherz(const SIMD_mask &m, const double *p,
+                                  const SIMD_int &i) {  // masked double gather
+    const __m512d zero_fill = _mm512_set1_pd(0.0);  // lanes with mask bit clear
+    return _mm512_mask_i32logather_pd(zero_fill, m, i, p, _MM_SCALE_8);
+  }
+
   // ------- Store Operations
   
   inline void SIMD_store(int *p, const SIMD_int &one) {