diff --git a/lib/gpu/lal_base_charge.cpp b/lib/gpu/lal_base_charge.cpp
index 9045420425..84fbddd4e9 100644
--- a/lib/gpu/lal_base_charge.cpp
+++ b/lib/gpu/lal_base_charge.cpp
@@ -56,7 +56,7 @@ int BaseChargeT::init_atomic(const int nlocal, const int nall,
                              const int max_nbors, const int maxspecial,
                              const double cell_size, const double gpu_split,
                              FILE *_screen, const void *pair_program,
-                             const char *k_name) {
+                             const char *k_name, const int disable_fast_math) {
   screen=_screen;
 
   int gpu_nbor=0;
@@ -83,7 +83,7 @@ int BaseChargeT::init_atomic(const int nlocal, const int nall,
 
   _block_size=device->pair_block_size();
   _block_bio_size=device->block_bio_pair();
-  compile_kernels(*ucl_device,pair_program,k_name);
+  compile_kernels(*ucl_device,pair_program,k_name,disable_fast_math);
 
   if (_threads_per_atom>1 && gpu_nbor==0) {
     nbor->packing(true);
@@ -321,14 +321,20 @@ double BaseChargeT::host_memory_usage_atomic() const {
 
 template <class numtyp, class acctyp>
 void BaseChargeT::compile_kernels(UCL_Device &dev, const void *pair_str,
-                                  const char *kname) {
+                                  const char *kname,
+                                  const int disable_fast_math) {
   if (_compiled)
     return;
 
   std::string s_fast=std::string(kname)+"_fast";
   if (pair_program) delete pair_program;
   pair_program=new UCL_Program(dev);
-  std::string oclstring = device->compile_string()+" -DEVFLAG=1";
+  std::string device_compile_string;
+  if (disable_fast_math)
+    device_compile_string = device->compile_string_nofast();
+  else
+    device_compile_string = device->compile_string();
+  std::string oclstring = device_compile_string+" -DEVFLAG=1";
   pair_program->load_string(pair_str,oclstring.c_str(),nullptr,screen);
   k_pair_fast.set_function(*pair_program,s_fast.c_str());
   k_pair.set_function(*pair_program,kname);
@@ -336,7 +342,7 @@ void BaseChargeT::compile_kernels(UCL_Device &dev, const void *pair_str,
   q_tex.get_texture(*pair_program,"q_tex");
 
   #if defined(LAL_OCL_EV_JIT)
-  oclstring = device->compile_string()+" -DEVFLAG=0";
+  oclstring = device_compile_string+" -DEVFLAG=0";
   if (pair_program_noev) delete pair_program_noev;
   pair_program_noev=new UCL_Program(dev);
   pair_program_noev->load_string(pair_str,oclstring.c_str(),nullptr,screen);
diff --git a/lib/gpu/lal_base_charge.h b/lib/gpu/lal_base_charge.h
index 6b8761092a..307c5c079f 100644
--- a/lib/gpu/lal_base_charge.h
+++ b/lib/gpu/lal_base_charge.h
@@ -44,6 +44,7 @@ class BaseCharge {
     * \param cell_size cutoff + skin
     * \param gpu_split fraction of particles handled by device
     * \param k_name name for the kernel for force calculation
+    * \param disable_fast_math if nonzero, JIT-compile the kernel without fast-math options
     *
     * Returns:
     * -  0 if successful
@@ -54,7 +55,8 @@ class BaseCharge {
   int init_atomic(const int nlocal, const int nall, const int max_nbors,
                   const int maxspecial, const double cell_size,
                   const double gpu_split, FILE *screen,
-                  const void *pair_program, const char *k_name);
+                  const void *pair_program, const char *k_name,
+                  const int disable_fast_math = 0);
 
   /// Estimate the overhead for GPU context changes and CPU driver
   void estimate_gpu_overhead(const int add_kernels=0);
@@ -198,7 +200,8 @@ class BaseCharge {
   double _gpu_overhead, _driver_overhead;
   UCL_D_Vec<int> *_nbor_data;
 
-  void compile_kernels(UCL_Device &dev, const void *pair_string, const char *k);
+  void compile_kernels(UCL_Device &dev, const void *pair_string,
+                       const char *k, const int disable_fast_math);
 
   virtual int loop(const int eflag, const int vflag) = 0;
 };
diff --git a/lib/gpu/lal_born_coul_wolf.cpp b/lib/gpu/lal_born_coul_wolf.cpp
index e6caebbab8..9aac866353 100644
--- a/lib/gpu/lal_born_coul_wolf.cpp
+++ b/lib/gpu/lal_born_coul_wolf.cpp
@@ -57,7 +57,7 @@ int BornCoulWolfT::init(const int ntypes, double **host_cutsq, double **host_rho
                         const double alf, const double e_shift, const double f_shift) {
   int success;
   success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
-                            _screen,born_coul_wolf,"k_born_coul_wolf");
+                            _screen,born_coul_wolf,"k_born_coul_wolf",1);
   if (success!=0)
     return success;
 
diff --git a/lib/gpu/lal_born_coul_wolf_cs.cpp b/lib/gpu/lal_born_coul_wolf_cs.cpp
index 8deceeb1f4..abd4da439a 100644
--- a/lib/gpu/lal_born_coul_wolf_cs.cpp
+++ b/lib/gpu/lal_born_coul_wolf_cs.cpp
@@ -42,7 +42,7 @@ int BornCoulWolfCST::init(const int ntypes, double **host_cutsq, double **host_r
                         const double alf, const double e_shift, const double f_shift) {
   int success;
   success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
-                            _screen,born_coul_wolf_cs,"k_born_coul_wolf_cs");
+                            _screen,born_coul_wolf_cs,"k_born_coul_wolf_cs",1);
   if (success!=0)
     return success;
 
diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp
index e2b5b9cdb5..0ff7125089 100644
--- a/lib/gpu/lal_device.cpp
+++ b/lib/gpu/lal_device.cpp
@@ -420,6 +420,16 @@ int DeviceT::set_ocl_params(std::string s_config, std::string extra_args) {
   return 0;
 }
 
+template <class numtyp, class acctyp>
+std::string DeviceT::compile_string_nofast() { // copy of the compile string with fast-math settings disabled
+  std::string no_fast = _ocl_compile_string;   // edit a local copy; the member itself is not modified
+  size_t p = no_fast.find("-cl-fast-relaxed-math ");
+  if (p != std::string::npos) no_fast.erase(p,22); // 22 = length of the flag including its trailing space
+  p = no_fast.find("-DFAST_MATH=");                // the macro value starts 12 characters after p
+  if (p != std::string::npos) no_fast.replace(p+12,1,"0"); // replace() is valid even if nothing follows '='
+  return no_fast;
+}
+
 template <class numtyp, class acctyp>
 int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
                   const bool rot, const int nlocal,
diff --git a/lib/gpu/lal_device.h b/lib/gpu/lal_device.h
index 1db6ae3127..933a3508b5 100644
--- a/lib/gpu/lal_device.h
+++ b/lib/gpu/lal_device.h
@@ -312,6 +312,7 @@ class Device {
   }
 
   inline std::string compile_string() { return _ocl_compile_string; }
+  std::string compile_string_nofast();
   inline std::string ocl_config_name() { return _ocl_config_name; }
 
   template <class t>