silence compiler warnings about unused parameters and variables

This commit is contained in:
Axel Kohlmeyer
2023-01-19 08:56:54 -05:00
parent 3ae2805316
commit 4244d2e6cd
27 changed files with 164 additions and 164 deletions

View File

@@ -26,6 +26,9 @@
#ifdef UCL_DEBUG
#define UCL_SYNC_DEBUG
#define UCL_DESTRUCT_CHECK
#define UCL_DEBUG_ARG(arg) arg
#else
#define UCL_DEBUG_ARG(arg)
#endif
#ifndef UCL_NO_API_CHECK

View File

@@ -33,6 +33,9 @@
#ifdef UCL_DEBUG
#define UCL_SYNC_DEBUG
#define UCL_DESTRUCT_CHECK
#define UCL_DEBUG_ARG(arg) arg
#else
#define UCL_DEBUG_ARG(arg)
#endif
#ifndef UCL_NO_API_CHECK

View File

@@ -309,15 +309,14 @@ class UCL_Device {
/// Return the maximum memory pitch in bytes for current device
inline size_t max_pitch() { return max_pitch(_device); }
/// Return the maximum memory pitch in bytes
inline size_t max_pitch(const int i) { return 0; }
inline size_t max_pitch(const int) { return 0; }
/// Returns false if accelerator cannot be shared by multiple processes
/** If it cannot be determined, true is returned **/
inline bool sharing_supported() { return sharing_supported(_device); }
/// Returns false if accelerator cannot be shared by multiple processes
/** If it cannot be determined, true is returned **/
inline bool sharing_supported(const int i)
{ return true; }
inline bool sharing_supported(const int) { return true; }
/// True if the device is a sub-device
inline bool is_subdevice()

View File

@@ -33,6 +33,9 @@
#ifdef UCL_DEBUG
#define UCL_SYNC_DEBUG
#define UCL_DESTRUCT_CHECK
#define UCL_DEBUG_ARG(arg) arg
#else
#define UCL_DEBUG_ARG(arg)
#endif
#ifndef UCL_NO_API_CHECK

View File

@@ -137,7 +137,7 @@ inline int _host_view(mat_type &mat, copy_type &cm, const size_t o,
template <class mat_type>
inline int _host_alloc(mat_type &mat, UCL_Device &dev, const size_t n,
const enum UCL_MEMOPT kind, const enum UCL_MEMOPT kind2){
const enum UCL_MEMOPT kind, const enum UCL_MEMOPT /*kind2*/){
cl_mem_flags buffer_perm;
cl_map_flags map_perm;
if (kind==UCL_READ_ONLY) {
@@ -583,7 +583,7 @@ template <> struct _ucl_memcpy<1,0> {
template <class p1, class p2>
static inline void mc(p1 &dst, const p2 &src, const size_t n,
cl_command_queue &cq, const cl_bool block,
const size_t dst_offset, const size_t src_offset) {
const size_t /*dst_offset*/, const size_t src_offset) {
if (src.cbegin()==dst.cbegin()) {
#ifdef UCL_DBG_MEM_TRACE
std::cerr << "UCL_COPY 1S\n";
@@ -641,7 +641,7 @@ template <> struct _ucl_memcpy<0,1> {
template <class p1, class p2>
static inline void mc(p1 &dst, const p2 &src, const size_t n,
cl_command_queue &cq, const cl_bool block,
const size_t dst_offset, const size_t src_offset) {
const size_t dst_offset, const size_t /*src_offset*/) {
if (src.cbegin()==dst.cbegin()) {
if (block) ucl_sync(cq);
#ifdef UCL_DBG_MEM_TRACE

View File

@@ -35,19 +35,19 @@ class UCL_Texture {
UCL_Texture() {}
~UCL_Texture() {}
/// Construct with a specified texture reference
inline UCL_Texture(UCL_Program &prog, const char *texture_name) { }
inline UCL_Texture(UCL_Program & /*prog*/, const char * /*texture_name*/) { }
/// Set the texture reference for this object
inline void get_texture(UCL_Program &prog, const char *texture_name) { }
inline void get_texture(UCL_Program & /*prog*/, const char * /*texture_name*/) { }
/// Bind a float array where each fetch grabs a vector of length numel
template<class mat_typ>
inline void bind_float(mat_typ &vec, const unsigned numel) { }
inline void bind_float(mat_typ & /*vec*/, const unsigned /*numel*/) { }
/// Unbind the texture reference from the memory allocation
inline void unbind() { }
/// Make a texture reference available to kernel
inline void allow(UCL_Kernel &kernel) { }
inline void allow(UCL_Kernel & /*kernel*/) { }
private:
friend class UCL_Kernel;
@@ -62,7 +62,7 @@ class UCL_Const {
inline UCL_Const(UCL_Program &prog, const char *global_name)
{ get_global(prog,global_name); }
/// Set the global reference for this object
inline void get_global(UCL_Program &prog, const char *global_name) {
inline void get_global(UCL_Program &prog, const char * /*global_name*/) {
if (_active) {
CL_DESTRUCT_CALL(clReleaseContext(_context));
CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq));

View File

@@ -71,7 +71,7 @@ class UCL_Timer {
inline void init(UCL_Device &dev) { init(dev,dev.cq()); }
/// Initialize command queue for timing
inline void init(UCL_Device &dev, command_queue &cq) {
inline void init(UCL_Device & /*dev*/, command_queue &cq) {
clear();
_cq=cq;
clRetainCommandQueue(_cq);

View File

@@ -205,12 +205,11 @@ template <> struct _host_host_copy<1,1> {
// Should never be here
template <int host_t1, int host_t2> struct _host_host_copy {
template <class mat1, class mat2>
static inline void hhc(mat1 &dst, const mat2 &src, const size_t numel) {
static inline void hhc(mat1 & /*dst*/, const mat2 & /*src*/, const size_t /*numel*/) {
assert(0==1);
}
template <class mat1, class mat2>
static inline void hhc(mat1 &dst, const mat2 &src, const size_t rows,
const size_t cols) {
static inline void hhc(mat1 & /*dst*/, const mat2 & /*src*/, const size_t /*rows*/, const size_t /*cols*/) {
assert(0==1);
}
};
@@ -470,24 +469,22 @@ template <int host_type1> struct _ucl_cast_copy<host_type1,1> {
// Neither on host or both on host
template <> struct _ucl_cast_copy<1,1> {
template <class mat1, class mat2, class mat3>
static inline void cc(mat1 &dst, const mat2 &src, const size_t numel,
mat3 &cast_buffer, command_queue &cq) {
static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/, const size_t /*numel*/,
mat3 & /*cast_buffer*/, command_queue & /*cq*/) {
assert(0==1);
}
template <class mat1, class mat2, class mat3>
static inline void cc(mat1 &dst, const mat2 &src, const size_t numel,
mat3 &cast_buffer) {
static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/, const size_t /*numel*/, mat3 & /*cast_buffer*/) {
assert(0==1);
}
template <class mat1, class mat2, class mat3>
static inline void cc(mat1 &dst, const mat2 &src, const size_t rows,
const size_t cols, mat3 &cast_buffer) {
static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/, const size_t /*rows*/,
const size_t /*cols*/, mat3 & /*cast_buffer*/) {
assert(0==1);
}
template <class mat1, class mat2, class mat3>
static inline void cc(mat1 &dst, const mat2 &src, const size_t rows,
const size_t cols, mat3 &cast_buffer,
command_queue &cq) {
static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/, const size_t /*rows*/,
const size_t /*cols*/, mat3 & /*cast_buffer*/, command_queue & /*cq*/) {
assert(0==1);
}
};
@@ -495,24 +492,22 @@ template <> struct _ucl_cast_copy<1,1> {
// Neither on host or both on host
template <> struct _ucl_cast_copy<0,0> {
template <class mat1, class mat2, class mat3>
static inline void cc(mat1 &dst, const mat2 &src, const size_t numel,
mat3 &cast_buffer, command_queue &cq) {
static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/, const size_t /*numel*/,
mat3 & /*cast_buffer*/, command_queue & /*cq*/) {
assert(0==1);
}
template <class mat1, class mat2, class mat3>
static inline void cc(mat1 &dst, const mat2 &src, const size_t numel,
mat3 &cast_buffer) {
static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/, const size_t /*numel*/, mat3 & /*cast_buffer*/) {
assert(0==1);
}
template <class mat1, class mat2, class mat3>
static inline void cc(mat1 &dst, const mat2 &src, const size_t rows,
const size_t cols, mat3 &cast_buffer) {
static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/, const size_t /*rows*/,
const size_t /*cols*/, mat3 & /*cast_buffer*/) {
assert(0==1);
}
template <class mat1, class mat2, class mat3>
static inline void cc(mat1 &dst, const mat2 &src, const size_t rows,
const size_t cols, mat3 &cast_buffer,
command_queue &cq) {
static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/, const size_t /*rows*/,
const size_t cols, mat3 & /*cast_buffer*/, command_queue & /*cq*/) {
assert(0==1);
}
};

View File

@@ -125,7 +125,7 @@ class UCL_D_Vec : public UCL_BaseMat {
* - The view does not prevent the memory from being freed by the
* allocating container when using CUDA APIs **/
template <class ucl_type>
inline void view(ucl_type &input, const size_t rows, const size_t cols) {
inline void view(ucl_type &input, const size_t UCL_DEBUG_ARG(rows), const size_t cols) {
#ifdef UCL_DEBUG
assert(rows==1);
#endif
@@ -230,8 +230,8 @@ class UCL_D_Vec : public UCL_BaseMat {
* - The view does not prevent the memory from being freed by the
* allocating container when using CUDA APIs **/
template <class ucl_type>
inline void view_offset(const size_t offset,ucl_type &input,const size_t rows,
const size_t cols) {
inline void view_offset(const size_t offset,ucl_type &input,
const size_t UCL_DEBUG_ARG(rows), const size_t cols) {
#ifdef UCL_DEBUG
assert(rows==1);
#endif

View File

@@ -126,7 +126,7 @@ class UCL_H_Vec : public UCL_BaseMat {
* allocating container when using CUDA APIs
* - Viewing a device container on the host is not supported **/
template <class ucl_type>
inline void view(ucl_type &input, const size_t rows, const size_t cols) {
inline void view(ucl_type &input, const size_t UCL_DEBUG_ARG(rows), const size_t cols) {
#ifdef UCL_DEBUG
assert(rows==1);
#endif
@@ -188,7 +188,7 @@ class UCL_H_Vec : public UCL_BaseMat {
* allocating container when using CUDA APIs
* - Viewing a device pointer on the host is not supported **/
template <class ptr_type>
inline void view(ptr_type *input, const size_t rows, const size_t cols,
inline void view(ptr_type *input, const size_t UCL_DEBUG_ARG(rows), const size_t cols,
UCL_Device &dev) {
#ifdef UCL_DEBUG
assert(rows==1);
@@ -233,7 +233,7 @@ class UCL_H_Vec : public UCL_BaseMat {
* allocating container when using CUDA APIs
* - Viewing a device container on the host is not supported **/
template <class ucl_type>
inline void view_offset(const size_t offset,ucl_type &input,const size_t rows,
inline void view_offset(const size_t offset,ucl_type &input,const size_t UCL_DEBUG_ARG(rows),
const size_t cols) {
#ifdef UCL_DEBUG
assert(rows==1);

View File

@@ -27,7 +27,7 @@ template <int st> struct _ucl_s_obj_help;
// -- Can potentially use same memory if shared by accelerator
template <> struct _ucl_s_obj_help<1> {
template <class t1, class t2, class t3>
static inline int alloc(t1 &host, t2 &device, t3 &_buffer,
static inline int alloc(t1 &host, t2 &device, t3 & /*_buffer*/,
const int cols, UCL_Device &acc,
const enum UCL_MEMOPT kind1,
const enum UCL_MEMOPT kind2) {
@@ -131,41 +131,37 @@ template <> struct _ucl_s_obj_help<1> {
}
template <class t1, class t2, class t3>
static inline void copy(t1 &dst, t2 &src, t3 &buffer, const bool async) {
static inline void copy(t1 &dst, t2 &src, t3 & /*buffer*/, const bool async) {
ucl_copy(dst,src,async);
}
template <class t1, class t2, class t3>
static inline void copy(t1 &dst, t2 &src, t3 &buffer, command_queue &cq) {
static inline void copy(t1 &dst, t2 &src, t3 & /*buffer*/, command_queue &cq) {
ucl_copy(dst,src,cq);
}
template <class t1, class t2, class t3>
static inline void copy(t1 &dst, t2 &src, const int cols, t3 &buffer,
const bool async) {
static inline void copy(t1 &dst, t2 &src, const int cols, t3 & /*buffer*/, const bool async) {
ucl_copy(dst,src,cols,async);
}
template <class t1, class t2, class t3>
static inline void copy(t1 &dst, t2 &src, const int cols, t3 &buffer,
command_queue &cq) {
static inline void copy(t1 &dst, t2 &src, const int cols, t3 & /*buffer*/, command_queue &cq) {
ucl_copy(dst,src,cols,cq);
}
template <class t1, class t2, class t3>
static inline void copy(t1 &dst, t2 &src, const int rows, const int cols,
t3 &buffer, const bool async) {
static inline void copy(t1 &dst, t2 &src, const int rows, const int cols, t3 & /*buffer*/, const bool async) {
ucl_copy(dst,src,rows,cols,async);
}
template <class t1, class t2, class t3>
static inline void copy(t1 &dst, t2 &src, const int rows, const int cols,
t3 &buffer, command_queue &cq) {
static inline void copy(t1 &dst, t2 &src, const int rows, const int cols, t3 & /*buffer*/, command_queue &cq) {
ucl_copy(dst,src,rows,cols,cq);
}
template <class t1, class t2, class t3>
static inline int dev_resize(t1 &device, t2 &host, t3 &buff,const int cols) {
static inline int dev_resize(t1 &device, t2 &host, t3 & /*buff*/,const int cols) {
if (device.kind()==UCL_VIEW) {
device.view(host);
return UCL_SUCCESS;
@@ -353,7 +349,7 @@ template <int st> struct _ucl_s_obj_help {
}
template <class t1, class t2, class t3>
static inline int dev_resize(t1 &device, t2 &host, t3 &buff,const int cols) {
static inline int dev_resize(t1 &device, t2 & /*host*/, t3 &buff,const int cols) {
int err=buff.resize(cols);
if (err!=UCL_SUCCESS)
return err;

View File

@@ -48,10 +48,10 @@ int AmoebaT::init(const int ntypes, const int max_amtype, const int max_amclass,
const double *host_pdamp, const double *host_thole,
const double *host_dirdamp, const int *host_amtype2class,
const double *host_special_hal,
const double *host_special_repel,
const double *host_special_disp,
const double * /*host_special_repel*/,
const double * /*host_special_disp*/,
const double *host_special_mpole,
const double *host_special_polar_wscale,
const double * /*host_special_polar_wscale*/,
const double *host_special_polar_piscale,
const double *host_special_polar_pscale,
const double *host_csix, const double *host_adisp,
@@ -188,7 +188,7 @@ int AmoebaT::multipole_real(const int eflag, const int vflag) {
// Launch the real-space permanent field kernel
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
int AmoebaT::udirect2b(const int eflag, const int vflag) {
int AmoebaT::udirect2b(const int /*eflag*/, const int /*vflag*/) {
int ainum=this->ans->inum();
if (ainum == 0)
return 0;
@@ -230,7 +230,7 @@ int AmoebaT::udirect2b(const int eflag, const int vflag) {
// Launch the real-space induced field kernel, returning field and fieldp
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
int AmoebaT::umutual2b(const int eflag, const int vflag) {
int AmoebaT::umutual2b(const int /*eflag*/, const int /*vflag*/) {
int ainum=this->ans->inum();
if (ainum == 0)
return 0;

View File

@@ -119,8 +119,8 @@ void amoeba_gpu_clear() {
int** amoeba_gpu_precompute(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, int *host_amtype,
int *host_amgroup, double **host_rpole,
double **host_uind, double **host_uinp, double *host_pval,
int *host_amgroup, double **host_rpole, double ** /*host_uind*/,
double ** /*host_uinp*/, double * /*host_pval*/,
double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special,
int *nspecial15, tagint **special15,

View File

@@ -403,9 +403,14 @@ double AtomT::host_memory_usage() const {
return _max_atoms*atom_bytes*sizeof(numtyp)+sizeof(Atom<numtyp,acctyp>);
}
#ifdef USE_CUDPP
#define USE_CUDPP_ARG(arg) arg
#else
#define USE_CUDPP_ARG(arg)
#endif
// Sort arrays for neighbor list calculation
template <class numtyp, class acctyp>
void AtomT::sort_neighbor(const int num_atoms) {
void AtomT::sort_neighbor(const int USE_CUDPP_ARG(num_atoms)) {
#ifdef USE_CUDPP
CUDPPResult result = cudppSort(sort_plan, (unsigned *)dev_cell_id.begin(),
(int *)dev_particle_id.begin(),

View File

@@ -327,7 +327,7 @@ class Atom {
/// Copy positions and types to device asynchronously
/** Copies nall() elements **/
inline void add_x_data(double **host_ptr, int *host_type) {
inline void add_x_data(double ** /*host_ptr*/, int * /*host_type*/) {
time_pos.start();
if (_x_avail==false) {
#ifdef GPU_CAST
@@ -441,7 +441,7 @@ class Atom {
/// Copy velocities and tags to device asynchronously
/** Copies nall() elements **/
inline void add_v_data(double **host_ptr, tagint *host_tag) {
inline void add_v_data(double ** /*host_ptr*/, tagint * /*host_tag*/) {
time_vel.start();
if (_v_avail==false) {
#ifdef GPU_CAST

View File

@@ -288,7 +288,7 @@ int** BaseAmoebaT::precompute(const int ago, const int inum_full, const int nall
const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom, int &host_start,
int **&ilist, int **&jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo, double *prd) {
bool &success, double *host_q, double * /*boxlo*/, double * /*prd*/) {
acc_timers();
if (eatom) _eflag=2;
else if (eflag_in) _eflag=1;
@@ -368,20 +368,21 @@ int** BaseAmoebaT::precompute(const int ago, const int inum_full, const int nall
// this is the first part in a time step done on the GPU for AMOEBA for now
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
void BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full,
const int nall, double **host_x,
int *host_type, int *host_amtype,
int *host_amgroup, double **host_rpole, double *host_pval,
double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special,
int *nspecial15, tagint **special15,
const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success,
void BaseAmoebaT::compute_multipole_real(const int /*ago*/, const int inum_full,
const int /*nall*/, double ** /*host_x*/,
int * /*host_type*/, int * /*host_amtype*/,
int * /*host_amgroup*/, double ** /*host_rpole*/,
double */*host_pval*/, double * /*sublo*/,
double * /*subhi*/, tagint * /*tag*/,
int ** /*nspecial*/, tagint ** /*special*/,
int * /*nspecial15*/, tagint ** /*special15*/,
const bool /*eflag_in*/, const bool /*vflag_in*/,
const bool /*eatom*/, const bool /*vatom*/,
int & /*host_start*/, int ** /*ilist*/, int ** /*jnum*/,
const double /*cpu_time*/, bool & /*success*/,
const double aewald, const double felec,
const double off2_mpole, double *host_q,
double *boxlo, double *prd, void **tep_ptr) {
const double off2_mpole, double * /*host_q*/,
double * /*boxlo*/, double * /*prd*/, void **tep_ptr) {
// ------------------- Resize _tep array ------------------------
if (inum_full>_max_tep_size) {
@@ -393,7 +394,7 @@ void BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full,
_off2_mpole = off2_mpole;
_felec = felec;
_aewald = aewald;
const int red_blocks=multipole_real(_eflag,_vflag);
multipole_real(_eflag,_vflag);
// leave the answers (forces, energies and virial) on the device,
// only copy them back in the last kernel (polar_real)
@@ -424,7 +425,7 @@ void BaseAmoebaT::compute_udirect2b(int *host_amtype, int *host_amgroup, double
// specify the correct cutoff and alpha values
_off2_polar = off2_polar;
_aewald = aewald;
const int red_blocks=udirect2b(_eflag,_vflag);
udirect2b(_eflag,_vflag);
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
@@ -436,10 +437,10 @@
// of the induced field
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
void BaseAmoebaT::compute_umutual2b(int *host_amtype, int *host_amgroup, double **host_rpole,
double **host_uind, double **host_uinp, double *host_pval,
void BaseAmoebaT::compute_umutual2b(int *host_amtype, int *host_amgroup, double ** /*host_rpole*/,
double **host_uind, double **host_uinp, double * /*host_pval*/,
const double aewald, const double off2_polar,
void** fieldp_ptr) {
void** /*fieldp_ptr*/) {
// only copy the necessary data arrays that are updated over the iterations
// use nullptr for the other arrays that are already copied from host to device
cast_extra_data(host_amtype, host_amgroup, nullptr, host_uind, host_uinp, nullptr);
@@ -449,7 +450,7 @@ void BaseAmoebaT::compute_umutual2b(int *host_amtype, int *host_amgroup, double
_off2_polar = off2_polar;
_aewald = aewald;
// launch the kernel
const int red_blocks=umutual2b(_eflag,_vflag);
umutual2b(_eflag,_vflag);
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
// NOTE: move this step to update_fieldp() to delay device-host transfer
@@ -492,7 +493,7 @@ void BaseAmoebaT::precompute_kspace(const int inum_full, const int bsorder,
_fdip_phi2.alloc(_max_thetai_size*10,*(this->ucl_device),UCL_READ_WRITE);
_fdip_sum_phi.alloc(_max_thetai_size*20,*(this->ucl_device),UCL_READ_WRITE);
} else {
if (_thetai1.cols()<_max_thetai_size*bsorder) {
if ((int)_thetai1.cols()<_max_thetai_size*bsorder) {
_max_thetai_size=static_cast<int>(static_cast<double>(inum_full)*1.10);
_thetai1.resize(_max_thetai_size*bsorder);
_thetai2.resize(_max_thetai_size*bsorder);
@@ -573,7 +574,7 @@ void BaseAmoebaT::precompute_kspace(const int inum_full, const int bsorder,
int numel = _num_grid_points;
if (_cgrid_brick.cols() == 0) {
_cgrid_brick.alloc(numel, *(this->ucl_device), UCL_READ_WRITE, UCL_READ_ONLY);
} else if (numel > _cgrid_brick.cols()) {
} else if (numel > (int)_cgrid_brick.cols()) {
_cgrid_brick.resize(numel);
}
}
@@ -611,7 +612,7 @@ void BaseAmoebaT::compute_fphi_uind(double ****host_grid_brick,
#endif
// launch the kernel with its execution configuration (see below)
const int red_blocks = fphi_uind();
fphi_uind();
// copy data from device to host asynchronously
_fdip_phi1.update_host(_max_thetai_size*10, true);
@@ -682,7 +683,7 @@ void BaseAmoebaT::compute_fphi_mpole(double ***host_grid_brick, void **host_fphi
_cgrid_brick.update_device(_num_grid_points, false);
_felec = felec;
const int red_blocks = fphi_mpole();
fphi_mpole();
_fdip_sum_phi.update_host(_max_thetai_size*20);
@@ -698,9 +699,6 @@ int BaseAmoebaT::fphi_mpole() {
if (ainum == 0)
return 0;
int _nall=atom->nall();
int nbor_pitch=nbor->nbor_pitch();
// Compute the block size and grid size to keep all cores busy
const int BX=block_size();
@@ -771,7 +769,7 @@ double BaseAmoebaT::host_memory_usage_atomic() const {
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
void BaseAmoebaT::setup_fft(const int numel, const int element_type)
void BaseAmoebaT::setup_fft(const int /*numel*/, const int /*element_type*/)
{
// TODO: setting up FFT plan based on the backend (cuFFT or hipFFT)
}
@@ -781,7 +779,8 @@ void BaseAmoebaT::setup_fft(const int numel, const int element_type)
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
void BaseAmoebaT::compute_fft1d(void* in, void* out, const int numel, const int mode)
void BaseAmoebaT::compute_fft1d(void * /*in*/, void * /*out*/,
const int /*numel*/, const int /*mode*/)
{
// TODO: setting up FFT plan based on the backend (cuFFT or hipFFT)
#if 0 // !defined(USE_OPENCL) && !defined(USE_HIP)
@@ -940,7 +939,7 @@ void BaseAmoebaT::compile_kernels(UCL_Device &dev, const void *pair_str,
#if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
if (dev.has_subgroup_support()) {
size_t mx_subgroup_sz = k_polar.max_subgroup_size(_block_size);
int mx_subgroup_sz = k_polar.max_subgroup_size(_block_size);
if (_threads_per_atom > mx_subgroup_sz)
_threads_per_atom = mx_subgroup_sz;
device->set_simd_size(mx_subgroup_sz);

View File

@@ -280,7 +280,7 @@ class BaseAmoeba {
UCL_Kernel k_fphi_uind, k_fphi_mpole;
UCL_Kernel k_special15, k_short_nbor;
inline int block_size() { return _block_size; }
inline void set_kernel(const int eflag, const int vflag) {}
inline void set_kernel(const int /*eflag*/, const int /*vflag*/) {}
// --------------------------- TEXTURES -----------------------------
UCL_Texture pos_tex;

View File

@@ -196,7 +196,7 @@ void BaseDPDT::compute(const int f_ago, const int inum_full, const int nall,
const double cpu_time, bool &success, tagint *tag,
double **host_v, const double dtinvsqrt,
const int seed, const int timestep,
const int nlocal, double *boxlo, double *prd) {
const int /*nlocal*/, double * /*boxlo*/, double * /*prd*/) {
acc_timers();
int eflag, vflag;
if (eatom) eflag=2;
@@ -261,7 +261,7 @@ int** BaseDPDT::compute(const int ago, const int inum_full,
const double cpu_time, bool &success,
double **host_v, const double dtinvsqrt,
const int seed, const int timestep,
double *boxlo, double *prd) {
double * /*boxlo*/, double * /*prd*/) {
acc_timers();
int eflag, vflag;
if (eatom) eflag=2;

View File

@@ -44,18 +44,14 @@ int CHARMMLongT::bytes_per_atom(const int max_nbors) const {
}
template <class numtyp, class acctyp>
int CHARMMLongT::init(const int ntypes,
double host_cut_bothsq, double **host_lj1,
double **host_lj2, double **host_lj3,
double **host_lj4, double **host_offset,
double *host_special_lj, const int nlocal,
const int nall, const int max_nbors,
const int maxspecial, const double cell_size,
const double gpu_split, FILE *_screen,
int CHARMMLongT::init(const int ntypes, double host_cut_bothsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double ** /*host_offset*/, double *host_special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, const double gpu_split, FILE *_screen,
double host_cut_ljsq, const double host_cut_coulsq,
double *host_special_coul, const double qqrd2e,
const double g_ewald, const double cut_lj_innersq,
const double denom_lj, double **epsilon,
double *host_special_coul, const double qqrd2e, const double g_ewald,
const double cut_lj_innersq, const double denom_lj, double **epsilon,
double **sigma, const bool mix_arithmetic) {
int success;
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,

View File

@@ -52,7 +52,7 @@ DeviceT::~Device() {
}
template <class numtyp, class acctyp>
int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
int DeviceT::init_device(MPI_Comm /*world*/, MPI_Comm replica, const int ngpu,
const int first_gpu_id, const int gpu_mode,
const double p_split, const int t_per_atom,
const double user_cell_size, char *ocl_args,
@@ -528,7 +528,7 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const int nlocal,
template <class numtyp, class acctyp>
int DeviceT::init_nbor(Neighbor *nbor, const int nlocal,
const int host_nlocal, const int nall,
const int host_nlocal, const int /*nall*/,
const int maxspecial, const int gpu_host,
const int max_nbors, const double cutoff,
const bool pre_cut, const int threads_per_atom,

View File

@@ -30,7 +30,7 @@ static DPD<PRECISION,ACC_PRECISION> DPDTMF;
int dpd_tstat_gpu_init(const int ntypes, double **cutsq, double **host_a0,
double **host_gamma, double **host_sigma, double **host_cut,
double *special_lj, const int inum,
const int nall, const int max_nbors, const int maxspecial,
const int nall, const int /*max_nbors*/, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen) {
DPDTMF.clear();
gpu_mode=DPDTMF.device->gpu_mode();

View File

@@ -310,7 +310,7 @@ void EAMT::compute(const int f_ago, const int inum_full, const int nlocal,
const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh,
const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom,
const bool /*eatom*/, const bool /*vatom*/,
int &host_start, const double cpu_time,
bool &success, void **fp_ptr) {
this->acc_timers();
@@ -386,8 +386,8 @@ int** EAMT::compute(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag_in,
const bool vflag_in, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const bool vflag_in, const bool /*eatom*/,
const bool /*vatom*/, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, int &inum,
void **fp_ptr) {
this->acc_timers();

View File

@@ -176,19 +176,19 @@ double HippoT::host_memory_usage() const {
// Compute the repulsion term, returning tep
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
void HippoT::compute_repulsion(const int ago, const int inum_full,
const int nall, double **host_x,
int *host_type, int *host_amtype,
int *host_amgroup, double **host_rpole,
double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special,
int *nspecial15, tagint **special15,
void HippoT::compute_repulsion(const int /*ago*/, const int inum_full,
const int /*nall*/, double ** /*host_x*/,
int * /*host_type*/, int * /*host_amtype*/,
int * /*host_amgroup*/, double ** /*host_rpole*/,
double * /*sublo*/, double * /*subhi*/, tagint * /*tag*/,
int ** /*nspecial*/, tagint ** /*special*/,
int * /*nspecial15*/, tagint ** /*special15*/,
const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success,
const double aewald, const double off2_repulse,
double *host_q, double *boxlo, double *prd,
int & /*host_start*/, int ** /*ilist*/, int ** /*jnum*/,
const double /*cpu_time*/, bool & /*success*/,
const double /*aewald*/, const double off2_repulse,
double * /*host_q*/, double * /*boxlo*/, double * /*prd*/,
double cut2, double c0, double c1, double c2,
double c3, double c4, double c5, void **tep_ptr) {
this->acc_timers();
@@ -223,7 +223,7 @@ void HippoT::compute_repulsion(const int ago, const int inum_full,
_c3 = c3;
_c4 = c4;
_c5 = c5;
const int red_blocks=repulsion(this->_eflag,this->_vflag);
repulsion(this->_eflag,this->_vflag);
// copy tep from device to host
this->_tep.update_host(this->_max_tep_size*4,false);
@@ -287,7 +287,7 @@ void HippoT::compute_dispersion_real(int *host_amtype, int *host_amgroup,
this->_off2_disp = off2_disp;
this->_aewald = aewald;
const int red_blocks=dispersion_real(this->_eflag,this->_vflag);
dispersion_real(this->_eflag,this->_vflag);
// only copy them back if this is the last kernel
// otherwise, commenting out these two lines to leave the answers
@@ -341,21 +341,21 @@ int HippoT::dispersion_real(const int eflag, const int vflag) {
// Compute the multipole real-space term, returning tep
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
void HippoT::compute_multipole_real(const int ago, const int inum_full,
const int nall, double **host_x,
int *host_type, int *host_amtype,
int *host_amgroup, double **host_rpole,
double* host_pval, double *sublo,
double *subhi, tagint *tag,
int **nspecial, tagint **special,
int *nspecial15, tagint **special15,
const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success,
void HippoT::compute_multipole_real(const int /*ago*/, const int inum_full,
const int /*nall*/, double ** /*host_x*/,
int * /*host_type*/, int * /*host_amtype*/,
int * /*host_amgroup*/, double ** /*host_rpole*/,
double* host_pval, double * /*sublo*/,
double * /*subhi*/, tagint * /*tag*/,
int ** /*nspecial*/, tagint ** /*special*/,
int * /*nspecial15*/, tagint ** /*special15*/,
const bool /*eflag_in*/, const bool /*vflag_in*/,
const bool /*eatom*/, const bool /*vatom*/,
int & /*host_start*/, int ** /*ilist*/, int ** /*jnum*/,
const double /*cpu_time*/, bool & /*success*/,
const double aewald, const double felec,
const double off2_mpole, double *host_q,
double *boxlo, double *prd, void **tep_ptr) {
const double off2_mpole, double * /*host_q*/,
double * /*boxlo*/, double * /*prd*/, void **tep_ptr) {
// cast necessary data arrays from host to device
@@ -373,7 +373,7 @@ void HippoT::compute_multipole_real(const int ago, const int inum_full,
this->_off2_mpole = off2_mpole;
this->_felec = felec;
this->_aewald = aewald;
const int red_blocks=multipole_real(this->_eflag,this->_vflag);
multipole_real(this->_eflag,this->_vflag);
// copy tep from device to host
this->_tep.update_host(this->_max_tep_size*4,false);
@@ -424,7 +424,7 @@ int HippoT::multipole_real(const int eflag, const int vflag) {
// returning field and fieldp
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
void HippoT::compute_udirect2b(int *host_amtype, int *host_amgroup, double **host_rpole,
void HippoT::compute_udirect2b(int * /*host_amtype*/, int * /*host_amgroup*/, double ** /*host_rpole*/,
double **host_uind, double **host_uinp, double* host_pval,
const double aewald, const double off2_polar,
void** fieldp_ptr) {
@ -438,7 +438,7 @@ void HippoT::compute_udirect2b(int *host_amtype, int *host_amgroup, double **hos
this->_off2_polar = off2_polar;
this->_aewald = aewald;
const int red_blocks=udirect2b(this->_eflag,this->_vflag);
udirect2b(this->_eflag,this->_vflag);
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
@ -449,7 +449,7 @@ void HippoT::compute_udirect2b(int *host_amtype, int *host_amgroup, double **hos
// Launch the real-space permanent field kernel
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
int HippoT::udirect2b(const int eflag, const int vflag) {
int HippoT::udirect2b(const int /*eflag*/, const int /*vflag*/) {
int ainum=this->ans->inum();
if (ainum == 0)
return 0;
@ -493,10 +493,9 @@ int HippoT::udirect2b(const int eflag, const int vflag) {
// returning field and fieldp
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
void HippoT::compute_umutual2b(int *host_amtype, int *host_amgroup, double **host_rpole,
double **host_uind, double **host_uinp, double *host_pval,
const double aewald, const double off2_polar,
void** fieldp_ptr) {
void HippoT::compute_umutual2b(int * /*host_amtype*/, int * /*host_amgroup*/, double ** /*host_rpole*/,
double **host_uind, double **host_uinp, double * /*host_pval*/,
const double aewald, const double off2_polar, void ** /*fieldp_ptr*/) {
// cast necessary data arrays from host to device
@ -505,7 +504,7 @@ void HippoT::compute_umutual2b(int *host_amtype, int *host_amgroup, double **hos
this->_off2_polar = off2_polar;
this->_aewald = aewald;
const int red_blocks=umutual2b(this->_eflag,this->_vflag);
umutual2b(this->_eflag,this->_vflag);
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
// NOTE: move this step to update_fieldp() to delay device-host transfer
@ -517,7 +516,7 @@ void HippoT::compute_umutual2b(int *host_amtype, int *host_amgroup, double **hos
// Launch the real-space induced field kernel
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
int HippoT::umutual2b(const int eflag, const int vflag) {
int HippoT::umutual2b(const int /*eflag*/, const int /*vflag*/) {
int ainum=this->ans->inum();
if (ainum == 0)
return 0;
@ -557,8 +556,8 @@ int HippoT::umutual2b(const int eflag, const int vflag) {
// Reneighbor on GPU if necessary, and then compute polar real-space
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
void HippoT::compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole,
double **host_uind, double **host_uinp, double *host_pval,
void HippoT::compute_polar_real(int * /*host_amtype*/, int * /*host_amgroup*/, double ** /*host_rpole*/,
double **host_uind, double **host_uinp, double * /*host_pval*/,
const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom,
const double aewald, const double felec,

View File

@ -123,7 +123,7 @@ void hippo_gpu_clear() {
int** hippo_gpu_precompute(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, int *host_amtype,
int *host_amgroup, double **host_rpole,
double **host_uind, double **host_uinp, double *host_pval,
double ** /*host_uind*/, double ** /*host_uinp*/, double * /*host_pval*/,
double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special,
int *nspecial15, tagint **special15,

View File

@ -293,15 +293,17 @@ class Neighbor {
#endif
int _simd_size;
inline void set_nbor_block_size(const int mn) {
#ifdef LAL_USE_OLD_NEIGHBOR
inline void set_nbor_block_size(const int mn) {
int desired=mn/(2*_simd_size);
desired*=_simd_size;
if (desired<_simd_size) desired=_simd_size;
else if (desired>_max_block_nbor_build) desired=_max_block_nbor_build;
_block_nbor_build=desired;
#endif
}
#else
inline void set_nbor_block_size(const int) {}
#endif
};
}

View File

@ -150,7 +150,7 @@ double SWT::host_memory_usage() const {
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
int SWT::loop(const int eflag, const int vflag, const int evatom,
bool &success) {
bool & /*success*/) {
const int nbor_pitch=this->nbor->nbor_pitch();
// build the short neighbor list

View File

@ -56,7 +56,7 @@ int VashishtaT::init(const int ntypes, const int nlocal, const int nall, const i
const double* costheta, const double* bigb,
const double* big2b, const double* bigc)
{
int success;
int success=0;
success=this->init_three(nlocal,nall,max_nbors,0,cell_size,gpu_split,
_screen,vashishta,"k_vashishta","k_vashishta_three_center",
"k_vashishta_three_end","k_vashishta_short_nbor");
@ -211,7 +211,7 @@ double VashishtaT::host_memory_usage() const {
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
int VashishtaT::loop(const int eflag, const int vflag, const int evatom,
bool &success) {
bool & /*success*/) {
const int nbor_pitch=this->nbor->nbor_pitch();
// build the short neighbor list