more whitespace cleanup

2018-03-15 22:02:02 -04:00
parent 7d2ada9d80
commit 94a923191a
43 changed files with 213 additions and 213 deletions
--- a/src/GRANULAR/fix_wall_gran_region.h
+++ b/src/GRANULAR/fix_wall_gran_region.h
@ -32,7 +32,7 @@ class FixWallGranRegion : public FixWallGran {
  void write_restart(FILE *);
  void restart(char* );
  void init();
-    
+
  double memory_usage();
  void grow_arrays(int);
  void copy_arrays(int, int, int);
@ -48,17 +48,17 @@ class FixWallGranRegion : public FixWallGran {
  class Region *region;
  char *region_style;
  int nregion;
-  
+
  // shear history for multiple contacts per particle

-  int tmax;              // max # of region walls one particle can touch 
+  int tmax;              // max # of region walls one particle can touch
  int *ncontact;         // # of shear contacts per particle
  int **walls;           // which wall each contact is with
  double ***shearmany;   // shear history per particle per contact
  int *c2r;              // contact to region mapping
                         // c2r[i] = index of Ith contact in
                         //   region-contact[] list of contacts
-  int motion_resetflag;  // used by restart to indicate that region 
+  int motion_resetflag;  // used by restart to indicate that region
                         //    vel info is to be reset

  void update_contacts(int, int);
--- a/src/KOKKOS/gridcomm_kokkos.h
+++ b/src/KOKKOS/gridcomm_kokkos.h
@ -32,7 +32,7 @@ class GridCommKokkos : protected Pointers {
 public:
  typedef DeviceType device_type;
  typedef ArrayTypes<DeviceType> AT;
- 
+
  GridCommKokkos(class LAMMPS *, MPI_Comm, int, int,
           int, int, int, int, int, int,
           int, int, int, int, int, int,
--- a/src/KOKKOS/kokkos_base.h
+++ b/src/KOKKOS/kokkos_base.h
@ -29,8 +29,8 @@ class KokkosBase {
  virtual void unpack_reverse_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};

  // Pair
-  virtual int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, 
-                                       int, DAT::tdual_xfloat_1d &, 
+  virtual int pack_forward_comm_kokkos(int, DAT::tdual_int_2d,
+                                       int, DAT::tdual_xfloat_1d &,
                                       int, int *) {return 0;};
  virtual void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d &) {}

--- a/src/KOKKOS/kokkos_type.h
+++ b/src/KOKKOS/kokkos_type.h
@ -984,7 +984,7 @@ void memset_kokkos (ViewType &view) {

 struct params_lj_coul {
  KOKKOS_INLINE_FUNCTION
-  params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};   
+  params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
  KOKKOS_INLINE_FUNCTION
  params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
  F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
--- a/src/KOKKOS/math_special_kokkos.h
+++ b/src/KOKKOS/math_special_kokkos.h
@ -37,7 +37,7 @@ namespace MathSpecialKokkos {
  }

  // exp(-x*x) for coul/long styles
-  
+
  static inline double expmsq(double x)
  {
    x *= x;
--- a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h
+++ b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h
@ -82,7 +82,7 @@ class PairLJCutCoulLongKokkos : public PairLJCutCoulLong {
  typename AT::t_f_array f;
  typename AT::t_int_1d_randomread type;
  typename AT::t_float_1d_randomread q;
-  
+
  DAT::tdual_efloat_1d k_eatom;
  DAT::tdual_virial_array k_vatom;
  typename AT::t_efloat_1d d_eatom;
--- a/src/KOKKOS/pair_morse_kokkos.h
+++ b/src/KOKKOS/pair_morse_kokkos.h
@ -71,7 +71,7 @@ class PairMorseKokkos : public PairMorse {

  Kokkos::DualView<params_morse**,Kokkos::LayoutRight,DeviceType> k_params;
  typename Kokkos::DualView<params_morse**,Kokkos::LayoutRight,DeviceType>::t_dev_const_um params;
-  params_morse m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; 
+  params_morse m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
  F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
  typename ArrayTypes<DeviceType>::t_x_array_randomread x;
  typename ArrayTypes<DeviceType>::t_x_array c_x;
--- a/src/KOKKOS/pair_snap_kokkos.h
+++ b/src/KOKKOS/pair_snap_kokkos.h
@ -44,7 +44,7 @@ public:

  PairSNAPKokkos(class LAMMPS *);
  ~PairSNAPKokkos();
-  
+
  void coeff(int, char**);
  void init_style();
  double init_one(int, int);
--- a/src/KOKKOS/pair_snap_kokkos_impl.h
+++ b/src/KOKKOS/pair_snap_kokkos_impl.h
@ -114,8 +114,8 @@ struct FindMaxNumNeighs {
  typedef DeviceType device_type;
  NeighListKokkos<DeviceType> k_list;

-  FindMaxNumNeighs(NeighListKokkos<DeviceType>* nl): k_list(*nl) {}  
-  ~FindMaxNumNeighs() {k_list.copymode = 1;}  
+  FindMaxNumNeighs(NeighListKokkos<DeviceType>* nl): k_list(*nl) {}
+  ~FindMaxNumNeighs() {k_list.copymode = 1;}

  KOKKOS_INLINE_FUNCTION
  void operator() (const int& ii, int& max_neighs) const {
@ -134,14 +134,14 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
 {
  eflag = eflag_in;
  vflag = vflag_in;
-  
+
  if (neighflag == FULL) no_virial_fdotr_compute = 1;
-  
+
  if (eflag || vflag) ev_setup(eflag,vflag,0);
  else evflag = vflag_fdotr = 0;

  // reallocate per-atom arrays if necessary
-  
+
  if (eflag_atom) {
    memoryKK->destroy_kokkos(k_eatom,eatom);
    memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
@ -241,7 +241,7 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
    virial[4] += ev.v[4];
    virial[5] += ev.v[5];
  }
-  
+
  if (vflag_fdotr) pair_virial_fdotr_compute(this);

  if (eflag_atom) {
@ -282,7 +282,7 @@ double PairSNAPKokkos<DeviceType>::init_one(int i, int j)
  double cutone = PairSNAP::init_one(i,j);
  k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone;
  k_cutsq.template modify<LMPHostType>();
-  
+
  return cutone;
 }

@ -469,7 +469,7 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
    }

    if (quadraticflag) {
-    
+
      int k = ncoeff+1;
      for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
        double bveci = my_sna.bvec[icoeff];
@ -535,7 +535,7 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
        my_sna.compute_bi(team);
        my_sna.copy_bi2bvec(team);
      }
-      
+
      // E = beta.B + 0.5*B^t.alpha.B
      // coeff[k] = beta[k-1] or
      // coeff[k] = alpha_ii or
@ -545,7 +545,7 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
      Kokkos::single(Kokkos::PerThread(team), [&] () {

      // evdwl = energy of atom I, sum over coeffs_k * Bi_k
-    
+
      double evdwl = d_coeffi[0];

      // linear contributions
@ -553,7 +553,7 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
          evdwl += d_coeffi[k]*my_sna.bvec[k-1];

        // quadratic contributions
-        
+
        if (quadraticflag) {
          int k = ncoeff+1;
          for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
--- a/src/KOKKOS/pair_tersoff_kokkos.h
+++ b/src/KOKKOS/pair_tersoff_kokkos.h
@ -189,7 +189,7 @@ class PairTersoffKokkos : public PairTersoff {
  // hardwired to space for 12 atom types
  //params_ters m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];

-  int inum; 
+  int inum;
  typename AT::t_x_array_randomread x;
  typename AT::t_f_array f;
  typename AT::t_int_1d_randomread type;
--- a/src/KOKKOS/pair_tersoff_mod_kokkos.h
+++ b/src/KOKKOS/pair_tersoff_mod_kokkos.h
@ -189,7 +189,7 @@ class PairTersoffMODKokkos : public PairTersoffMOD {
  // hardwired to space for 12 atom types
  //params_ters m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];

-  int inum; 
+  int inum;
  typename AT::t_x_array_randomread x;
  typename AT::t_f_array f;
  typename AT::t_int_1d_randomread type;
--- a/src/KOKKOS/pair_tersoff_zbl_kokkos.h
+++ b/src/KOKKOS/pair_tersoff_zbl_kokkos.h
@ -194,7 +194,7 @@ class PairTersoffZBLKokkos : public PairTersoffZBL {
  // hardwired to space for 12 atom types
  //params_ters m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];

-  int inum; 
+  int inum;
  typename AT::t_x_array_randomread x;
  typename AT::t_f_array f;
  typename AT::t_int_1d_randomread type;
--- a/src/KOKKOS/sna_kokkos_impl.h
+++ b/src/KOKKOS/sna_kokkos_impl.h
@ -28,10 +28,10 @@ template<class DeviceType>
 inline
 SNAKokkos<DeviceType>::SNAKokkos(double rfac0_in,
         int twojmax_in, int diagonalstyle_in, int use_shared_arrays_in,
-         double rmin0_in, int switch_flag_in, int bzero_flag_in) 
+         double rmin0_in, int switch_flag_in, int bzero_flag_in)
 {
  wself = 1.0;
-  
+
  use_shared_arrays = use_shared_arrays_in;
  rfac0 = rfac0_in;
  rmin0 = rmin0_in;
@ -46,7 +46,7 @@ SNAKokkos<DeviceType>::SNAKokkos(double rfac0_in,
  //create_twojmax_arrays();

  nmax = 0;
-  
+
  build_indexlist();

  int jdim = twojmax + 1;
@ -1000,7 +1000,7 @@ void SNAKokkos<DeviceType>::compute_duarray(const typename Kokkos::TeamPolicy<De
  dsfac *= wj;

  for (int j = 0; j <= twojmax; j++)
-    for (int mb = 0; mb <= j; mb++) 
+    for (int mb = 0; mb <= j; mb++)
      for (int ma = 0; ma <= j; ma++) {
        duarray_r(j,mb,ma,0) = dsfac * uarray_r(j,ma,mb) * ux +
                                  sfac * duarray_r(j,mb,ma,0);
--- a/src/KSPACE/pair_lj_charmmfsw_coul_long.h
+++ b/src/KSPACE/pair_lj_charmmfsw_coul_long.h
@ -49,7 +49,7 @@ class PairLJCharmmfswCoulLong : public Pair {

 protected:
  int implicit;
-  int dihedflag; 
+  int dihedflag;

  double cut_lj_inner,cut_lj,cut_ljinv,cut_lj_innerinv;
  double cut_lj_innersq,cut_ljsq;
--- a/src/MANYBODY/pair_polymorphic.h
+++ b/src/MANYBODY/pair_polymorphic.h
@ -5,7 +5,7 @@

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
+   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
@ -75,7 +75,7 @@ class PairPolymorphic : public Pair {
    tabularFunction(int n, double x1, double x2) {
      size = n;
      xmin = x1;
-      xmax = x2; 
+      xmax = x2;
      xmaxsq = xmax*xmax;
      xs = new double[n];
      ys = new double[n];
@ -87,9 +87,9 @@ class PairPolymorphic : public Pair {
      ys6 = new double[n];
    }
    virtual ~tabularFunction() {
-      if (xs) delete [] xs; 
-      if (ys) delete [] ys; 
-      if (ys1) delete [] ys1; 
+      if (xs) delete [] xs;
+      if (ys) delete [] ys;
+      if (ys1) delete [] ys1;
      if (ys2) delete [] ys2;
      if (ys3) delete [] ys3;
      if (ys4) delete [] ys4;
@ -255,7 +255,7 @@ class PairPolymorphic : public Pair {
  bool eta; // global indicator
  int nx,nr,ng; // table sizes
  double maxX;
-  
+
  // parameter sets
  PairParameters    * pairParameters;    // for I-J interaction
  TripletParameters * tripletParameters; // for I-J-K interaction
@ -264,7 +264,7 @@ class PairPolymorphic : public Pair {
  int *firstneighV,*firstneighW,*firstneighW1;
  double *delxV,*delyV,*delzV,*drV;
  double *delxW,*delyW,*delzW,*drW;
-  
+
  char **elements;              // names of unique elements
  int **elem2param;             // map: element pairs to parameters
  int ***elem3param;            // map: element triplets to parameters
@ -296,7 +296,7 @@ class PairPolymorphic : public Pair {
    return x[0]*y[0] + x[1]*y[1] + x[2]*y[2];
  }

-  inline void vec3_add(const double x[3], const double y[3], 
+  inline void vec3_add(const double x[3], const double y[3],
 		       double * const z) const {
    z[0] = x[0]+y[0];  z[1] = x[1]+y[1];  z[2] = x[2]+y[2];
  }
@ -306,7 +306,7 @@ class PairPolymorphic : public Pair {
    y[0] = k*x[0];  y[1] = k*x[1];  y[2] = k*x[2];
  }

-  inline void vec3_scaleadd(const double k, const double x[3], 
+  inline void vec3_scaleadd(const double k, const double x[3],
 			    const double y[3], double * const z) const {
    z[0] = k*x[0]+y[0];
    z[1] = k*x[1]+y[1];
--- a/src/MC/fix_gcmc.h
+++ b/src/MC/fix_gcmc.h
@ -58,7 +58,7 @@ class FixGCMC : public Fix {
  void write_restart(FILE *);
  void restart(char *);
  void grow_molecule_arrays(int);
-  
+
 private:
  int molecule_group,molecule_group_bit;
  int molecule_group_inversebit;
@ -118,9 +118,9 @@ class FixGCMC : public Fix {
  double *molq;
  imageint *molimage;
  imageint imagezero;
-  double overlap_cutoffsq; // square distance cutoff for overlap 
+  double overlap_cutoffsq; // square distance cutoff for overlap
  int overlap_flag;
-  
+
  double energy_intra;

  class Pair *pair;
@ -219,12 +219,12 @@ W: Fix gcmc using full_energy option
 Fix gcmc has automatically turned on the full_energy option since it
 is required for systems like the one specified by the user. User input
 included one or more of the following: kspace, a hybrid
-pair style, an eam pair style, tail correction, 
+pair style, an eam pair style, tail correction,
 or no "single" function for the pair style.

-W: Energy of old configuration in fix gcmc is > MAXENERGYTEST. 
+W: Energy of old configuration in fix gcmc is > MAXENERGYTEST.

-This probably means that a pair of atoms are closer than the 
+This probably means that a pair of atoms are closer than the
 overlap cutoff distance for keyword overlap_cutoff.

 W: Fix gcmc is being applied to the default group all
--- a/src/MOLECULE/fix_cmap.h
+++ b/src/MOLECULE/fix_cmap.h
@ -63,7 +63,7 @@ class FixCMAP : public Fix {
  int unpack_exchange(int, double *);

  double memory_usage();
- 
+
 private:
  int nprocs,me;
  int newton_bond,eflag_caller;
@ -111,7 +111,7 @@ class FixCMAP : public Fix {
  void spl_interpolate(double, double *, double *, double &, double &);

  // calculate dihedral angles
-     
+
  double dihedral_angle_atan2(double, double, double, double, double, double,
                              double, double, double, double);

--- a/src/REPLICA/compute_event_displace.h
+++ b/src/REPLICA/compute_event_displace.h
@ -34,7 +34,7 @@ class ComputeEventDisplace : public Compute {
  int all_events();
  void reset_extra_compute_fix(const char *);

-  
+
 private:
  int triclinic;
  double displace_distsq;
--- a/src/RIGID/fix_ehex.h
+++ b/src/RIGID/fix_ehex.h
@ -52,7 +52,7 @@ class FixEHEX : public Fix {
  double scale;
  char *idregion;
  int me;
- 
+
  double **x;              // coordinates
  double **f;              // forces
  double **v;              // velocities
@ -64,7 +64,7 @@ class FixEHEX : public Fix {
  int constraints;          // constraints (0/1)
  int cluster;              // rescaling entire clusters (0/1)
  int hex;                  // HEX mode (0/1)
-  bool *scalingmask;       // scalingmask[i] determines whether 
+  bool *scalingmask;       // scalingmask[i] determines whether
                            // the velocity of atom i is to be rescaled
 };

@ -75,7 +75,7 @@ class FixEHEX : public Fix {

 /* ERROR/WARNING messages:

-E: Illegal fix ehex command: wrong number of parameters 
+E: Illegal fix ehex command: wrong number of parameters

 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
--- a/src/RIGID/fix_rigid_nh_small.h
+++ b/src/RIGID/fix_rigid_nh_small.h
@ -79,7 +79,7 @@ class FixRigidNHSmall : public FixRigidSmall {
  void compute_press_target();
  void nh_epsilon_dot();
  void compute_dof();
-  
+
  void allocate_chain();
  void allocate_order();
  void deallocate_chain();
--- a/src/RIGID/fix_shake.h
+++ b/src/RIGID/fix_shake.h
@ -67,7 +67,7 @@ class FixShake : public Fix {
  int max_iter;                          // max # of SHAKE iterations
  int output_every;                      // SHAKE stat output every so often
  bigint next_output;                    // timestep for next output
-  
+
                                         // settings from input command
  int *bond_flag,*angle_flag;            // bond/angle types to constrain
  int *type_flag;                        // constrain bonds to these types
--- a/src/USER-CGDNA/pair_oxdna2_excv.h
+++ b/src/USER-CGDNA/pair_oxdna2_excv.h
@ -28,7 +28,7 @@ class PairOxdna2Excv : public PairOxdnaExcv {
 public:
  PairOxdna2Excv(class LAMMPS *);
  virtual ~PairOxdna2Excv();
-  virtual void compute_interaction_sites(double *, 
+  virtual void compute_interaction_sites(double *,
    double *, double *, double *);
 };

--- a/src/USER-CGDNA/pair_oxdna_excv.h
+++ b/src/USER-CGDNA/pair_oxdna_excv.h
@ -28,7 +28,7 @@ class PairOxdnaExcv : public Pair {
 public:
  PairOxdnaExcv(class LAMMPS *);
  virtual ~PairOxdnaExcv();
-  virtual void compute_interaction_sites(double *, double *, 
+  virtual void compute_interaction_sites(double *, double *,
    double *, double *);
  virtual void compute(int, int);
  void settings(int, char **);
--- a/src/USER-INTEL/dihedral_fourier_intel.h
+++ b/src/USER-INTEL/dihedral_fourier_intel.h
@ -57,7 +57,7 @@ class DihedralFourierIntel : public DihedralFourier {
  template <class flt_t>
  class ForceConst {
   public:
-    typedef struct { flt_t cos_shift, sin_shift, k; 
+    typedef struct { flt_t cos_shift, sin_shift, k;
      int multiplicity; } fc_packed1;

    fc_packed1 **bp;
@ -65,7 +65,7 @@ class DihedralFourierIntel : public DihedralFourier {
    ForceConst() : _nbondtypes(0)  {}
    ~ForceConst() { set_ntypes(0, NULL, NULL, NULL); }

-    void set_ntypes(const int nbondtypes, int *setflag, int *nterms, 
+    void set_ntypes(const int nbondtypes, int *setflag, int *nterms,
 		    Memory *memory);

   private:
--- a/src/USER-INTEL/intel_intrinsics_airebo.h
+++ b/src/USER-INTEL/intel_intrinsics_airebo.h
@ -60,59 +60,59 @@ namespace mm512 {
 #ifndef __AVX512F__

 #ifndef FVEC_FIRST_PASS
-VEC_INLINE static inline __m512i _mm512_mask_expand_epi32(__m512i src, 
-							  __mmask16 k, 
+VEC_INLINE static inline __m512i _mm512_mask_expand_epi32(__m512i src,
+							  __mmask16 k,
 							  __m512i a) {
  int buf[16] __attribute__((aligned(64)));
  _mm512_store_epi32(buf, a);
  return _mm512_mask_loadunpacklo_epi32(src, k, buf);
 }
-VEC_INLINE static inline __m512i _mm512_maskz_expand_epi32(__mmask16 k, 
+VEC_INLINE static inline __m512i _mm512_maskz_expand_epi32(__mmask16 k,
 							   __m512i a) {
  int buf[16] __attribute__((aligned(64)));
  _mm512_store_epi32(buf, a);
  return _mm512_mask_loadunpacklo_epi32(_mm512_setzero_epi32(), k, buf);
 }
-VEC_INLINE static inline __m512i _mm512_mask_compress_epi32(__m512i src, 
-							    __mmask16 k, 
+VEC_INLINE static inline __m512i _mm512_mask_compress_epi32(__m512i src,
+							    __mmask16 k,
 							    __m512i a) {
  int buf[16] __attribute__((aligned(64)));
  _mm512_store_epi32(buf, src);
  _mm512_mask_packstorelo_epi32(buf, k, a);
  return _mm512_load_epi32(buf);
 }
-VEC_INLINE static inline __m512i _mm512_maskz_compress_epi32(__mmask16 k, 
+VEC_INLINE static inline __m512i _mm512_maskz_compress_epi32(__mmask16 k,
 							     __m512i a) {
  int buf[16] __attribute__((aligned(64))) = {0};
  _mm512_mask_packstorelo_epi32(buf, k, a);
  return _mm512_load_epi32(buf);
 }

-VEC_INLINE static inline void _mm512_mask_compressstoreu_epi32(int * dest, 
-							       __mmask16 mask, 
+VEC_INLINE static inline void _mm512_mask_compressstoreu_epi32(int * dest,
+							       __mmask16 mask,
 							       __m512i src) {
  _mm512_mask_packstorelo_epi32(dest, mask, src);
  _mm512_mask_packstorehi_epi32(dest + 16, mask, src);
 }

-VEC_INLINE static inline __m512i _mm512_mask_loadu_epi32(__m512i src, 
-							 __mmask16 k, 
+VEC_INLINE static inline __m512i _mm512_mask_loadu_epi32(__m512i src,
+							 __mmask16 k,
 							 const int * mem_addr) {
  assert((k & (k + 1)) == 0);
  __m512i ret = _mm512_mask_loadunpacklo_epi32(src, k, mem_addr);
  ret = _mm512_mask_loadunpackhi_epi32(ret, k, mem_addr + 16);
  return ret;
 }
-VEC_INLINE static inline __m512i _mm512_maskz_loadu_epi32(__mmask16 k, 
+VEC_INLINE static inline __m512i _mm512_maskz_loadu_epi32(__mmask16 k,
 							const int * mem_addr) {
  assert((k & (k + 1)) == 0);
-  __m512i ret = _mm512_mask_loadunpacklo_epi32(_mm512_setzero_epi32(), k, 
+  __m512i ret = _mm512_mask_loadunpacklo_epi32(_mm512_setzero_epi32(), k,
 					       mem_addr);
  ret = _mm512_mask_loadunpackhi_epi32(ret, k, mem_addr + 16);
  return ret;
 }
-VEC_INLINE static inline void _mm512_mask_storeu_epi32(int * dest, 
-						       __mmask16 mask, 
+VEC_INLINE static inline void _mm512_mask_storeu_epi32(int * dest,
+						       __mmask16 mask,
 						       __m512i src) {
  assert((mask & (mask + 1)) == 0);
  _mm512_mask_packstorelo_epi32(dest, mask, src);
@ -181,7 +181,7 @@ public:
  VEC_INLINE static int kortestz(const BVEC_NAME &a, const BVEC_NAME &b) {
    return _mm512_kortestz(a.val_, b.val_);
  }
-  VEC_INLINE static BVEC_NAME masku_compress(const BVEC_NAME &mask, 
+  VEC_INLINE static BVEC_NAME masku_compress(const BVEC_NAME &mask,
 					     const BVEC_NAME &a) {
    const __m512i c_i1 = _mm512_set1_epi32(1);
    __m512i a_int_vec = _mm512_mask_blend_epi32(a.val_, _mm512_setzero_epi32(),
@ -190,13 +190,13 @@ public:
 						    mask.val_, a_int_vec);
    return _mm512_cmpeq_epi32_mask(compressed, c_i1);
  }
-  VEC_INLINE static BVEC_NAME mask_expand(const BVEC_NAME &src, 
+  VEC_INLINE static BVEC_NAME mask_expand(const BVEC_NAME &src,
 					  const BVEC_NAME &mask,
 					  const BVEC_NAME &a) {
    const __m512i c_i1 = _mm512_set1_epi32(1);
    __m512i a_int_vec = _mm512_mask_blend_epi32(a.val_, _mm512_setzero_epi32(),
 						c_i1);
-    __m512i src_int_vec = _mm512_mask_blend_epi32(src.val_, 
+    __m512i src_int_vec = _mm512_mask_blend_epi32(src.val_,
 						  _mm512_setzero_epi32(), c_i1);
    __m512i compressed = _mm512_mask_expand_epi32(src_int_vec, mask.val_,
 						  a_int_vec);
@ -318,19 +318,19 @@ public:
  VEC_INLINE static IVEC_NAME load(const int * src) {
    return _mm512_load_epi32(src);
  }
-  VEC_INLINE static IVEC_NAME mask_loadu(const BVEC_NAME &mask, 
+  VEC_INLINE static IVEC_NAME mask_loadu(const BVEC_NAME &mask,
                                         const int * src) {
    assert((mask.val_ & (mask.val_ + 1)) == 0);
    assert(mask.val_ <= BVEC_NAME::full().val_);
    return _mm512_mask_loadu_epi32(_mm512_undefined_epi32(), mask.val_, src);
  }
-  VEC_INLINE static IVEC_NAME maskz_loadu(const BVEC_NAME &mask, 
+  VEC_INLINE static IVEC_NAME maskz_loadu(const BVEC_NAME &mask,
                                          const int * src) {
    assert((mask.val_ & (mask.val_ + 1)) == 0);
    assert(mask.val_ <= BVEC_NAME::full().val_);
    return _mm512_maskz_loadu_epi32(mask.val_, src);
  }
-  VEC_INLINE static void mask_storeu(const BVEC_NAME &mask, int * dest, 
+  VEC_INLINE static void mask_storeu(const BVEC_NAME &mask, int * dest,
    const IVEC_NAME &src) {
    assert((mask.val_ & (mask.val_ + 1)) == 0);
    assert(mask.val_ <= BVEC_NAME::full().val_);
@ -341,16 +341,16 @@ public:
  }

  VEC_INLINE static IVEC_NAME mask_gather(
-      const IVEC_NAME &src, const BVEC_NAME &mask, const IVEC_NAME &idx, 
+      const IVEC_NAME &src, const BVEC_NAME &mask, const IVEC_NAME &idx,
      const int * mem, const int scale
  ) {
    assert(mask.val_ <= BVEC_NAME::full().val_);
    assert(scale == sizeof(int));
-    return _mm512_mask_i32gather_epi32(src.val_, mask.val_, idx.val_, mem, 
+    return _mm512_mask_i32gather_epi32(src.val_, mask.val_, idx.val_, mem,
      sizeof(int));
  }
  VEC_INLINE static void mask_i32scatter(
-      int * mem, const BVEC_NAME &mask, const IVEC_NAME &idx, 
+      int * mem, const BVEC_NAME &mask, const IVEC_NAME &idx,
      const IVEC_NAME &a, const int scale
  ) {
    assert(mask.val_ <= BVEC_NAME::full().val_);
@ -505,8 +505,8 @@ public:
    FVEC_SUFFIX(_mm512_store_)(dest, a.val_);
  }

-  VEC_INLINE static FVEC_NAME gather(const IVEC_NAME &idx, 
-				     const FVEC_SCAL_T * mem, 
+  VEC_INLINE static FVEC_NAME gather(const IVEC_NAME &idx,
+				     const FVEC_SCAL_T * mem,
 				     const int scale) {
    assert(scale == sizeof(FVEC_SCAL_T));
 #   if FVEC_LEN==8
@ -529,22 +529,22 @@ public:
 #   endif
  }

-  VEC_INLINE static void gather_3_adjacent(const IVEC_NAME &idx, 
-					   const FVEC_SCAL_T * mem, 
-					   const int scale, 
-					   FVEC_NAME * out_0, 
-					   FVEC_NAME * out_1, 
+  VEC_INLINE static void gather_3_adjacent(const IVEC_NAME &idx,
+					   const FVEC_SCAL_T * mem,
+					   const int scale,
+					   FVEC_NAME * out_0,
+					   FVEC_NAME * out_1,
 					   FVEC_NAME * out_2) {
    assert(scale == sizeof(FVEC_SCAL_T));
    *out_0 = FVEC_NAME::gather(idx, mem + 0, scale);
    *out_1 = FVEC_NAME::gather(idx, mem + 1, scale);
    *out_2 = FVEC_NAME::gather(idx, mem + 2, scale);
  }
-  VEC_INLINE static void gather_4_adjacent(const IVEC_NAME &idx, 
-					   const FVEC_SCAL_T * mem, 
+  VEC_INLINE static void gather_4_adjacent(const IVEC_NAME &idx,
+					   const FVEC_SCAL_T * mem,
 					   const int scale, FVEC_NAME * out_0,
-					   FVEC_NAME * out_1, 
-					   FVEC_NAME * out_2, 
+					   FVEC_NAME * out_1,
+					   FVEC_NAME * out_2,
 					   FVEC_NAME * out_3) {
    assert(scale == sizeof(FVEC_SCAL_T));
    *out_0 = FVEC_NAME::gather(idx, mem + 0, scale);
@ -553,7 +553,7 @@ public:
    *out_3 = FVEC_NAME::gather(idx, mem + 3, scale);
  }

-  VEC_INLINE static FVEC_SCAL_T mask_reduce_add(const BVEC_NAME &mask, 
+  VEC_INLINE static FVEC_SCAL_T mask_reduce_add(const BVEC_NAME &mask,
 						const FVEC_NAME &a) {
    return FVEC_SUFFIX(_mm512_mask_reduce_add_)(mask.val_, a.val_);
  }
@ -588,7 +588,7 @@ public:

  VEC_INLINE static void gather_prefetch0(const IVEC_NAME &a, void * mem) {
    #ifdef __AVX512PF__
-    _mm512_mask_prefetch_i32gather_ps(a.val_, BVEC_NAME::full().val_, mem, 
+    _mm512_mask_prefetch_i32gather_ps(a.val_, BVEC_NAME::full().val_, mem,
      sizeof(FVEC_SCAL_T), _MM_HINT_T0);
    #endif
  }
@ -621,10 +621,10 @@ public:
  ) {
    assert(scale == sizeof(FVEC_SCAL_T));
 #   if FVEC_LEN==8
-    FVEC_SUFFIX(_mm512_mask_i32loscatter_)(mem, mask.val_, idx.val_, a.val_, 
+    FVEC_SUFFIX(_mm512_mask_i32loscatter_)(mem, mask.val_, idx.val_, a.val_,
 					   sizeof(FVEC_SCAL_T));
 #   else
-    FVEC_SUFFIX(_mm512_mask_i32scatter_)(mem, mask.val_, idx.val_, a.val_, 
+    FVEC_SUFFIX(_mm512_mask_i32scatter_)(mem, mask.val_, idx.val_, a.val_,
 					 sizeof(FVEC_SCAL_T));
 #   endif
  }
@ -636,7 +636,7 @@ public:
  AVEC_BINOP(-, sub)

  VEC_INLINE static void gather_prefetch0(const IVEC_NAME &a, void * mem) {
-    _mm512_mask_prefetch_i32gather_ps(a.val_, BVEC_NAME::full().val_, mem, 
+    _mm512_mask_prefetch_i32gather_ps(a.val_, BVEC_NAME::full().val_, mem,
 				      sizeof(FVEC_SCAL_T), _MM_HINT_T0);
  }
 };
@ -644,7 +644,7 @@ public:
 #if FVEC_LEN==16
 class avec16pd {
  __m512d lo_, hi_;
-  VEC_INLINE avec16pd(const __m512d &lo, const __m512d &hi) : lo_(lo), hi_(hi) 
+  VEC_INLINE avec16pd(const __m512d &lo, const __m512d &hi) : lo_(lo), hi_(hi)
    {}
  VEC_INLINE static __mmask8 get_bvec_hi(__mmask16 a) {
    return a >> 8;
@ -665,10 +665,10 @@ public:
      const double * mem, const int scale
  ) {
    assert(scale == sizeof(double));
-    __m512d lo = _mm512_mask_i32logather_pd(src.lo_, mask.val_, idx.val_, mem, 
+    __m512d lo = _mm512_mask_i32logather_pd(src.lo_, mask.val_, idx.val_, mem,
 					    sizeof(double));
-    __m512d hi = _mm512_mask_i32logather_pd(src.hi_, get_bvec_hi(mask.val_), 
-					    get_ivec_hi(idx.val_), mem, 
+    __m512d hi = _mm512_mask_i32logather_pd(src.hi_, get_bvec_hi(mask.val_),
+					    get_ivec_hi(idx.val_), mem,
 					    sizeof(double));
    return avec16pd(lo, hi);
  }
@ -677,9 +677,9 @@ public:
      const avec16pd &a, const int scale
  ) {
    assert(scale == sizeof(double));
-    _mm512_mask_i32loscatter_pd(mem, mask.val_, idx.val_, a.lo_, 
+    _mm512_mask_i32loscatter_pd(mem, mask.val_, idx.val_, a.lo_,
 				sizeof(double));
-    _mm512_mask_i32loscatter_pd(mem, get_bvec_hi(mask.val_), 
+    _mm512_mask_i32loscatter_pd(mem, get_bvec_hi(mask.val_),
 				get_ivec_hi(idx.val_), a.hi_, sizeof(double));
  }

@ -692,7 +692,7 @@ public:
  AVEC2_BINOP(-, sub)

  VEC_INLINE static void gather_prefetch0(const IVEC_NAME &a, void * mem) {
-    _mm512_mask_prefetch_i32gather_ps(a.val_, BVEC_NAME::full().val_, mem, 
+    _mm512_mask_prefetch_i32gather_ps(a.val_, BVEC_NAME::full().val_, mem,
 				      sizeof(double), _MM_HINT_T0);
  }
 };
@ -808,17 +808,17 @@ VEC_INLINE inline __m256i _cm256_and_si256(const __m256i &a, const __m256i &b) {
  IVEC_EM_BIN(_mm_and_si128)
 }

-VEC_INLINE inline __m256i _cm256_andnot_si256(const __m256i &a, 
+VEC_INLINE inline __m256i _cm256_andnot_si256(const __m256i &a,
 					      const __m256i &b) {
  IVEC_EM_BIN(_mm_andnot_si128)
 }

-VEC_INLINE inline __m256i _cm256_cmpeq_epi32(const __m256i &a, 
+VEC_INLINE inline __m256i _cm256_cmpeq_epi32(const __m256i &a,
 					     const __m256i &b) {
  IVEC_EM_BIN(_mm_cmpeq_epi32)
 }

-VEC_INLINE inline __m256i _cm256_cmpgt_epi32(const __m256i &a, 
+VEC_INLINE inline __m256i _cm256_cmpgt_epi32(const __m256i &a,
 					     const __m256i &b) {
  IVEC_EM_BIN(_mm_cmpgt_epi32)
 }
@ -843,7 +843,7 @@ VEC_INLINE inline __m256i _cm256_cvtepu8_epi32(const __m128i &a) {
  }					       \
  return _mm256_load_si256((__m256i*) dest);

-VEC_INLINE inline __m256i _cm256_permutevar8x32_epi32(const __m256i &a, 
+VEC_INLINE inline __m256i _cm256_permutevar8x32_epi32(const __m256i &a,
 						      const __m256i &b) {
  IVEC_EM_SCAL(buf_a[buf_b[i]])
 }
@ -857,7 +857,7 @@ VEC_INLINE inline __m256i _cm256_srlv_epi32(__m256i a, __m256i b) {
 }


-VEC_INLINE inline __m256 _cm256_permutevar8x32_ps(const __m256 &a, 
+VEC_INLINE inline __m256 _cm256_permutevar8x32_ps(const __m256 &a,
 						  const __m256i &b) {
  return _mm256_castsi256_ps(_cm256_permutevar8x32_epi32(_mm256_castps_si256(a),
 							 b));
@ -877,10 +877,10 @@ VEC_INLINE inline __m256i _cm256_maskload_epi32(int const * mem, __m256i mask) {
 }


-VEC_INLINE inline __m256i _cm256_mask_i32gather_epi32(__m256i src, 
-						      int const * base_addr, 
-						      __m256i index, 
-						      __m256i mask, 
+VEC_INLINE inline __m256i _cm256_mask_i32gather_epi32(__m256i src,
+						      int const * base_addr,
+						      __m256i index,
+						      __m256i mask,
 						      const int scale) {
  assert(scale == sizeof(int));
  int buf_index[8] __attribute__((aligned(32)));
@ -895,16 +895,16 @@ VEC_INLINE inline __m256i _cm256_mask_i32gather_epi32(__m256i src,
  return _mm256_load_si256((__m256i*) dest);
 }

-VEC_INLINE inline __m256 _cm256_mask_i32gather_ps(__m256 src, 
-						  float const * base_addr, 
-						  __m256i index, __m256 mask, 
+VEC_INLINE inline __m256 _cm256_mask_i32gather_ps(__m256 src,
+						  float const * base_addr,
+						  __m256i index, __m256 mask,
 						  const int scale) {
  return _mm256_castsi256_ps(_cm256_mask_i32gather_epi32(
    _mm256_castps_si256(src), (const int *) base_addr, index,
    _mm256_castps_si256(mask), scale));
 }

-VEC_INLINE inline __m256d _cm256_mask_i32gather_pd(__m256d src, 
+VEC_INLINE inline __m256d _cm256_mask_i32gather_pd(__m256d src,
 						   double const * base_addr,
 						   __m128i index, __m256d mask,
 						   const int scale) {
@ -922,7 +922,7 @@ VEC_INLINE inline __m256d _cm256_mask_i32gather_pd(__m256d src,
 }

 VEC_INLINE inline __m256i _cm256_i32gather_epi32(int const * base_addr,
-						 __m256i index, 
+						 __m256i index,
 						 const int scale) {
  assert(scale == sizeof(int));
  int buf_index[8] __attribute__((aligned(32)));
@ -1007,12 +1007,12 @@ VEC_INLINE inline uint64_t _cext_u64(uint64_t tmp, uint64_t mask) {

 VEC_INLINE inline __m256 _mm256_compress_ps(__m256 mask, __m256 a) {
 # ifdef __AVX2__
-  uint64_t expanded_mask = _pdep_u64(_mm256_movemask_ps(mask), 
+  uint64_t expanded_mask = _pdep_u64(_mm256_movemask_ps(mask),
 				     0x0101010101010101);
  // unpack each bit to a byte
  expanded_mask *= 0xFF;   // mask |= mask<<1 | mask<<2 | ... | mask<<7;
  // the identity shuffle for vpermps, packed to one index per byte
-  const uint64_t identity_indices = 0x0706050403020100;   
+  const uint64_t identity_indices = 0x0706050403020100;
  uint64_t wanted_indices = _pext_u64(identity_indices, expanded_mask);

  __m128i bytevec = _mm_cvtsi64_si128(wanted_indices);
@ -1036,7 +1036,7 @@ VEC_INLINE inline __m256 _mm256_compress_ps(__m256 mask, __m256 a) {
 }
 VEC_INLINE inline __m256 _mm256_expand_ps(__m256 mask, __m256 a) {
 # ifdef __AVX2__
-  uint64_t expanded_mask = _pdep_u64(_mm256_movemask_ps(mask), 
+  uint64_t expanded_mask = _pdep_u64(_mm256_movemask_ps(mask),
 				     0x0101010101010101);
  expanded_mask *= 0xFF;
  const uint64_t identity_indices = 0x0706050403020100;
@ -1061,11 +1061,11 @@ VEC_INLINE inline __m256 _mm256_expand_ps(__m256 mask, __m256 a) {
 }

 VEC_INLINE inline __m256d _mm256_compress_pd(__m256d mask, __m256d a) {
-  return _mm256_castps_pd(_mm256_compress_ps(_mm256_castpd_ps(mask), 
+  return _mm256_castps_pd(_mm256_compress_ps(_mm256_castpd_ps(mask),
 					     _mm256_castpd_ps(a)));
 }
 VEC_INLINE inline __m256d _mm256_expand_pd(__m256d mask, __m256d a) {
-  return _mm256_castps_pd(_mm256_expand_ps(_mm256_castpd_ps(mask), 
+  return _mm256_castps_pd(_mm256_expand_ps(_mm256_castpd_ps(mask),
                                           _mm256_castpd_ps(a)));
 }
 #endif
@ -1093,12 +1093,12 @@ public:
  VEC_INLINE static BVEC_NAME kandn(const BVEC_NAME &a, const BVEC_NAME &b) {
    return FVEC_SUFFIX(_mm256_andnot_)(a.val_, b.val_);
  }
-  VEC_INLINE static BVEC_NAME masku_compress(const BVEC_NAME &mask, 
+  VEC_INLINE static BVEC_NAME masku_compress(const BVEC_NAME &mask,
 					     const BVEC_NAME &a) {
    return FVEC_SUFFIX(_mm256_compress_)(mask.val_, a.val_);
  }
-  VEC_INLINE static BVEC_NAME mask_expand(const BVEC_NAME &src, 
-					  const BVEC_NAME &mask, 
+  VEC_INLINE static BVEC_NAME mask_expand(const BVEC_NAME &src,
+					  const BVEC_NAME &mask,
 					  const BVEC_NAME &a) {
    FVEC_MASK_T ret = FVEC_SUFFIX(_mm256_expand_)(mask.val_, a.val_);
    ret = FVEC_SUFFIX(_mm256_and_)(mask.val_, ret);
@ -1244,7 +1244,7 @@ public:
  VEC_INLINE static IVEC_NAME mask_blend(
      const BVEC_NAME &mask, const IVEC_NAME &a, const IVEC_NAME &b
  ) {
-    return to(FVEC_SUFFIX(_mm256_blendv_)(from(a.val_), from(b.val_), 
+    return to(FVEC_SUFFIX(_mm256_blendv_)(from(a.val_), from(b.val_),
              mask.val_));
  }
  #define IVEC_MASK_BINFN_I(the_name)                                \
@ -1271,7 +1271,7 @@ public:
    return _mm256_and_si256(a.val_, b.val_);
  }

-  VEC_INLINE static IVEC_NAME masku_compress(const BVEC_NAME &mask, 
+  VEC_INLINE static IVEC_NAME masku_compress(const BVEC_NAME &mask,
 					     const IVEC_NAME &b) {
    return to(FVEC_SUFFIX(_mm256_compress_)(mask.val_, from(b.val_)));
  }
@ -1310,7 +1310,7 @@ public:
    printf("\n");
  }

-  VEC_INLINE static IVEC_NAME maskz_loadu(const BVEC_NAME &mask, 
+  VEC_INLINE static IVEC_NAME maskz_loadu(const BVEC_NAME &mask,
 					  const int * src) {
    FVEC_VEC_T mask_val = mask.val_;
 #   if FVEC_LEN==4
@ -1319,11 +1319,11 @@ public:
      {0, 2, 4, 6, 0, 0, 0, 0};
    __m256 m = _mm256_castpd_ps(mask_val);
    m = _mm256_permutevar8x32_ps(m, _mm256_load_si256((__m256i*)mask_shuffle));
-    __m128i ret = _mm_maskload_epi32(src, 
+    __m128i ret = _mm_maskload_epi32(src,
       _mm256_castsi256_si128(_mm256_castps_si256(m)));
    static const unsigned int load_shuffle[8] __attribute__((aligned(32))) =
      {0, 0, 1, 1, 2, 2, 3, 3};
-    return _mm256_permutevar8x32_epi32(_mm256_castsi128_si256(ret), 
+    return _mm256_permutevar8x32_epi32(_mm256_castsi128_si256(ret),
      _mm256_load_si256((__m256i*)load_shuffle));
 #    else
    int dest[8] __attribute__((aligned(32))) = {0};
@ -1344,11 +1344,11 @@ public:
  }

  VEC_INLINE static IVEC_NAME mask_gather(
-      const IVEC_NAME &src, const BVEC_NAME &mask, const IVEC_NAME &idx, 
+      const IVEC_NAME &src, const BVEC_NAME &mask, const IVEC_NAME &idx,
      const int * mem, const int scale
  ) {
    assert(scale == sizeof(int));
-    return _mm256_mask_i32gather_epi32(src.val_, mem, idx.val_, to(mask.val_), 
+    return _mm256_mask_i32gather_epi32(src.val_, mem, idx.val_, to(mask.val_),
 				       sizeof(int));
  }

@ -1433,15 +1433,15 @@ public:
  VEC_INLINE static __m256d _mm256_abs_pd(__m256d a) {
    const unsigned long long abs_mask = 0x7FFFFFFFFFFFFFFF;
    const unsigned long long abs_full[8] =
-        {abs_mask, abs_mask, abs_mask, abs_mask, abs_mask, abs_mask, abs_mask, 
+        {abs_mask, abs_mask, abs_mask, abs_mask, abs_mask, abs_mask, abs_mask,
 	   abs_mask};
    return _mm256_and_pd(_mm256_load_pd((double*)abs_full), a);
  }
  VEC_INLINE static __m256 _mm256_abs_ps(__m256 a) {
    const unsigned long long abs_mask = 0x7FFFFFFF;
    const unsigned long long abs_full[16] =
-        {abs_mask, abs_mask, abs_mask, abs_mask, abs_mask, abs_mask, abs_mask, 
-	   abs_mask, abs_mask, abs_mask, abs_mask, abs_mask, abs_mask, 
+        {abs_mask, abs_mask, abs_mask, abs_mask, abs_mask, abs_mask, abs_mask,
+	   abs_mask, abs_mask, abs_mask, abs_mask, abs_mask, abs_mask,
 	   abs_mask, abs_mask, abs_mask};
    return _mm256_and_ps(_mm256_load_ps((float*)abs_full), a);
  }
@ -1533,14 +1533,14 @@ public:
  }


-  VEC_INLINE static FVEC_NAME gather(const IVEC_NAME &idx, 
+  VEC_INLINE static FVEC_NAME gather(const IVEC_NAME &idx,
    const FVEC_SCAL_T * mem, const int scale) {
    assert(scale == sizeof(FVEC_SCAL_T));
 #   if FVEC_LEN==4
 #    ifdef __AVX2__
    static const unsigned int mask_shuffle[8] __attribute__((aligned(32))) =
      {0, 2, 4, 6, 0, 0, 0, 0};
-    __m256i m = _mm256_permutevar8x32_epi32(idx.val_, 
+    __m256i m = _mm256_permutevar8x32_epi32(idx.val_,
      _mm256_load_si256((__m256i*)mask_shuffle));
    __m128i idx_short = _mm256_castsi256_si128(m);
    return FVEC_SUFFIX(_mm256_i32gather_)(mem, idx_short, sizeof(FVEC_SCAL_T));
@ -1566,10 +1566,10 @@ public:
 #    ifdef __AVX2__
    static const unsigned int mask_shuffle[8] __attribute__((aligned(32))) =
      {0, 2, 4, 6, 0, 0, 0, 0};
-    __m256i m = _mm256_permutevar8x32_epi32(idx.val_, 
+    __m256i m = _mm256_permutevar8x32_epi32(idx.val_,
      _mm256_load_si256((__m256i*)mask_shuffle));
    __m128i idx_short = _mm256_castsi256_si128(m);
-    return FVEC_SUFFIX(_mm256_mask_i32gather_)(src.val_, mem, idx_short, 
+    return FVEC_SUFFIX(_mm256_mask_i32gather_)(src.val_, mem, idx_short,
      mask.val_, sizeof(FVEC_SCAL_T));
 #    else
    int idx_buf[8] __attribute__((aligned(32)));
@ -1585,13 +1585,13 @@ public:
    return _mm256_load_pd(dest);
 #    endif
 #   else
-    return FVEC_SUFFIX(_mm256_mask_i32gather_)(src.val_, mem, idx.val_, 
+    return FVEC_SUFFIX(_mm256_mask_i32gather_)(src.val_, mem, idx.val_,
      mask.val_, sizeof(FVEC_SCAL_T));
 #   endif
  }

-  VEC_INLINE static void gather_4_adjacent(const IVEC_NAME &idx, 
-      const FVEC_SCAL_T * mem, const int scale, FVEC_NAME * out_0, 
+  VEC_INLINE static void gather_4_adjacent(const IVEC_NAME &idx,
+      const FVEC_SCAL_T * mem, const int scale, FVEC_NAME * out_0,
      FVEC_NAME * out_1, FVEC_NAME * out_2, FVEC_NAME * out_3) {
    assert(scale == sizeof(FVEC_SCAL_T));
    int idx_buf[8] __attribute__((aligned(32)));
@ -1632,11 +1632,11 @@ public:
    *out_3 = _mm256_shuffle_ps(b1, b3, 0xEE);
 #   endif
  }
-  VEC_INLINE static void gather_3_adjacent(const IVEC_NAME &idx, 
-					   const FVEC_SCAL_T * mem, 
-					   const int scale, 
-					   FVEC_NAME * out_0, 
-					   FVEC_NAME * out_1, 
+  VEC_INLINE static void gather_3_adjacent(const IVEC_NAME &idx,
+					   const FVEC_SCAL_T * mem,
+					   const int scale,
+					   FVEC_NAME * out_0,
+					   FVEC_NAME * out_1,
 					   FVEC_NAME * out_2) {
    assert(scale == sizeof(FVEC_SCAL_T));
    FVEC_NAME tmp_3;
@ -1662,7 +1662,7 @@ public:
  VEC_INLINE static FVEC_SCAL_T reduce_add(const FVEC_NAME &a) {
    return FVEC_SUFFIX(_mm256_reduce_add_)(a.val_);
  }
-  VEC_INLINE static FVEC_SCAL_T mask_reduce_add(const BVEC_NAME &mask, 
+  VEC_INLINE static FVEC_SCAL_T mask_reduce_add(const BVEC_NAME &mask,
 						const FVEC_NAME &a) {
    return reduce_add(FVEC_SUFFIX(_mm256_and_)(mask.val_, a.val_));
  }
@ -1797,11 +1797,11 @@ public:
      {4, 4, 5, 5, 6, 6, 7, 7};
    __m256d lo_mask = _mm256_castps_pd(_mm256_permutevar8x32_ps(mask.val_,
      _mm256_load_si256((__m256i*) lo_shuffle)));
-    __m256d hi_mask = _mm256_castps_pd(_mm256_permutevar8x32_ps(mask.val_, 
+    __m256d hi_mask = _mm256_castps_pd(_mm256_permutevar8x32_ps(mask.val_,
      _mm256_load_si256((__m256i*) hi_shuffle)));
-    __m256d lo = _mm256_mask_i32gather_pd(src.lo_, mem, get_si_lo(idx.val_), 
+    __m256d lo = _mm256_mask_i32gather_pd(src.lo_, mem, get_si_lo(idx.val_),
 					  lo_mask, sizeof(double));
-    __m256d hi = _mm256_mask_i32gather_pd(src.hi_, mem, get_si_hi(idx.val_), 
+    __m256d hi = _mm256_mask_i32gather_pd(src.hi_, mem, get_si_hi(idx.val_),
 					  hi_mask, sizeof(double));
 #   endif
    return avec8pd(lo, hi);
@ -1911,7 +1911,7 @@ public:
  VEC_INLINE static bvec masku_compress(const bvec &mask, const bvec &a) {
    return mask.val_ ? a.val_ : false;
  }
-  VEC_INLINE static bvec mask_expand(const bvec &src, const bvec &mask, 
+  VEC_INLINE static bvec mask_expand(const bvec &src, const bvec &mask,
 				     const bvec &a) {
    return mask.val_ ? a.val_ : src.val_;
  }
@ -2028,7 +2028,7 @@ public:
  VEC_INLINE static ivec maskz_loadu(const bvec &mask, const int * src) {
    return mask.val_ ? *src : 0;
  }
-  VEC_INLINE static void mask_storeu(const bvec &mask, int * dest, 
+  VEC_INLINE static void mask_storeu(const bvec &mask, int * dest,
    const ivec &src) {
    if (mask.val_) *dest = src.val_;
  }
@ -2037,21 +2037,21 @@ public:
  }

  VEC_INLINE static ivec mask_gather(
-      const ivec &src, const bvec &mask, const ivec &idx, const int * mem, 
+      const ivec &src, const bvec &mask, const ivec &idx, const int * mem,
 	const int scale
  ) {
    return mask.val_ ? *reinterpret_cast<const int *>
      (reinterpret_cast<const char*>(mem) + scale * idx.val_) : src.val_;
  }
  VEC_INLINE static void mask_i32scatter(
-      int * mem, const bvec &mask, const ivec &idx, const ivec &a, 
+      int * mem, const bvec &mask, const ivec &idx, const ivec &a,
 	const int scale
  ) {
-    if (mask.val_) *reinterpret_cast<int *>(reinterpret_cast<char*>(mem) + 
+    if (mask.val_) *reinterpret_cast<int *>(reinterpret_cast<char*>(mem) +
      scale * idx.val_) = a.val_;
  }

-  VEC_INLINE static void mask_compressstore(const bvec &mask, int * dest, 
+  VEC_INLINE static void mask_compressstore(const bvec &mask, int * dest,
      const ivec &src) {
    if (mask.val_) *dest = src.val_;
  }
@ -2175,7 +2175,7 @@ public:
  VEC_INLINE static fvec load(const flt_t *mem) {
    return *mem;
  }
-  VEC_INLINE static void mask_storeu(const bvec &mask, flt_t * dest, 
+  VEC_INLINE static void mask_storeu(const bvec &mask, flt_t * dest,
 				     const fvec &a) {
    if (mask.val_) *dest = a.val_;
  }
@ -2183,7 +2183,7 @@ public:
    *dest = a.val_;
  }

-  VEC_INLINE static fvec gather(const ivec &idx, const flt_t * mem, 
+  VEC_INLINE static fvec gather(const ivec &idx, const flt_t * mem,
 				const int scale) {
    return *reinterpret_cast<const flt_t*>(reinterpret_cast<const char*>(mem) +
      scale * idx.val_);
@ -2197,7 +2197,7 @@ public:
  }

  VEC_INLINE static void gather_3_adjacent(const ivec &idx, const flt_t * mem,
-					   const int scale, fvec * out_0, 
+					   const int scale, fvec * out_0,
 					   fvec * out_1, fvec * out_2) {
    assert(scale == sizeof(flt_t));
    *out_0 = gather(idx, mem + 0, scale);
@ -2205,8 +2205,8 @@ public:
    *out_2 = gather(idx, mem + 2, scale);
  }
  VEC_INLINE static void gather_4_adjacent(const ivec &idx, const flt_t * mem,
-					   const int scale, fvec * out_0, 
-					   fvec * out_1, fvec * out_2, 
+					   const int scale, fvec * out_0,
+					   fvec * out_1, fvec * out_2,
 					   fvec * out_3) {
    assert(scale == sizeof(flt_t));
    *out_0 = gather(idx, mem + 0, scale);
@ -2254,16 +2254,16 @@ public:
  VEC_INLINE static avec undefined() {
    return 1337.1337;
  }
-  VEC_INLINE static avec mask_gather(const avec &src, const bvec &mask, 
-				     const ivec &idx, const acc_t * mem, 
+  VEC_INLINE static avec mask_gather(const avec &src, const bvec &mask,
+				     const ivec &idx, const acc_t * mem,
 				     const int scale) {
    return mask.val_ ? *reinterpret_cast<const acc_t*>
      (reinterpret_cast<const char*>(mem) + scale * idx.val_) : src.val_;
  }
-  VEC_INLINE static void mask_i32loscatter(acc_t * mem, const bvec &mask, 
-					   const ivec &idx, const avec &a, 
+  VEC_INLINE static void mask_i32loscatter(acc_t * mem, const bvec &mask,
+					   const ivec &idx, const avec &a,
 					   const int scale) {
-    if (mask.val_) *reinterpret_cast<acc_t*>(reinterpret_cast<char*>(mem) + 
+    if (mask.val_) *reinterpret_cast<acc_t*>(reinterpret_cast<char*>(mem) +
 					     idx.val_ * scale) = a.val_;
  }

--- a/src/USER-INTEL/npair_full_bin_ghost_intel.h
+++ b/src/USER-INTEL/npair_full_bin_ghost_intel.h
@ -19,7 +19,7 @@

 NPairStyle(full/bin/ghost/intel,
           NPairFullBinGhostIntel,
-           NP_FULL | NP_BIN | NP_GHOST | NP_NEWTON | NP_NEWTOFF | 
+           NP_FULL | NP_BIN | NP_GHOST | NP_NEWTON | NP_NEWTOFF |
           NP_ORTHO | NP_TRI | NP_INTEL)

 #else
@ -40,8 +40,8 @@ class NPairFullBinGhostIntel : public NPairIntel {
  template<class flt_t, class acc_t>
  void fbi(NeighList * list, IntelBuffers<flt_t,acc_t> * buffers);
  template<class flt_t, class acc_t, int need_ic>
-  void fbi(const int offload, NeighList * list, 
-	   IntelBuffers<flt_t,acc_t> * buffers, 
+  void fbi(const int offload, NeighList * list,
+	   IntelBuffers<flt_t,acc_t> * buffers,
           const int astart, const int aend);
 };

--- a/src/USER-INTEL/pair_dpd_intel.h
+++ b/src/USER-INTEL/pair_dpd_intel.h
@ -85,7 +85,7 @@ class PairDPDIntel : public PairDPD {
    ForceConst() : _ntypes(0)  {}
    ~ForceConst() { set_ntypes(0, 0, 0, NULL, _cop); }

-    void set_ntypes(const int ntypes, const int nthreads, const int max_nbors, 
+    void set_ntypes(const int ntypes, const int nthreads, const int max_nbors,
                    Memory *memory, const int cop);

   private:
--- a/src/USER-MEAMC/meam.h
+++ b/src/USER-MEAMC/meam.h
@ -144,7 +144,7 @@ protected:
      a3 = a * a * a;
      a4 = a * a3;
      a1m4 = 1.0-a4;
-      
+
      dfc = 8 * a1m4 * a3;
      return a1m4*a1m4;
    }
--- a/src/USER-MESO/pair_edpd.h
+++ b/src/USER-MESO/pair_edpd.h
@ -42,7 +42,7 @@ class PairEDPD : public Pair {
 protected:
  double cut_global;
  int seed;
-  double **cut,**cutT; 
+  double **cut,**cutT;
  double **a0,**gamma;
  double **power;
  double **slope;
--- a/src/USER-MESO/pair_tdpd.h
+++ b/src/USER-MESO/pair_tdpd.h
@ -42,7 +42,7 @@ class PairTDPD : public Pair {
 protected:
  double cut_global,temperature;
  int seed,cc_species;
-  double **cut,**cutcc; 
+  double **cut,**cutcc;
  double **a0,**gamma,**sigma;
  double **power;
  double ***kappa,***epsilon;
--- a/src/USER-MISC/dihedral_spherical.h
+++ b/src/USER-MISC/dihedral_spherical.h
@ -36,7 +36,7 @@ class DihedralSpherical : public Dihedral {
  void write_restart(FILE *);
  void read_restart(FILE *);
  void write_data(FILE *);
-  
+
 protected:
  int    *nterms;
  double **Ccoeff;
--- a/src/USER-MISC/fix_ti_spring.h
+++ b/src/USER-MISC/fix_ti_spring.h
@ -12,7 +12,7 @@
 ------------------------------------------------------------------------- */

 /* ----------------------------------------------------------------------
-    Contributing authors: 
+    Contributing authors:
             Rodrigo Freitas (UC Berkeley) - rodrigof@berkeley.edu
             Mark Asta (UC Berkeley) - mdasta@berkeley.edu
             Maurice de Koning (Unicamp/Brazil) - dekoning@ifi.unicamp.br
@ -45,7 +45,7 @@ class FixTISpring : public Fix {
  void   initial_integrate(int);
  double compute_scalar();
  double compute_vector(int);
-  
+
  double memory_usage();
  void   grow_arrays(int);
  void   copy_arrays(int, int, int);
--- a/src/USER-MISC/pair_extep.h
+++ b/src/USER-MISC/pair_extep.h
@ -140,8 +140,8 @@ class PairExTeP : public Pair {
  }

  // splines parameters
-  // F[Ni=0-1, 1-2, 2-3, 
-  //   Nj=..., 
+  // F[Ni=0-1, 1-2, 2-3,
+  //   Nj=...,
  struct TF_corr_param {
    double
        f_00,
@ -156,7 +156,7 @@ class PairExTeP : public Pair {
        f_y_01,
        f_y_10,
        f_y_11;
-  } F_corr_param[MAXTYPES][MAXTYPES][NSPLINE][NSPLINE]; 
+  } F_corr_param[MAXTYPES][MAXTYPES][NSPLINE][NSPLINE];

  double F_corr_data[MAXTYPES][MAXTYPES][NSPLINE][NSPLINE][3];

--- a/src/USER-MISC/pair_momb.h
+++ b/src/USER-MISC/pair_momb.h
@ -5,7 +5,7 @@

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
+   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
--- a/src/USER-MISC/temper_npt.h
+++ b/src/USER-MISC/temper_npt.h
@ -9,7 +9,7 @@
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
-   
+
 ------------------------------------------------------------------------- */

 #ifdef COMMAND_CLASS
--- a/src/USER-MOFFF/improper_inversion_harmonic.h
+++ b/src/USER-MOFFF/improper_inversion_harmonic.h
@ -5,7 +5,7 @@

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
+   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
@ -39,11 +39,11 @@ class ImproperInversionHarmonic : public Improper {
  double *kw, *w0;
  void invang(const int &i1,const int &i2,const int &i3,const int &i4,
              const int &type,const int &evflag,const int &eflag,
-              const double &vb1x, const double &vb1y, const double &vb1z, 
+              const double &vb1x, const double &vb1y, const double &vb1z,
              const double &rrvb1, const double &rr2vb1,
-              const double &vb2x, const double &vb2y, const double &vb2z, 
+              const double &vb2x, const double &vb2y, const double &vb2z,
              const double &rrvb2, const double &rr2vb2,
-              const double &vb3x, const double &vb3y, const double &vb3z, 
+              const double &vb3x, const double &vb3y, const double &vb3z,
              const double &rrvb3, const double &rr2vb3);
  void allocate();
 };
--- a/src/USER-OMP/reaxc_bonds_omp.h
+++ b/src/USER-OMP/reaxc_bonds_omp.h
@ -1,12 +1,12 @@
 /*----------------------------------------------------------------------
  PuReMD - Purdue ReaxFF Molecular Dynamics Program
  Website: https://www.cs.purdue.edu/puremd
-  
+
  Copyright (2010) Purdue University
-  
-  Contributing authors: 
+
+  Contributing authors:
  H. M. Aktulga, J. Fogarty, S. Pandit, A. Grama
-  Corresponding author: 
+  Corresponding author:
  Hasan Metin Aktulga, Michigan State University, hma@cse.msu.edu

  Please cite the related publication:
--- a/src/USER-OMP/reaxc_forces_omp.h
+++ b/src/USER-OMP/reaxc_forces_omp.h
@ -1,12 +1,12 @@
 /*----------------------------------------------------------------------
  PuReMD - Purdue ReaxFF Molecular Dynamics Program
  Website: https://www.cs.purdue.edu/puremd
-  
+
  Copyright (2010) Purdue University
-  
-  Contributing authors: 
+
+  Contributing authors:
  H. M. Aktulga, J. Fogarty, S. Pandit, A. Grama
-  Corresponding author: 
+  Corresponding author:
  Hasan Metin Aktulga, Michigan State University, hma@cse.msu.edu

  Please cite the related publication:
--- a/src/USER-OMP/reaxc_hydrogen_bonds_omp.h
+++ b/src/USER-OMP/reaxc_hydrogen_bonds_omp.h
@ -1,12 +1,12 @@
 /*----------------------------------------------------------------------
  PuReMD - Purdue ReaxFF Molecular Dynamics Program
  Website: https://www.cs.purdue.edu/puremd
-  
+
  Copyright (2010) Purdue University
-  
-  Contributing authors: 
+
+  Contributing authors:
  H. M. Aktulga, J. Fogarty, S. Pandit, A. Grama
-  Corresponding author: 
+  Corresponding author:
  Hasan Metin Aktulga, Michigan State University, hma@cse.msu.edu

  Please cite the related publication:
--- a/src/USER-OMP/reaxc_init_md_omp.h
+++ b/src/USER-OMP/reaxc_init_md_omp.h
@ -1,12 +1,12 @@
 /*----------------------------------------------------------------------
  PuReMD - Purdue ReaxFF Molecular Dynamics Program
  Website: https://www.cs.purdue.edu/puremd
-  
+
  Copyright (2010) Purdue University
-  
-  Contributing authors: 
+
+  Contributing authors:
  H. M. Aktulga, J. Fogarty, S. Pandit, A. Grama
-  Corresponding author: 
+  Corresponding author:
  Hasan Metin Aktulga, Michigan State University, hma@cse.msu.edu

  Please cite the related publication:
--- a/src/USER-OMP/reaxc_multi_body_omp.h
+++ b/src/USER-OMP/reaxc_multi_body_omp.h
@ -1,12 +1,12 @@
 /*----------------------------------------------------------------------
  PuReMD - Purdue ReaxFF Molecular Dynamics Program
  Website: https://www.cs.purdue.edu/puremd
-  
+
  Copyright (2010) Purdue University
-  
-  Contributing authors: 
+
+  Contributing authors:
  H. M. Aktulga, J. Fogarty, S. Pandit, A. Grama
-  Corresponding author: 
+  Corresponding author:
  Hasan Metin Aktulga, Michigan State University, hma@cse.msu.edu

  Please cite the related publication:
--- a/src/USER-OMP/reaxc_valence_angles_omp.h
+++ b/src/USER-OMP/reaxc_valence_angles_omp.h
@ -1,12 +1,12 @@
 /*----------------------------------------------------------------------
  PuReMD - Purdue ReaxFF Molecular Dynamics Program
  Website: https://www.cs.purdue.edu/puremd
-  
+
  Copyright (2010) Purdue University
-  
-  Contributing authors: 
+
+  Contributing authors:
  H. M. Aktulga, J. Fogarty, S. Pandit, A. Grama
-  Corresponding author: 
+  Corresponding author:
  Hasan Metin Aktulga, Michigan State University, hma@cse.msu.edu

  Please cite the related publication:
--- a/src/USER-PHONON/fix_phonon.h
+++ b/src/USER-PHONON/fix_phonon.h
@ -70,7 +70,7 @@ class FixPhonon : public Fix {
  int ngroup, nfind;                            // total number of atoms in group; total number of atoms on this proc
  char *prefix, *logfile;                       // prefix of output file names
  FILE *flog;
-  
+
  double *M_inv_sqrt;

  class FFT3d *fft;                             // to do fft via the fft3d wraper
@ -79,7 +79,7 @@ class FixPhonon : public Fix {
  int *fft_cnts, *fft_disp;
  int fft_dim, fft_dim2;
  FFT_SCALAR *fft_data;
-  
+
  tagint itag;                                  // index variables
  int idx, idq;                                 // more index variables
  std::map<tagint,int> tag2surf;                // Mapping info