convert GPU package styles to use new neighbor list request API

2022-03-08 04:37:46 -05:00
parent 005f76a9e6
commit 1aa8b64283
58 changed files with 4429 additions and 5641 deletions
--- a/src/GPU/pair_beck_gpu.cpp
+++ b/src/GPU/pair_beck_gpu.cpp
@@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@@ -26,7 +25,6 @@
 #include "gpu_extra.h"
 #include "math_special.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@@ -37,23 +35,19 @@ using namespace MathSpecial;

 // External functions from cuda library for atom decomposition

-int beck_gpu_init(const int ntypes, double **cutsq, double **host_aa,
-                  double **alpha, double **beta, double **AA, double **BB,
-                  double *special_lj, const int nlocal,
-                  const int nall, const int max_nbors, const int maxspecial,
-                  const double cell_size, int &gpu_mode, FILE *screen);
+int beck_gpu_init(const int ntypes, double **cutsq, double **host_aa, double **alpha, double **beta,
+                  double **AA, double **BB, double *special_lj, const int nlocal, const int nall,
+                  const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode,
+                  FILE *screen);
 void beck_gpu_clear();
-int ** beck_gpu_compute_n(const int ago, const int inum, const int nall,
-                          double **host_x, int *host_type, double *sublo,
-                          double *subhi, tagint *tag, int **nspecial,
-                          tagint **special, const bool eflag, const bool vflag,
-                          const bool eatom, const bool vatom, int &host_start,
-                          int **ilist, int **jnum,
-                          const double cpu_time, bool &success);
-void beck_gpu_compute(const int ago, const int inum, const int nall,
-                      double **host_x, int *host_type, int *ilist, int *numj,
-                      int **firstneigh, const bool eflag, const bool vflag,
-                      const bool eatom, const bool vatom, int &host_start,
+int **beck_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                         int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                         const bool vatom, int &host_start, int **ilist, int **jnum,
+                         const double cpu_time, bool &success);
+void beck_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                      int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                      const bool vflag, const bool eatom, const bool vatom, int &host_start,
                      const double cpu_time, bool &success);
 double beck_gpu_bytes();

@@ -81,7 +75,7 @@ PairBeckGPU::~PairBeckGPU()

 void PairBeckGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@@ -89,7 +83,7 @@ void PairBeckGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@@ -98,28 +92,24 @@ void PairBeckGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = beck_gpu_compute_n(neighbor->ago, inum, nall,
-                                     atom->x, atom->type, sublo,
-                                     subhi, atom->tag, atom->nspecial,
-                                     atom->special, eflag, vflag, eflag_atom,
-                                     vflag_atom, host_start,
-                                     &ilist, &numneigh, cpu_time, success);
+    firstneigh =
+        beck_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+                           atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
+                           host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    beck_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                      ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                      vflag_atom, host_start, cpu_time, success);
+    beck_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                     eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@@ -139,10 +129,9 @@ void PairBeckGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@@ -150,21 +139,15 @@ void PairBeckGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = beck_gpu_init(atom->ntypes+1, cutsq, aa, alpha, beta,
-                              AA, BB, force->special_lj, atom->nlocal,
-                              atom->nlocal+atom->nghost, mnf, maxspecial,
-                              cell_size, gpu_mode, screen);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success = beck_gpu_init(atom->ntypes + 1, cutsq, aa, alpha, beta, AA, BB, force->special_lj,
+                              atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size,
+                              gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@@ -177,15 +160,15 @@ double PairBeckGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairBeckGPU::cpu_compute(int start, int inum, int eflag,
-                              int /* vflag */, int *ilist,
-                              int *numneigh, int **firstneigh) {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r5,force_beck,factor_lj;
-  double r,rinv;
-  double aaij,alphaij,betaij;
-  double term1,term1inv,term2,term3,term4,term5,term6;
+void PairBeckGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                              int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
+  double rsq, r5, force_beck, factor_lj;
+  double r, rinv;
+  double aaij, alphaij, betaij;
+  double term1, term1inv, term2, term3, term4, term5, term6;
  int *jlist;

  double **x = atom->x;
@@ -212,39 +195,39 @@ void PairBeckGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
        r = sqrt(rsq);
-        r5 = rsq*rsq*r;
+        r5 = rsq * rsq * r;
        aaij = aa[itype][jtype];
        alphaij = alpha[itype][jtype];
        betaij = beta[itype][jtype];
-        term1 = aaij*aaij + rsq;
-        term2 = powint(term1,-5);
-        term3 = 21.672 + 30.0*aaij*aaij + 6.0*rsq;
-        term4 = alphaij + r5*betaij;
-        term5 = alphaij + 6.0*r5*betaij;
-        rinv  = 1.0/r;
-        force_beck = AA[itype][jtype]*exp(-1.0*r*term4)*term5;
-        force_beck -= BB[itype][jtype]*r*term2*term3;
+        term1 = aaij * aaij + rsq;
+        term2 = powint(term1, -5);
+        term3 = 21.672 + 30.0 * aaij * aaij + 6.0 * rsq;
+        term4 = alphaij + r5 * betaij;
+        term5 = alphaij + 6.0 * r5 * betaij;
+        rinv = 1.0 / r;
+        force_beck = AA[itype][jtype] * exp(-1.0 * r * term4) * term5;
+        force_beck -= BB[itype][jtype] * r * term2 * term3;

-        fpair = factor_lj*force_beck*rinv;
+        fpair = factor_lj * force_beck * rinv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
-          term6 = powint(term1,-3);
-          term1inv = 1.0/term1;
-          evdwl = AA[itype][jtype]*exp(-1.0*r*term4);
-          evdwl -= BB[itype][jtype]*term6*(1.0+(2.709+3.0*aaij*aaij)*term1inv);
+          term6 = powint(term1, -3);
+          term1inv = 1.0 / term1;
+          evdwl = AA[itype][jtype] * exp(-1.0 * r * term4);
+          evdwl -= BB[itype][jtype] * term6 * (1.0 + (2.709 + 3.0 * aaij * aaij) * term1inv);
          evdwl *= factor_lj;
        }

-        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_born_coul_long_cs_gpu.cpp
+++ b/src/GPU/pair_born_coul_long_cs_gpu.cpp
@@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@@ -26,7 +25,6 @@
 #include "kspace.h"
 #include "math_const.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@@ -35,14 +33,14 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;

-#define EWALD_F   1.12837917
-#define EWALD_P   9.95473818e-1
-#define B0       -0.1335096380159268
-#define B1       -2.57839507e-1
-#define B2       -1.37203639e-1
-#define B3       -8.88822059e-3
-#define B4       -5.80844129e-3
-#define B5        1.14652755e-1
+#define EWALD_F 1.12837917
+#define EWALD_P 9.95473818e-1
+#define B0 -0.1335096380159268
+#define B1 -2.57839507e-1
+#define B2 -1.37203639e-1
+#define B3 -8.88822059e-3
+#define B4 -5.80844129e-3
+#define B5 1.14652755e-1

 #define EPSILON 1.0e-20
 #define EPS_EWALD 1.0e-6
@@ -50,37 +48,31 @@ using namespace MathConst;

 // External functions from cuda library for atom decomposition

-int bornclcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
-                      double **host_born1, double **host_born2,
-                      double **host_born3, double **host_a,
-                      double **host_c, double **host_d,
-                      double **sigma, double **offset, double *special_lj,
-                      const int inum, const int nall, const int max_nbors,
-                      const int maxspecial, const double cell_size,
-                      int &gpu_mode, FILE *screen, double **host_cut_ljsq,
-                      double host_cut_coulsq, double *host_special_coul,
-                      const double qqrd2e, const double g_ewald);
+int bornclcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, double **host_born1,
+                      double **host_born2, double **host_born3, double **host_a, double **host_c,
+                      double **host_d, double **sigma, double **offset, double *special_lj,
+                      const int inum, const int nall, const int max_nbors, const int maxspecial,
+                      const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
+                      double host_cut_coulsq, double *host_special_coul, const double qqrd2e,
+                      const double g_ewald);
 void bornclcs_gpu_clear();
-int** bornclcs_gpu_compute_n(const int ago, const int inum_full, const int nall,
-                           double **host_x, int *host_type, double *sublo,
-                           double *subhi, tagint *tag, int **nspecial,
-                           tagint **special, const bool eflag, const bool vflag,
-                           const bool eatom, const bool vatom, int &host_start,
-                           int **ilist, int **jnum,  const double cpu_time,
-                           bool &success, double *host_q, double *boxlo,
-                           double *prd);
-void bornclcs_gpu_compute(const int ago, const int inum_full, const int nall,
-                        double **host_x, int *host_type, int *ilist, int *numj,
-                        int **firstneigh, const bool eflag, const bool vflag,
-                        const bool eatom, const bool vatom, int &host_start,
-                        const double cpu_time, bool &success, double *host_q,
-                        const int nlocal, double *boxlo, double *prd);
+int **bornclcs_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
+                             int *host_type, double *sublo, double *subhi, tagint *tag,
+                             int **nspecial, tagint **special, const bool eflag, const bool vflag,
+                             const bool eatom, const bool vatom, int &host_start, int **ilist,
+                             int **jnum, const double cpu_time, bool &success, double *host_q,
+                             double *boxlo, double *prd);
+void bornclcs_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
+                          int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                          const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                          const double cpu_time, bool &success, double *host_q, const int nlocal,
+                          double *boxlo, double *prd);
 double bornclcs_gpu_bytes();

 /* ---------------------------------------------------------------------- */

 PairBornCoulLongCSGPU::PairBornCoulLongCSGPU(LAMMPS *lmp) :
-  PairBornCoulLongCS(lmp), gpu_mode(GPU_FORCE)
+    PairBornCoulLongCS(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  reinitflag = 0;
@@ -102,7 +94,7 @@ PairBornCoulLongCSGPU::~PairBornCoulLongCSGPU()

 void PairBornCoulLongCSGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@@ -110,7 +102,7 @@ void PairBornCoulLongCSGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@@ -119,30 +111,25 @@ void PairBornCoulLongCSGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = bornclcs_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                      atom->type, sublo, subhi,
-                                      atom->tag, atom->nspecial, atom->special,
-                                      eflag, vflag, eflag_atom, vflag_atom,
-                                      host_start, &ilist, &numneigh, cpu_time,
-                                      success, atom->q, domain->boxlo,
-                                      domain->prd);
+    firstneigh = bornclcs_gpu_compute_n(
+        neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial,
+        atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
+        cpu_time, success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    bornclcs_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                       ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                       vflag_atom, host_start, cpu_time, success, atom->q,
-                       atom->nlocal, domain->boxlo, domain->prd);
+    bornclcs_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh,
+                         firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
+                         success, atom->q, atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@@ -164,10 +151,9 @@ void PairBornCoulLongCSGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@@ -179,29 +165,20 @@ void PairBornCoulLongCSGPU::init_style()

  // insure use of KSpace long-range solver, set g_ewald

-  if (force->kspace == nullptr)
-    error->all(FLERR,"Pair style requires a KSpace style");
+  if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
  g_ewald = force->kspace->g_ewald;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = bornclcs_gpu_init(atom->ntypes+1, cutsq,  rhoinv,
-                                born1, born2, born3, a, c, d, sigma,
-                                offset, force->special_lj, atom->nlocal,
-                                  atom->nlocal+atom->nghost, mnf, maxspecial,
-                                   cell_size, gpu_mode, screen, cut_ljsq,
-                                cut_coulsq, force->special_coul,
-                                force->qqrd2e, g_ewald);
+  int success = bornclcs_gpu_init(
+      atom->ntypes + 1, cutsq, rhoinv, born1, born2, born3, a, c, d, sigma, offset,
+      force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size,
+      gpu_mode, screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e, g_ewald);

-  GPU_EXTRA::check_flag(success,error,world);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@@ -214,15 +191,14 @@ double PairBornCoulLongCSGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairBornCoulLongCSGPU::cpu_compute(int start, int inum, int eflag,
-                                      int /* vflag */, int *ilist,
-                                      int *numneigh, int **firstneigh)
+void PairBornCoulLongCSGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                                        int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itable,itype,jtype;
-  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
-  double fraction,table;
-  double r,rsq,rexp,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
-  double grij,expm2,prefactor,t,erfc,u;
+  int i, j, ii, jj, jnum, itable, itype, jtype;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
+  double fraction, table;
+  double r, rsq, rexp, r2inv, r6inv, forcecoul, forceborn, factor_coul, factor_lj;
+  double grij, expm2, prefactor, t, erfc, u;
  int *jlist;

  evdwl = ecoul = 0.0;
@@ -256,39 +232,42 @@ void PairBornCoulLongCSGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {

        if (rsq < cut_coulsq) {
-          rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
-          r2inv = 1.0/rsq;
+          rsq +=
+              EPSILON;    // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
+          r2inv = 1.0 / rsq;
          if (!ncoultablebits || rsq <= tabinnersq) {
            r = sqrt(rsq);
-            prefactor = qqrd2e * qtmp*q[j];
+            prefactor = qqrd2e * qtmp * q[j];
            if (factor_coul < 1.0) {
              // When bonded parts are being calculated a minimal distance (EPS_EWALD)
              // has to be added to the prefactor and erfc in order to make the
              // used approximation functions for the Ewald correction valid
-              grij = g_ewald * (r+EPS_EWALD);
-              expm2 = exp(-grij*grij);
-              t = 1.0 / (1.0 + EWALD_P*grij);
+              grij = g_ewald * (r + EPS_EWALD);
+              expm2 = exp(-grij * grij);
+              t = 1.0 / (1.0 + EWALD_P * grij);
              u = 1.0 - t;
-              erfc = t * (1.+u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
-              prefactor /= (r+EPS_EWALD);
-              forcecoul = prefactor * (erfc + EWALD_F*grij*expm2 - (1.0-factor_coul));
+              erfc =
+                  t * (1. + u * (B0 + u * (B1 + u * (B2 + u * (B3 + u * (B4 + u * B5)))))) * expm2;
+              prefactor /= (r + EPS_EWALD);
+              forcecoul = prefactor * (erfc + EWALD_F * grij * expm2 - (1.0 - factor_coul));
              // Additionally r2inv needs to be accordingly modified since the later
              // scaling of the overall force shall be consistent
-              r2inv = 1.0/(rsq + EPS_EWALD_SQR);
+              r2inv = 1.0 / (rsq + EPS_EWALD_SQR);
            } else {
              grij = g_ewald * r;
-              expm2 = exp(-grij*grij);
-              t = 1.0 / (1.0 + EWALD_P*grij);
+              expm2 = exp(-grij * grij);
+              t = 1.0 / (1.0 + EWALD_P * grij);
              u = 1.0 - t;
-              erfc = t * (1.+u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
+              erfc =
+                  t * (1. + u * (B0 + u * (B1 + u * (B2 + u * (B3 + u * (B4 + u * B5)))))) * expm2;
              prefactor /= r;
-              forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+              forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
            }
          } else {
            union_int_float_t rsq_lookup;
@@ -296,47 +275,51 @@ void PairBornCoulLongCSGPU::cpu_compute(int start, int inum, int eflag,
            itable = rsq_lookup.i & ncoulmask;
            itable >>= ncoulshiftbits;
            fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
-            table = ftable[itable] + fraction*dftable[itable];
-            forcecoul = qtmp*q[j] * table;
+            table = ftable[itable] + fraction * dftable[itable];
+            forcecoul = qtmp * q[j] * table;
            if (factor_coul < 1.0) {
-              table = ctable[itable] + fraction*dctable[itable];
-              prefactor = qtmp*q[j] * table;
-              forcecoul -= (1.0-factor_coul)*prefactor;
+              table = ctable[itable] + fraction * dctable[itable];
+              prefactor = qtmp * q[j] * table;
+              forcecoul -= (1.0 - factor_coul) * prefactor;
            }
          }

          forcecoul *= r2inv;

-        } else forcecoul = 0;
+        } else
+          forcecoul = 0;

-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;
        r = sqrt(rsq);
        if (rsq < cut_ljsq[itype][jtype]) {
-          r6inv = r2inv*r2inv*r2inv;
-          rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
-          forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv
-            + born3[itype][jtype]*r2inv*r6inv;
-        } else forceborn = 0.0;
+          r6inv = r2inv * r2inv * r2inv;
+          rexp = exp((sigma[itype][jtype] - r) * rhoinv[itype][jtype]);
+          forceborn = born1[itype][jtype] * r * rexp - born2[itype][jtype] * r6inv +
+              born3[itype][jtype] * r2inv * r6inv;
+        } else
+          forceborn = 0.0;

-        fpair = forcecoul + factor_lj*forceborn * r2inv;
+        fpair = forcecoul + factor_lj * forceborn * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          if (rsq < cut_coulsq) {
-            ecoul = prefactor*erfc;
-            if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
-          } else ecoul = 0.0;
+            ecoul = prefactor * erfc;
+            if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
+          } else
+            ecoul = 0.0;
          if (rsq < cut_ljsq[itype][jtype]) {
-            evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv
-              + d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
+            evdwl = a[itype][jtype] * rexp - c[itype][jtype] * r6inv +
+                d[itype][jtype] * r6inv * r2inv - offset[itype][jtype];
            evdwl *= factor_lj;
-          } else evdwl = 0.0;
+          } else
+            evdwl = 0.0;
        }

-        if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_born_coul_long_gpu.cpp
+++ b/src/GPU/pair_born_coul_long_gpu.cpp
@@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@@ -26,56 +25,48 @@
 #include "kspace.h"
 #include "math_const.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

 #include <cmath>

-#define EWALD_F   1.12837917
-#define EWALD_P   0.3275911
-#define A1        0.254829592
-#define A2       -0.284496736
-#define A3        1.421413741
-#define A4       -1.453152027
-#define A5        1.061405429
+#define EWALD_F 1.12837917
+#define EWALD_P 0.3275911
+#define A1 0.254829592
+#define A2 -0.284496736
+#define A3 1.421413741
+#define A4 -1.453152027
+#define A5 1.061405429

 using namespace LAMMPS_NS;
 using namespace MathConst;

 // External functions from cuda library for atom decomposition

-int borncl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
-                    double **host_born1, double **host_born2,
-                    double **host_born3, double **host_a,
-                    double **host_c, double **host_d,
-                    double **sigma, double **offset, double *special_lj,
-                    const int inum, const int nall, const int max_nbors,
-                    const int maxspecial, const double cell_size,
-                    int &gpu_mode, FILE *screen, double **host_cut_ljsq,
-                    double host_cut_coulsq, double *host_special_coul,
-                    const double qqrd2e, const double g_ewald);
+int borncl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, double **host_born1,
+                    double **host_born2, double **host_born3, double **host_a, double **host_c,
+                    double **host_d, double **sigma, double **offset, double *special_lj,
+                    const int inum, const int nall, const int max_nbors, const int maxspecial,
+                    const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
+                    double host_cut_coulsq, double *host_special_coul, const double qqrd2e,
+                    const double g_ewald);
 void borncl_gpu_clear();
-int** borncl_gpu_compute_n(const int ago, const int inum_full, const int nall,
-                           double **host_x, int *host_type, double *sublo,
-                           double *subhi, tagint *tag, int **nspecial,
-                           tagint **special, const bool eflag, const bool vflag,
-                           const bool eatom, const bool vatom, int &host_start,
-                           int **ilist, int **jnum,  const double cpu_time,
-                           bool &success, double *host_q, double *boxlo,
-                           double *prd);
-void borncl_gpu_compute(const int ago, const int inum_full, const int nall,
-                        double **host_x, int *host_type, int *ilist, int *numj,
-                        int **firstneigh, const bool eflag, const bool vflag,
-                        const bool eatom, const bool vatom, int &host_start,
-                        const double cpu_time, bool &success, double *host_q,
-                        const int nlocal, double *boxlo, double *prd);
+int **borncl_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
+                           int *host_type, double *sublo, double *subhi, tagint *tag,
+                           int **nspecial, tagint **special, const bool eflag, const bool vflag,
+                           const bool eatom, const bool vatom, int &host_start, int **ilist,
+                           int **jnum, const double cpu_time, bool &success, double *host_q,
+                           double *boxlo, double *prd);
+void borncl_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
+                        int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                        const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                        const double cpu_time, bool &success, double *host_q, const int nlocal,
+                        double *boxlo, double *prd);
 double borncl_gpu_bytes();

 /* ---------------------------------------------------------------------- */

-PairBornCoulLongGPU::PairBornCoulLongGPU(LAMMPS *lmp) :
-  PairBornCoulLong(lmp), gpu_mode(GPU_FORCE)
+PairBornCoulLongGPU::PairBornCoulLongGPU(LAMMPS *lmp) : PairBornCoulLong(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  reinitflag = 0;
@ -97,7 +88,7 @@ PairBornCoulLongGPU::~PairBornCoulLongGPU()

 void PairBornCoulLongGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -105,7 +96,7 @@ void PairBornCoulLongGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -114,30 +105,25 @@ void PairBornCoulLongGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = borncl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                      atom->type, sublo, subhi,
-                                      atom->tag, atom->nspecial, atom->special,
-                                      eflag, vflag, eflag_atom, vflag_atom,
-                                      host_start, &ilist, &numneigh, cpu_time,
-                                      success, atom->q, domain->boxlo,
-                                      domain->prd);
+    firstneigh = borncl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                      atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                      eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
+                                      cpu_time, success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    borncl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                       ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                       vflag_atom, host_start, cpu_time, success, atom->q,
+    borncl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                       eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
                       atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -150,8 +136,7 @@ void PairBornCoulLongGPU::compute(int eflag, int vflag)

 void PairBornCoulLongGPU::init_style()
 {
-  if (!atom->q_flag)
-    error->all(FLERR, "Pair style born/coul/long/gpu requires atom attribute q");
+  if (!atom->q_flag) error->all(FLERR, "Pair style born/coul/long/gpu requires atom attribute q");

  // Repeat cutsq calculation because it is done after the call to init_style
  double maxcut = -1.0;
@ -159,10 +144,9 @@ void PairBornCoulLongGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -174,33 +158,24 @@ void PairBornCoulLongGPU::init_style()

  // ensure use of KSpace long-range solver, set g_ewald

-  if (force->kspace == nullptr)
-    error->all(FLERR,"Pair style requires a KSpace style");
+  if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
  g_ewald = force->kspace->g_ewald;

  // setup force tables

-  if (ncoultablebits) init_tables(cut_coul,cut_respa);
+  if (ncoultablebits) init_tables(cut_coul, cut_respa);

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = borncl_gpu_init(atom->ntypes+1, cutsq,  rhoinv,
-                                born1, born2, born3, a, c, d, sigma,
-                                offset, force->special_lj, atom->nlocal,
-                                  atom->nlocal+atom->nghost, mnf, maxspecial,
-                                   cell_size, gpu_mode, screen, cut_ljsq,
-                                cut_coulsq, force->special_coul,
-                                force->qqrd2e, g_ewald);
+  int success = borncl_gpu_init(
+      atom->ntypes + 1, cutsq, rhoinv, born1, born2, born3, a, c, d, sigma, offset,
+      force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size,
+      gpu_mode, screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e, g_ewald);

-  GPU_EXTRA::check_flag(success,error,world);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -213,14 +188,13 @@ double PairBornCoulLongGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairBornCoulLongGPU::cpu_compute(int start, int inum, int eflag,
-                                      int /* vflag */, int *ilist,
+void PairBornCoulLongGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
                                      int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
-  double r,rexp,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
-  double grij,expm2,prefactor,t,erfc;
+  int i, j, ii, jj, jnum, itype, jtype;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
+  double r, rexp, r2inv, r6inv, forcecoul, forceborn, factor_coul, factor_lj;
+  double grij, expm2, prefactor, t, erfc;
  int *jlist;
  double rsq;

@ -255,49 +229,53 @@ void PairBornCoulLongGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;
        r = sqrt(rsq);

        if (rsq < cut_coulsq) {
          grij = g_ewald * r;
-          expm2 = exp(-grij*grij);
-          t = 1.0 / (1.0 + EWALD_P*grij);
-          erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
-          prefactor = qqrd2e * qtmp*q[j]/r;
-          forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
-          if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
-        } else forcecoul = 0.0;
+          expm2 = exp(-grij * grij);
+          t = 1.0 / (1.0 + EWALD_P * grij);
+          erfc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * expm2;
+          prefactor = qqrd2e * qtmp * q[j] / r;
+          forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
+          if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
+        } else
+          forcecoul = 0.0;

        if (rsq < cut_ljsq[itype][jtype]) {
-          r6inv = r2inv*r2inv*r2inv;
-          rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
-          forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv
-            + born3[itype][jtype]*r2inv*r6inv;
-        } else forceborn = 0.0;
+          r6inv = r2inv * r2inv * r2inv;
+          rexp = exp((sigma[itype][jtype] - r) * rhoinv[itype][jtype]);
+          forceborn = born1[itype][jtype] * r * rexp - born2[itype][jtype] * r6inv +
+              born3[itype][jtype] * r2inv * r6inv;
+        } else
+          forceborn = 0.0;

-        fpair = (forcecoul + factor_lj*forceborn) * r2inv;
+        fpair = (forcecoul + factor_lj * forceborn) * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          if (rsq < cut_coulsq) {
-            ecoul = prefactor*erfc;
-            if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
-          } else ecoul = 0.0;
+            ecoul = prefactor * erfc;
+            if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
+          } else
+            ecoul = 0.0;
          if (rsq < cut_ljsq[itype][jtype]) {
-            evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv
-              + d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
+            evdwl = a[itype][jtype] * rexp - c[itype][jtype] * r6inv +
+                d[itype][jtype] * r6inv * r2inv - offset[itype][jtype];
            evdwl *= factor_lj;
-          } else evdwl = 0.0;
+          } else
+            evdwl = 0.0;
        }

-        if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_born_coul_wolf_cs_gpu.cpp
+++ b/src/GPU/pair_born_coul_wolf_cs_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -25,7 +24,6 @@
 #include "gpu_extra.h"
 #include "math_const.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -38,39 +36,31 @@ using namespace MathConst;

 // External functions from cuda library for atom decomposition

-int borncwcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
-                      double **host_born1, double **host_born2,
-                      double **host_born3, double **host_a, double **host_c,
-                      double **host_d, double **sigma, double **offset,
-                      double *special_lj, const int inum, const int nall,
-                      const int max_nbors, const int maxspecial,
-                      const double cell_size, int &gpu_mode, FILE *screen,
-                      double **host_cut_ljsq, double host_cut_coulsq,
-                      double *host_special_coul, const double qqrd2e,
-                      const double alf, const double e_shift,
-                      const double f_shift);
+int borncwcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, double **host_born1,
+                      double **host_born2, double **host_born3, double **host_a, double **host_c,
+                      double **host_d, double **sigma, double **offset, double *special_lj,
+                      const int inum, const int nall, const int max_nbors, const int maxspecial,
+                      const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
+                      double host_cut_coulsq, double *host_special_coul, const double qqrd2e,
+                      const double alf, const double e_shift, const double f_shift);
 void borncwcs_gpu_clear();
-int ** borncwcs_gpu_compute_n(const int ago, const int inum_full,
-                              const int nall, double **host_x, int *host_type,
-                              double *sublo, double *subhi, tagint *tag,
-                              int **nspecial, tagint **special,
-                              const bool eflag, const bool vflag,
-                              const bool eatom, const bool vatom,
-                              int &host_start, int **ilist, int **jnum,
-                              const double cpu_time, bool &success,
-                              double *host_q, double *boxlo, double *prd);
-void borncwcs_gpu_compute(const int ago, const int inum_full, const int nall,
-                        double **host_x, int *host_type, int *ilist, int *numj,
-                        int **firstneigh, const bool eflag, const bool vflag,
-                        const bool eatom, const bool vatom, int &host_start,
-                        const double cpu_time, bool &success, double *host_q,
-                        const int nlocal, double *boxlo, double *prd);
+int **borncwcs_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
+                             int *host_type, double *sublo, double *subhi, tagint *tag,
+                             int **nspecial, tagint **special, const bool eflag, const bool vflag,
+                             const bool eatom, const bool vatom, int &host_start, int **ilist,
+                             int **jnum, const double cpu_time, bool &success, double *host_q,
+                             double *boxlo, double *prd);
+void borncwcs_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
+                          int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                          const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                          const double cpu_time, bool &success, double *host_q, const int nlocal,
+                          double *boxlo, double *prd);
 double borncwcs_gpu_bytes();

 /* ---------------------------------------------------------------------- */

-PairBornCoulWolfCSGPU::PairBornCoulWolfCSGPU(LAMMPS *lmp) : PairBornCoulWolfCS(lmp),
-                                                      gpu_mode(GPU_FORCE)
+PairBornCoulWolfCSGPU::PairBornCoulWolfCSGPU(LAMMPS *lmp) :
+    PairBornCoulWolfCS(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  reinitflag = 0;
@ -92,7 +82,7 @@ PairBornCoulWolfCSGPU::~PairBornCoulWolfCSGPU()

 void PairBornCoulWolfCSGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -100,7 +90,7 @@ void PairBornCoulWolfCSGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -109,30 +99,25 @@ void PairBornCoulWolfCSGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = borncwcs_gpu_compute_n(neighbor->ago, inum, nall,
-                                      atom->x, atom->type, sublo,
-                                      subhi, atom->tag, atom->nspecial,
-                                      atom->special, eflag, vflag, eflag_atom,
-                                      vflag_atom, host_start,
-                                      &ilist, &numneigh, cpu_time, success,
-                                      atom->q, domain->boxlo, domain->prd);
+    firstneigh = borncwcs_gpu_compute_n(
+        neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial,
+        atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
+        cpu_time, success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    borncwcs_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                       ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                       vflag_atom, host_start, cpu_time, success, atom->q,
-                       atom->nlocal, domain->boxlo, domain->prd);
+    borncwcs_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh,
+                         firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
+                         success, atom->q, atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -154,10 +139,9 @@ void PairBornCoulWolfCSGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -167,28 +151,21 @@ void PairBornCoulWolfCSGPU::init_style()

  cut_coulsq = cut_coul * cut_coul;

-  double e_shift = erfc(alf*cut_coul)/cut_coul;
-  double f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
-    cut_coul;
+  double e_shift = erfc(alf * cut_coul) / cut_coul;
+  double f_shift =
+      -(e_shift + 2.0 * alf / MY_PIS * exp(-alf * alf * cut_coul * cut_coul)) / cut_coul;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = borncwcs_gpu_init(atom->ntypes+1, cutsq, rhoinv,
-                                born1, born2, born3, a, c, d, sigma, offset,
-                                force->special_lj, atom->nlocal,
-                                atom->nlocal+atom->nghost, mnf, maxspecial,
-                                cell_size, gpu_mode, screen, cut_ljsq,
-                                cut_coulsq, force->special_coul, force->qqrd2e,
-                                alf, e_shift, f_shift);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      borncwcs_gpu_init(atom->ntypes + 1, cutsq, rhoinv, born1, born2, born3, a, c, d, sigma,
+                        offset, force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
+                        maxspecial, cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
+                        force->special_coul, force->qqrd2e, alf, e_shift, f_shift);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -201,15 +178,15 @@ double PairBornCoulWolfCSGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairBornCoulWolfCSGPU::cpu_compute(int start, int inum, int eflag,
-                                      int /* vflag */, int *ilist,
-                                      int *numneigh, int **firstneigh) {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul,fpair;
-  double rsq,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
-  double erfcc,erfcd,v_sh,dvdrr,e_self,qisq;
+void PairBornCoulWolfCSGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                                        int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, qtmp, delx, dely, delz, evdwl, ecoul, fpair;
+  double rsq, r2inv, r6inv, forcecoul, forceborn, factor_coul, factor_lj;
+  double erfcc, erfcd, v_sh, dvdrr, e_self, qisq;
  double prefactor;
-  double r,rexp;
+  double r, rexp;
  int *jlist;

  evdwl = ecoul = 0.0;
@ -223,9 +200,9 @@ void PairBornCoulWolfCSGPU::cpu_compute(int start, int inum, int eflag,
  double *special_lj = force->special_lj;
  double qqrd2e = force->qqrd2e;

-  double e_shift = erfc(alf*cut_coul)/cut_coul;
-  double f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
-    cut_coul;
+  double e_shift = erfc(alf * cut_coul) / cut_coul;
+  double f_shift =
+      -(e_shift + 2.0 * alf / MY_PIS * exp(-alf * alf * cut_coul * cut_coul)) / cut_coul;

  // loop over neighbors of my atoms

@ -239,9 +216,9 @@ void PairBornCoulWolfCSGPU::cpu_compute(int start, int inum, int eflag,
    jlist = firstneigh[i];
    jnum = numneigh[i];

-    qisq = qtmp*qtmp;
-    e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
-    if (evflag) ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
+    qisq = qtmp * qtmp;
+    e_self = -(e_shift / 2.0 + alf / MY_PIS) * qisq * qqrd2e;
+    if (evflag) ev_tally(i, i, nlocal, 0, 0.0, e_self, 0.0, 0.0, 0.0, 0.0);

    for (jj = 0; jj < jnum; jj++) {
      j = jlist[jj];
@ -252,51 +229,56 @@ void PairBornCoulWolfCSGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        rsq += EPSILON; // Add EPSILON for case: r = 0; Interaction must be removed by special bond
-        r2inv = 1.0/rsq;
+        rsq +=
+            EPSILON;    // Add EPSILON for case: r = 0; Interaction must be removed by special bond
+        r2inv = 1.0 / rsq;

        if (rsq < cut_coulsq) {
          r = sqrt(rsq);
-          prefactor = qqrd2e*qtmp*q[j]/r;
-          erfcc = erfc(alf*r);
-          erfcd = exp(-alf*alf*r*r);
-          v_sh = (erfcc - e_shift*r) * prefactor;
-          dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
-          forcecoul = dvdrr*rsq*prefactor;
-          if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
-        } else forcecoul = 0.0;
+          prefactor = qqrd2e * qtmp * q[j] / r;
+          erfcc = erfc(alf * r);
+          erfcd = exp(-alf * alf * r * r);
+          v_sh = (erfcc - e_shift * r) * prefactor;
+          dvdrr = (erfcc / rsq + 2.0 * alf / MY_PIS * erfcd / r) + f_shift;
+          forcecoul = dvdrr * rsq * prefactor;
+          if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
+        } else
+          forcecoul = 0.0;

        if (rsq < cut_ljsq[itype][jtype]) {
-          r6inv = r2inv*r2inv*r2inv;
+          r6inv = r2inv * r2inv * r2inv;
          r = sqrt(rsq);
-          rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
-          forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv +
-            born3[itype][jtype]*r2inv*r6inv;
-        } else forceborn = 0.0;
+          rexp = exp((sigma[itype][jtype] - r) * rhoinv[itype][jtype]);
+          forceborn = born1[itype][jtype] * r * rexp - born2[itype][jtype] * r6inv +
+              born3[itype][jtype] * r2inv * r6inv;
+        } else
+          forceborn = 0.0;

-        fpair = (factor_coul*forcecoul + factor_lj*forceborn) * r2inv;
+        fpair = (factor_coul * forcecoul + factor_lj * forceborn) * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          if (rsq < cut_coulsq) {
            ecoul = v_sh;
-            if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
-          } else ecoul = 0.0;
+            if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
+          } else
+            ecoul = 0.0;
          if (rsq < cut_ljsq[itype][jtype]) {
-            evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv +
-              d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
+            evdwl = a[itype][jtype] * rexp - c[itype][jtype] * r6inv +
+                d[itype][jtype] * r6inv * r2inv - offset[itype][jtype];
            evdwl *= factor_lj;
-          } else evdwl = 0.0;
+          } else
+            evdwl = 0.0;
        }

-        if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_born_coul_wolf_gpu.cpp
+++ b/src/GPU/pair_born_coul_wolf_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -25,7 +24,6 @@
 #include "gpu_extra.h"
 #include "math_const.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -36,39 +34,30 @@ using namespace MathConst;

 // External functions from cuda library for atom decomposition

-int borncw_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
-                    double **host_born1, double **host_born2,
-                    double **host_born3, double **host_a, double **host_c,
-                    double **host_d, double **sigma, double **offset,
-                    double *special_lj, const int inum,
-                    const int nall, const int max_nbors, const int maxspecial,
-                    const double cell_size, int &gpu_mode, FILE *screen,
-                    double **host_cut_ljsq, double host_cut_coulsq,
-                    double *host_special_coul, const double qqrd2e,
-                    const double alf, const double e_shift,
-                    const double f_shift);
+int borncw_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, double **host_born1,
+                    double **host_born2, double **host_born3, double **host_a, double **host_c,
+                    double **host_d, double **sigma, double **offset, double *special_lj,
+                    const int inum, const int nall, const int max_nbors, const int maxspecial,
+                    const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
+                    double host_cut_coulsq, double *host_special_coul, const double qqrd2e,
+                    const double alf, const double e_shift, const double f_shift);
 void borncw_gpu_clear();
-int ** borncw_gpu_compute_n(const int ago, const int inum_full, const int nall,
-                            double **host_x, int *host_type, double *sublo,
-                            double *subhi, tagint *tag, int **nspecial,
-                            tagint **special, const bool eflag,
-                            const bool vflag, const bool eatom,
-                            const bool vatom, int &host_start,
-                            int **ilist, int **jnum, const double cpu_time,
-                            bool &success, double *host_q, double *boxlo,
-                            double *prd);
-void borncw_gpu_compute(const int ago, const int inum_full, const int nall,
-                        double **host_x, int *host_type, int *ilist, int *numj,
-                        int **firstneigh, const bool eflag, const bool vflag,
-                        const bool eatom, const bool vatom, int &host_start,
-                        const double cpu_time, bool &success, double *host_q,
-                        const int nlocal, double *boxlo, double *prd);
+int **borncw_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
+                           int *host_type, double *sublo, double *subhi, tagint *tag,
+                           int **nspecial, tagint **special, const bool eflag, const bool vflag,
+                           const bool eatom, const bool vatom, int &host_start, int **ilist,
+                           int **jnum, const double cpu_time, bool &success, double *host_q,
+                           double *boxlo, double *prd);
+void borncw_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
+                        int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                        const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                        const double cpu_time, bool &success, double *host_q, const int nlocal,
+                        double *boxlo, double *prd);
 double borncw_gpu_bytes();

 /* ---------------------------------------------------------------------- */

-PairBornCoulWolfGPU::PairBornCoulWolfGPU(LAMMPS *lmp) : PairBornCoulWolf(lmp),
-                                                      gpu_mode(GPU_FORCE)
+PairBornCoulWolfGPU::PairBornCoulWolfGPU(LAMMPS *lmp) : PairBornCoulWolf(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  reinitflag = 0;
@ -90,7 +79,7 @@ PairBornCoulWolfGPU::~PairBornCoulWolfGPU()

 void PairBornCoulWolfGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -98,7 +87,7 @@ void PairBornCoulWolfGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -107,30 +96,25 @@ void PairBornCoulWolfGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = borncw_gpu_compute_n(neighbor->ago, inum, nall,
-                                      atom->x, atom->type, sublo,
-                                      subhi, atom->tag, atom->nspecial,
-                                      atom->special, eflag, vflag, eflag_atom,
-                                      vflag_atom, host_start,
-                                      &ilist, &numneigh, cpu_time, success,
-                                      atom->q, domain->boxlo, domain->prd);
+    firstneigh = borncw_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                      atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                      eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
+                                      cpu_time, success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    borncw_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                       ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                       vflag_atom, host_start, cpu_time, success, atom->q,
+    borncw_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                       eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
                       atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -143,8 +127,7 @@ void PairBornCoulWolfGPU::compute(int eflag, int vflag)

 void PairBornCoulWolfGPU::init_style()
 {
-  if (!atom->q_flag)
-    error->all(FLERR, "Pair style born/coul/wolf/gpu requires atom attribute q");
+  if (!atom->q_flag) error->all(FLERR, "Pair style born/coul/wolf/gpu requires atom attribute q");

  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
@ -152,10 +135,9 @@ void PairBornCoulWolfGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -165,28 +147,21 @@ void PairBornCoulWolfGPU::init_style()

  cut_coulsq = cut_coul * cut_coul;

-  double e_shift = erfc(alf*cut_coul)/cut_coul;
-  double f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
-    cut_coul;
+  double e_shift = erfc(alf * cut_coul) / cut_coul;
+  double f_shift =
+      -(e_shift + 2.0 * alf / MY_PIS * exp(-alf * alf * cut_coul * cut_coul)) / cut_coul;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = borncw_gpu_init(atom->ntypes+1, cutsq, rhoinv,
-                                born1, born2, born3, a, c, d, sigma, offset,
-                                force->special_lj, atom->nlocal,
-                                atom->nlocal+atom->nghost, mnf, maxspecial,
-                                cell_size, gpu_mode, screen, cut_ljsq,
-                                cut_coulsq, force->special_coul, force->qqrd2e,
-                                alf, e_shift, f_shift);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      borncw_gpu_init(atom->ntypes + 1, cutsq, rhoinv, born1, born2, born3, a, c, d, sigma, offset,
+                      force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial,
+                      cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq, force->special_coul,
+                      force->qqrd2e, alf, e_shift, f_shift);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -199,15 +174,15 @@ double PairBornCoulWolfGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairBornCoulWolfGPU::cpu_compute(int start, int inum, int eflag,
-                                      int /* vflag */, int *ilist,
-                                      int *numneigh, int **firstneigh) {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul,fpair;
-  double rsq,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
-  double erfcc,erfcd,v_sh,dvdrr,e_self,qisq;
+void PairBornCoulWolfGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                                      int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, qtmp, delx, dely, delz, evdwl, ecoul, fpair;
+  double rsq, r2inv, r6inv, forcecoul, forceborn, factor_coul, factor_lj;
+  double erfcc, erfcd, v_sh, dvdrr, e_self, qisq;
  double prefactor;
-  double r,rexp;
+  double r, rexp;
  int *jlist;

  evdwl = ecoul = 0.0;
@ -221,9 +196,9 @@ void PairBornCoulWolfGPU::cpu_compute(int start, int inum, int eflag,
  double *special_lj = force->special_lj;
  double qqrd2e = force->qqrd2e;

-  double e_shift = erfc(alf*cut_coul)/cut_coul;
-  double f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
-    cut_coul;
+  double e_shift = erfc(alf * cut_coul) / cut_coul;
+  double f_shift =
+      -(e_shift + 2.0 * alf / MY_PIS * exp(-alf * alf * cut_coul * cut_coul)) / cut_coul;

  // loop over neighbors of my atoms

@ -237,9 +212,9 @@ void PairBornCoulWolfGPU::cpu_compute(int start, int inum, int eflag,
    jlist = firstneigh[i];
    jnum = numneigh[i];

-    qisq = qtmp*qtmp;
-    e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
-    if (evflag) ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
+    qisq = qtmp * qtmp;
+    e_self = -(e_shift / 2.0 + alf / MY_PIS) * qisq * qqrd2e;
+    if (evflag) ev_tally(i, i, nlocal, 0, 0.0, e_self, 0.0, 0.0, 0.0, 0.0);

    for (jj = 0; jj < jnum; jj++) {
      j = jlist[jj];
@ -250,50 +225,54 @@ void PairBornCoulWolfGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;

        if (rsq < cut_coulsq) {
          r = sqrt(rsq);
-          prefactor = qqrd2e*qtmp*q[j]/r;
-          erfcc = erfc(alf*r);
-          erfcd = exp(-alf*alf*r*r);
-          v_sh = (erfcc - e_shift*r) * prefactor;
-          dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
-          forcecoul = dvdrr*rsq*prefactor;
-          if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
-        } else forcecoul = 0.0;
+          prefactor = qqrd2e * qtmp * q[j] / r;
+          erfcc = erfc(alf * r);
+          erfcd = exp(-alf * alf * r * r);
+          v_sh = (erfcc - e_shift * r) * prefactor;
+          dvdrr = (erfcc / rsq + 2.0 * alf / MY_PIS * erfcd / r) + f_shift;
+          forcecoul = dvdrr * rsq * prefactor;
+          if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
+        } else
+          forcecoul = 0.0;

        if (rsq < cut_ljsq[itype][jtype]) {
-          r6inv = r2inv*r2inv*r2inv;
+          r6inv = r2inv * r2inv * r2inv;
          r = sqrt(rsq);
-          rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
-          forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv +
-            born3[itype][jtype]*r2inv*r6inv;
-        } else forceborn = 0.0;
+          rexp = exp((sigma[itype][jtype] - r) * rhoinv[itype][jtype]);
+          forceborn = born1[itype][jtype] * r * rexp - born2[itype][jtype] * r6inv +
+              born3[itype][jtype] * r2inv * r6inv;
+        } else
+          forceborn = 0.0;

-        fpair = (factor_coul*forcecoul + factor_lj*forceborn) * r2inv;
+        fpair = (factor_coul * forcecoul + factor_lj * forceborn) * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          if (rsq < cut_coulsq) {
            ecoul = v_sh;
-            if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
-          } else ecoul = 0.0;
+            if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
+          } else
+            ecoul = 0.0;
          if (rsq < cut_ljsq[itype][jtype]) {
-            evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv +
-              d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
+            evdwl = a[itype][jtype] * rexp - c[itype][jtype] * r6inv +
+                d[itype][jtype] * r6inv * r2inv - offset[itype][jtype];
            evdwl *= factor_lj;
-          } else evdwl = 0.0;
+          } else
+            evdwl = 0.0;
        }

-        if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_born_gpu.cpp
+++ b/src/GPU/pair_born_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,29 +32,23 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int born_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
-                  double **host_born1, double **host_born2, double **host_born3,
-                  double **host_a, double **host_c, double **host_d,
-                  double **host_sigma, double **offset, double *special_lj,
-                  const int inum, const int nall, const int max_nbors,
-                  const int maxspecial, const double cell_size,
-                  int &gpu_mode, FILE *screen);
-void born_gpu_reinit(const int ntypes, double **host_rhoinv,
-                     double **host_born1, double **host_born2,
-                     double **host_born3, double **host_a, double **host_c,
+int born_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, double **host_born1,
+                  double **host_born2, double **host_born3, double **host_a, double **host_c,
+                  double **host_d, double **host_sigma, double **offset, double *special_lj,
+                  const int inum, const int nall, const int max_nbors, const int maxspecial,
+                  const double cell_size, int &gpu_mode, FILE *screen);
+void born_gpu_reinit(const int ntypes, double **host_rhoinv, double **host_born1,
+                     double **host_born2, double **host_born3, double **host_a, double **host_c,
                     double **host_d, double **offset);
 void born_gpu_clear();
-int ** born_gpu_compute_n(const int ago, const int inum_full, const int nall,
-                          double **host_x, int *host_type, double *sublo,
-                          double *subhi, tagint *tag, int **nspecial,
-                          tagint **special, const bool eflag, const bool vflag,
-                          const bool eatom, const bool vatom, int &host_start,
-                          int **ilist, int **jnum, const double cpu_time,
-                          bool &success);
-void born_gpu_compute(const int ago, const int inum_full, const int nall,
-                      double **host_x, int *host_type, int *ilist, int *numj,
-                      int **firstneigh, const bool eflag, const bool vflag,
-                      const bool eatom, const bool vatom, int &host_start,
+int **born_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
+                         int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                         const bool vatom, int &host_start, int **ilist, int **jnum,
+                         const double cpu_time, bool &success);
+void born_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
+                      int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                      const bool vflag, const bool eatom, const bool vatom, int &host_start,
                      const double cpu_time, bool &success);
 double born_gpu_bytes();

@ -83,7 +75,7 @@ PairBornGPU::~PairBornGPU()

 void PairBornGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -91,7 +83,7 @@ void PairBornGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -100,28 +92,24 @@ void PairBornGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = born_gpu_compute_n(neighbor->ago, inum, nall,
-                                    atom->x, atom->type, sublo,
-                                    subhi, atom->tag, atom->nspecial,
-                                    atom->special, eflag, vflag, eflag_atom,
-                                    vflag_atom, host_start,
-                                    &ilist, &numneigh, cpu_time, success);
+    firstneigh =
+        born_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+                           atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
+                           host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    born_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                     vflag_atom, host_start, cpu_time, success);
+    born_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                     eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -141,10 +129,9 @@ void PairBornGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -152,22 +139,15 @@ void PairBornGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = born_gpu_init(atom->ntypes+1, cutsq, rhoinv,
-                              born1, born2, born3, a, c, d, sigma,
-                              offset, force->special_lj, atom->nlocal,
-                              atom->nlocal+atom->nghost, mnf, maxspecial,
-              cell_size, gpu_mode, screen);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success = born_gpu_init(atom->ntypes + 1, cutsq, rhoinv, born1, born2, born3, a, c, d, sigma,
+                              offset, force->special_lj, atom->nlocal, atom->nlocal + atom->nghost,
+                              mnf, maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -176,8 +156,7 @@ void PairBornGPU::reinit()
 {
  Pair::reinit();

-  born_gpu_reinit(atom->ntypes+1, rhoinv, born1, born2, born3,
-                  a, c, d, offset);
+  born_gpu_reinit(atom->ntypes + 1, rhoinv, born1, born2, born3, a, c, d, offset);
 }

 /* ---------------------------------------------------------------------- */
@ -190,13 +169,13 @@ double PairBornGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairBornGPU::cpu_compute(int start, int inum, int eflag,
-                              int /* vflag */, int *ilist,
-                              int *numneigh, int **firstneigh) {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forceborn,factor_lj;
-  double r,rexp;
+void PairBornGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                              int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
+  double rsq, r2inv, r6inv, forceborn, factor_lj;
+  double r, rexp;
  int *jlist;

  double **x = atom->x;
@ -223,29 +202,29 @@ void PairBornGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
-        r6inv = r2inv*r2inv*r2inv;
+        r2inv = 1.0 / rsq;
+        r6inv = r2inv * r2inv * r2inv;
        r = sqrt(rsq);
-        rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
-        forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv +
-          born3[itype][jtype]*r2inv*r6inv;
-        fpair = factor_lj*forceborn*r2inv;
+        rexp = exp((sigma[itype][jtype] - r) * rhoinv[itype][jtype]);
+        forceborn = born1[itype][jtype] * r * rexp - born2[itype][jtype] * r6inv +
+            born3[itype][jtype] * r2inv * r6inv;
+        fpair = factor_lj * forceborn * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
-          evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv +
-            d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
+          evdwl = a[itype][jtype] * rexp - c[itype][jtype] * r6inv +
+              d[itype][jtype] * r6inv * r2inv - offset[itype][jtype];
          evdwl *= factor_lj;
        }

-        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_buck_coul_cut_gpu.cpp
+++ b/src/GPU/pair_buck_coul_cut_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,35 +32,29 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int buckc_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
-                   double **host_buck1, double **host_buck2, double **host_a,
-                   double **host_c, double **offset, double *special_lj,
-                   const int inum, const int nall, const int max_nbors,
-                   const int maxspecial, const double cell_size,
-                   int &gpu_mode, FILE *screen, double **host_cut_ljsq,
-                   double **host_cut_coulsq, double *host_special_coul,
+int buckc_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, double **host_buck1,
+                   double **host_buck2, double **host_a, double **host_c, double **offset,
+                   double *special_lj, const int inum, const int nall, const int max_nbors,
+                   const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen,
+                   double **host_cut_ljsq, double **host_cut_coulsq, double *host_special_coul,
                   const double qqrd2e);
 void buckc_gpu_clear();
-int ** buckc_gpu_compute_n(const int ago, const int inum_full, const int nall,
-                           double **host_x, int *host_type, double *sublo,
-                           double *subhi, tagint *tag, int **nspecial,
-                           tagint **special, const bool eflag, const bool vflag,
-                           const bool eatom, const bool vatom, int &host_start,
-                           int **ilist, int **jnum, const double cpu_time,
-                           bool &success, double *host_q, double *boxlo,
-                           double *prd);
-void buckc_gpu_compute(const int ago, const int inum_full, const int nall,
-                       double **host_x, int *host_type, int *ilist, int *numj,
-                       int **firstneigh, const bool eflag, const bool vflag,
-                       const bool eatom, const bool vatom, int &host_start,
-                       const double cpu_time, bool &success, double *host_q,
-                       const int nlocal, double *boxlo, double *prd);
+int **buckc_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
+                          int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                          tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                          const bool vatom, int &host_start, int **ilist, int **jnum,
+                          const double cpu_time, bool &success, double *host_q, double *boxlo,
+                          double *prd);
+void buckc_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
+                       int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                       const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                       const double cpu_time, bool &success, double *host_q, const int nlocal,
+                       double *boxlo, double *prd);
 double buckc_gpu_bytes();

 /* ---------------------------------------------------------------------- */

-PairBuckCoulCutGPU::PairBuckCoulCutGPU(LAMMPS *lmp) : PairBuckCoulCut(lmp),
-                                                      gpu_mode(GPU_FORCE)
+PairBuckCoulCutGPU::PairBuckCoulCutGPU(LAMMPS *lmp) : PairBuckCoulCut(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  reinitflag = 0;
@ -84,7 +76,7 @@ PairBuckCoulCutGPU::~PairBuckCoulCutGPU()

 void PairBuckCoulCutGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -92,7 +84,7 @@ void PairBuckCoulCutGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -101,30 +93,25 @@ void PairBuckCoulCutGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = buckc_gpu_compute_n(neighbor->ago, inum, nall,
-                                     atom->x, atom->type, sublo,
-                                     subhi, atom->tag, atom->nspecial,
-                                     atom->special, eflag, vflag, eflag_atom,
-                                     vflag_atom, host_start,
-                                     &ilist, &numneigh, cpu_time, success,
-                                     atom->q, domain->boxlo, domain->prd);
+    firstneigh = buckc_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                     atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                     eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
+                                     cpu_time, success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    buckc_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                      ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                      vflag_atom, host_start, cpu_time, success, atom->q,
+    buckc_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                      eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
                      atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -137,8 +124,7 @@ void PairBuckCoulCutGPU::compute(int eflag, int vflag)

 void PairBuckCoulCutGPU::init_style()
 {
-  if (!atom->q_flag)
-    error->all(FLERR, "Pair style buck/coul/cut/gpu requires atom attribute q");
+  if (!atom->q_flag) error->all(FLERR, "Pair style buck/coul/cut/gpu requires atom attribute q");

  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
@ -146,10 +132,9 @@ void PairBuckCoulCutGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -157,22 +142,16 @@ void PairBuckCoulCutGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = buckc_gpu_init(atom->ntypes+1, cutsq, rhoinv, buck1, buck2,
-                               a, c, offset, force->special_lj, atom->nlocal,
-                               atom->nlocal+atom->nghost, mnf, maxspecial,
-                               cell_size, gpu_mode, screen, cut_ljsq,
-                               cut_coulsq, force->special_coul, force->qqrd2e);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      buckc_gpu_init(atom->ntypes + 1, cutsq, rhoinv, buck1, buck2, a, c, offset, force->special_lj,
+                     atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size,
+                     gpu_mode, screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -185,13 +164,13 @@ double PairBuckCoulCutGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairBuckCoulCutGPU::cpu_compute(int start, int inum, int eflag,
-                                     int /* vflag */, int *ilist,
-                                     int *numneigh, int **firstneigh) {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul,fpair;
-  double rsq,r2inv,r6inv,forcecoul,forcebuck,factor_coul,factor_lj;
-  double r,rexp;
+void PairBuckCoulCutGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                                     int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, qtmp, delx, dely, delz, evdwl, ecoul, fpair;
+  double rsq, r2inv, r6inv, forcecoul, forcebuck, factor_coul, factor_lj;
+  double r, rexp;
  int *jlist;

  evdwl = ecoul = 0.0;
@ -225,41 +204,44 @@ void PairBuckCoulCutGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;
        r = sqrt(rsq);

        if (rsq < cut_coulsq[itype][jtype])
-          forcecoul = qqrd2e * qtmp*q[j]/r;
-        else forcecoul = 0.0;
+          forcecoul = qqrd2e * qtmp * q[j] / r;
+        else
+          forcecoul = 0.0;

        if (rsq < cut_ljsq[itype][jtype]) {
-          r6inv = r2inv*r2inv*r2inv;
-          rexp = exp(-r*rhoinv[itype][jtype]);
-          forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv;
-        } else forcebuck = 0.0;
+          r6inv = r2inv * r2inv * r2inv;
+          rexp = exp(-r * rhoinv[itype][jtype]);
+          forcebuck = buck1[itype][jtype] * r * rexp - buck2[itype][jtype] * r6inv;
+        } else
+          forcebuck = 0.0;

-        fpair = (factor_coul*forcecoul + factor_lj*forcebuck) * r2inv;
+        fpair = (factor_coul * forcecoul + factor_lj * forcebuck) * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          if (rsq < cut_coulsq[itype][jtype])
-            ecoul = factor_coul * qqrd2e * qtmp*q[j]/r;
-          else ecoul = 0.0;
+            ecoul = factor_coul * qqrd2e * qtmp * q[j] / r;
+          else
+            ecoul = 0.0;
          if (rsq < cut_ljsq[itype][jtype]) {
-            evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv -
-              offset[itype][jtype];
+            evdwl = a[itype][jtype] * rexp - c[itype][jtype] * r6inv - offset[itype][jtype];
            evdwl *= factor_lj;
-          } else evdwl = 0.0;
+          } else
+            evdwl = 0.0;
        }

-        if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_buck_coul_long_gpu.cpp
+++ b/src/GPU/pair_buck_coul_long_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -25,53 +24,46 @@
 #include "gpu_extra.h"
 #include "kspace.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

 #include <cmath>

-#define EWALD_F   1.12837917
-#define EWALD_P   0.3275911
-#define A1        0.254829592
-#define A2       -0.284496736
-#define A3        1.421413741
-#define A4       -1.453152027
-#define A5        1.061405429
+#define EWALD_F 1.12837917
+#define EWALD_P 0.3275911
+#define A1 0.254829592
+#define A2 -0.284496736
+#define A3 1.421413741
+#define A4 -1.453152027
+#define A5 1.061405429

 using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int buckcl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
-                    double **host_buck1, double **host_buck2, double **host_a,
-                    double **host_c, double **offset, double *special_lj,
-                    const int inum, const int nall, const int max_nbors,
-                    const int maxspecial, const double cell_size,
-                    int &gpu_mode, FILE *screen, double **host_cut_ljsq,
-                    double host_cut_coulsq, double *host_special_coul,
+int buckcl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, double **host_buck1,
+                    double **host_buck2, double **host_a, double **host_c, double **offset,
+                    double *special_lj, const int inum, const int nall, const int max_nbors,
+                    const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen,
+                    double **host_cut_ljsq, double host_cut_coulsq, double *host_special_coul,
                    const double qqrd2e, const double g_ewald);
 void buckcl_gpu_clear();
-int** buckcl_gpu_compute_n(const int ago, const int inum_full, const int nall,
-                           double **host_x, int *host_type, double *sublo,
-                           double *subhi, tagint *tag, int **nspecial,
-                           tagint **special, const bool eflag, const bool vflag,
-                           const bool eatom, const bool vatom, int &host_start,
-                           int **ilist, int **jnum,  const double cpu_time,
-                           bool &success, double *host_q, double *boxlo,
-                           double *prd);
-void buckcl_gpu_compute(const int ago, const int inum_full, const int nall,
-                        double **host_x, int *host_type, int *ilist, int *numj,
-                        int **firstneigh, const bool eflag, const bool vflag,
-                        const bool eatom, const bool vatom, int &host_start,
-                        const double cpu_time, bool &success, double *host_q,
-                        const int nlocal, double *boxlo, double *prd);
+int **buckcl_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
+                           int *host_type, double *sublo, double *subhi, tagint *tag,
+                           int **nspecial, tagint **special, const bool eflag, const bool vflag,
+                           const bool eatom, const bool vatom, int &host_start, int **ilist,
+                           int **jnum, const double cpu_time, bool &success, double *host_q,
+                           double *boxlo, double *prd);
+void buckcl_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
+                        int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                        const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                        const double cpu_time, bool &success, double *host_q, const int nlocal,
+                        double *boxlo, double *prd);
 double buckcl_gpu_bytes();

 /* ---------------------------------------------------------------------- */

-PairBuckCoulLongGPU::PairBuckCoulLongGPU(LAMMPS *lmp) :
-  PairBuckCoulLong(lmp), gpu_mode(GPU_FORCE)
+PairBuckCoulLongGPU::PairBuckCoulLongGPU(LAMMPS *lmp) : PairBuckCoulLong(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  reinitflag = 0;
@ -93,7 +85,7 @@ PairBuckCoulLongGPU::~PairBuckCoulLongGPU()

 void PairBuckCoulLongGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -101,7 +93,7 @@ void PairBuckCoulLongGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -110,30 +102,25 @@ void PairBuckCoulLongGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = buckcl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                      atom->type, sublo, subhi,
-                                      atom->tag, atom->nspecial, atom->special,
-                                      eflag, vflag, eflag_atom, vflag_atom,
-                                      host_start, &ilist, &numneigh, cpu_time,
-                                      success, atom->q, domain->boxlo,
-                                      domain->prd);
+    firstneigh = buckcl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                      atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                      eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
+                                      cpu_time, success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    buckcl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                       ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                       vflag_atom, host_start, cpu_time, success, atom->q,
+    buckcl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                       eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
                       atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -146,8 +133,7 @@ void PairBuckCoulLongGPU::compute(int eflag, int vflag)

 void PairBuckCoulLongGPU::init_style()
 {
-  if (!atom->q_flag)
-    error->all(FLERR, "Pair style buck/coul/long/gpu requires atom attribute q");
+  if (!atom->q_flag) error->all(FLERR, "Pair style buck/coul/long/gpu requires atom attribute q");

  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
@ -155,10 +141,9 @@ void PairBuckCoulLongGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -170,31 +155,23 @@ void PairBuckCoulLongGPU::init_style()

  // insure use of KSpace long-range solver, set g_ewald

-  if (force->kspace == nullptr)
-    error->all(FLERR,"Pair style requires a KSpace style");
+  if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
  g_ewald = force->kspace->g_ewald;

  // setup force tables

-  if (ncoultablebits) init_tables(cut_coul,cut_respa);
+  if (ncoultablebits) init_tables(cut_coul, cut_respa);

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = buckcl_gpu_init(atom->ntypes+1, cutsq,  rhoinv, buck1, buck2,
-                                a, c, offset, force->special_lj, atom->nlocal,
-                                atom->nlocal+atom->nghost, mnf, maxspecial,
-                                cell_size, gpu_mode, screen, cut_ljsq,
-                                cut_coulsq, force->special_coul, force->qqrd2e,
-                                g_ewald);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success = buckcl_gpu_init(atom->ntypes + 1, cutsq, rhoinv, buck1, buck2, a, c, offset,
+                                force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
+                                maxspecial, cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
+                                force->special_coul, force->qqrd2e, g_ewald);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -207,14 +184,13 @@ double PairBuckCoulLongGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairBuckCoulLongGPU::cpu_compute(int start, int inum, int eflag,
-                                       int /* vflag */, int *ilist,
-                                       int *numneigh, int **firstneigh)
+void PairBuckCoulLongGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                                      int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
-  double r,rexp,r2inv,r6inv,forcecoul,forcebuck,factor_coul,factor_lj;
-  double grij,expm2,prefactor,t,erfc;
+  int i, j, ii, jj, jnum, itype, jtype;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
+  double r, rexp, r2inv, r6inv, forcecoul, forcebuck, factor_coul, factor_lj;
+  double grij, expm2, prefactor, t, erfc;
  int *jlist;
  double rsq;

@ -249,48 +225,51 @@ void PairBuckCoulLongGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;
        r = sqrt(rsq);
        if (rsq < cut_coulsq) {
          grij = g_ewald * r;
-          expm2 = exp(-grij*grij);
-          t = 1.0 / (1.0 + EWALD_P*grij);
-          erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
-          prefactor = qqrd2e * qtmp*q[j]/r;
-          forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
-          if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
-        } else forcecoul = 0.0;
+          expm2 = exp(-grij * grij);
+          t = 1.0 / (1.0 + EWALD_P * grij);
+          erfc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * expm2;
+          prefactor = qqrd2e * qtmp * q[j] / r;
+          forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
+          if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
+        } else
+          forcecoul = 0.0;

        if (rsq < cut_ljsq[itype][jtype]) {
-          r6inv = r2inv*r2inv*r2inv;
-          rexp = exp(-r*rhoinv[itype][jtype]);
-          forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv;
-        } else forcebuck = 0.0;
+          r6inv = r2inv * r2inv * r2inv;
+          rexp = exp(-r * rhoinv[itype][jtype]);
+          forcebuck = buck1[itype][jtype] * r * rexp - buck2[itype][jtype] * r6inv;
+        } else
+          forcebuck = 0.0;

-        fpair = (forcecoul + factor_lj*forcebuck) * r2inv;
+        fpair = (forcecoul + factor_lj * forcebuck) * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          if (rsq < cut_coulsq) {
-            ecoul = prefactor*erfc;
-            if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
-          } else ecoul = 0.0;
+            ecoul = prefactor * erfc;
+            if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
+          } else
+            ecoul = 0.0;

          if (rsq < cut_ljsq[itype][jtype]) {
-            evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv -
-              offset[itype][jtype];
+            evdwl = a[itype][jtype] * rexp - c[itype][jtype] * r6inv - offset[itype][jtype];
            evdwl *= factor_lj;
-          } else evdwl = 0.0;
+          } else
+            evdwl = 0.0;
        }

-        if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_buck_gpu.cpp
+++ b/src/GPU/pair_buck_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,27 +32,21 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int buck_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
-                  double **host_buck1, double **host_buck2,
-                  double **host_a, double **host_c,
-                  double **offset, double *special_lj, const int inum,
-                  const int nall, const int max_nbors,  const int maxspecial,
-                  const double cell_size, int &gpu_mode, FILE *screen);
-void buck_gpu_reinit(const int ntypes, double **cutsq, double **host_rhoinv,
-                     double **host_buck1, double **host_buck2,
-                     double **host_a, double **host_c, double **offset);
+int buck_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, double **host_buck1,
+                  double **host_buck2, double **host_a, double **host_c, double **offset,
+                  double *special_lj, const int inum, const int nall, const int max_nbors,
+                  const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen);
+void buck_gpu_reinit(const int ntypes, double **cutsq, double **host_rhoinv, double **host_buck1,
+                     double **host_buck2, double **host_a, double **host_c, double **offset);
 void buck_gpu_clear();
-int ** buck_gpu_compute_n(const int ago, const int inum_full, const int nall,
-                          double **host_x, int *host_type, double *sublo,
-                          double *subhi, tagint *tag, int **nspecial,
-                          tagint **special, const bool eflag, const bool vflag,
-                          const bool eatom, const bool vatom, int &host_start,
-                          int **ilist, int **jnum, const double cpu_time,
-                          bool &success);
-void buck_gpu_compute(const int ago, const int inum_full, const int nall,
-                      double **host_x, int *host_type, int *ilist, int *numj,
-                      int **firstneigh, const bool eflag, const bool vflag,
-                      const bool eatom, const bool vatom, int &host_start,
+int **buck_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
+                         int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                         const bool vatom, int &host_start, int **ilist, int **jnum,
+                         const double cpu_time, bool &success);
+void buck_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
+                      int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                      const bool vflag, const bool eatom, const bool vatom, int &host_start,
                      const double cpu_time, bool &success);
 double buck_gpu_bytes();

@ -81,7 +73,7 @@ PairBuckGPU::~PairBuckGPU()

 void PairBuckGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -89,7 +81,7 @@ void PairBuckGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -98,28 +90,24 @@ void PairBuckGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = buck_gpu_compute_n(neighbor->ago, inum, nall,
-                                    atom->x, atom->type, sublo,
-                                    subhi, atom->tag, atom->nspecial,
-                                    atom->special, eflag, vflag, eflag_atom,
-                                    vflag_atom, host_start,
-                                    &ilist, &numneigh, cpu_time, success);
+    firstneigh =
+        buck_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+                           atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
+                           host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    buck_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                     vflag_atom, host_start, cpu_time, success);
+    buck_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                     eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -139,10 +127,9 @@ void PairBuckGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -150,21 +137,15 @@ void PairBuckGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = buck_gpu_init(atom->ntypes+1, cutsq, rhoinv, buck1, buck2,
-                              a, c, offset, force->special_lj, atom->nlocal,
-                              atom->nlocal+atom->nghost, mnf, maxspecial,
-                              cell_size, gpu_mode, screen);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success = buck_gpu_init(atom->ntypes + 1, cutsq, rhoinv, buck1, buck2, a, c, offset,
+                              force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
+                              maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -173,8 +154,7 @@ void PairBuckGPU::reinit()
 {
  Pair::reinit();

-  buck_gpu_reinit(atom->ntypes+1, cutsq, rhoinv, buck1, buck2,
-                  a, c, offset);
+  buck_gpu_reinit(atom->ntypes + 1, cutsq, rhoinv, buck1, buck2, a, c, offset);
 }

 /* ---------------------------------------------------------------------- */
@ -187,12 +167,13 @@ double PairBuckGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairBuckGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
-                              int *ilist, int *numneigh, int **firstneigh) {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcebuck,factor_lj;
-  double r,rexp;
+void PairBuckGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                              int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
+  double rsq, r2inv, r6inv, forcebuck, factor_lj;
+  double r, rexp;
  int *jlist;

  double **x = atom->x;
@ -219,28 +200,27 @@ void PairBuckGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
-        r6inv = r2inv*r2inv*r2inv;
+        r2inv = 1.0 / rsq;
+        r6inv = r2inv * r2inv * r2inv;
        r = sqrt(rsq);
-        rexp = exp(-r*rhoinv[itype][jtype]);
-        forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv;
-        fpair = factor_lj*forcebuck*r2inv;
+        rexp = exp(-r * rhoinv[itype][jtype]);
+        forcebuck = buck1[itype][jtype] * r * rexp - buck2[itype][jtype] * r6inv;
+        fpair = factor_lj * forcebuck * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
-          evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv -
-            offset[itype][jtype];
+          evdwl = a[itype][jtype] * rexp - c[itype][jtype] * r6inv - offset[itype][jtype];
          evdwl *= factor_lj;
        }

-        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_colloid_gpu.cpp
+++ b/src/GPU/pair_colloid_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -25,7 +24,6 @@
 #include "gpu_extra.h"
 #include "memory.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -35,26 +33,21 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int colloid_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
-                     double **host_lj2, double **host_lj3, double **host_lj4,
-                     double **offset, double *special_lj, double **host_a12,
-                     double **host_a1, double **host_a2, double **host_d1,
-                     double **host_d2, double **host_sigma3,
-                     double **host_sigma6, int **host_form, const int nlocal,
-                     const int nall, const int max_nbors, const int maxspecial,
+int colloid_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                     double **host_lj3, double **host_lj4, double **offset, double *special_lj,
+                     double **host_a12, double **host_a1, double **host_a2, double **host_d1,
+                     double **host_d2, double **host_sigma3, double **host_sigma6, int **host_form,
+                     const int nlocal, const int nall, const int max_nbors, const int maxspecial,
                     const double cell_size, int &gpu_mode, FILE *screen);
 void colloid_gpu_clear();
-int ** colloid_gpu_compute_n(const int ago, const int inum, const int nall,
-                             double **host_x, int *host_type, double *sublo,
-                             double *subhi, tagint *tag, int **nspecial,
-                             tagint **special, const bool eflag,
-                             const bool vflag, const bool eatom,
-                             const bool vatom, int &host_start, int **ilist,
-                             int **jnum, const double cpu_time, bool &success);
-void colloid_gpu_compute(const int ago, const int inum, const int nall,
-                         double **host_x, int *host_type, int *ilist, int *numj,
-                         int **firstneigh, const bool eflag, const bool vflag,
-                         const bool eatom, const bool vatom, int &host_start,
+int **colloid_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                            int *host_type, double *sublo, double *subhi, tagint *tag,
+                            int **nspecial, tagint **special, const bool eflag, const bool vflag,
+                            const bool eatom, const bool vatom, int &host_start, int **ilist,
+                            int **jnum, const double cpu_time, bool &success);
+void colloid_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                         int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                         const bool vflag, const bool eatom, const bool vatom, int &host_start,
                         const double cpu_time, bool &success);
 double colloid_gpu_bytes();

@ -82,7 +75,7 @@ PairColloidGPU::~PairColloidGPU()

 void PairColloidGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -90,7 +83,7 @@ void PairColloidGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -99,28 +92,24 @@ void PairColloidGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = colloid_gpu_compute_n(neighbor->ago, inum, nall,
-                                       atom->x, atom->type, sublo,
-                                       subhi, atom->tag, atom->nspecial,
-                                       atom->special, eflag, vflag, eflag_atom,
-                                       vflag_atom, host_start,
-                                       &ilist, &numneigh, cpu_time, success);
+    firstneigh =
+        colloid_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                              atom->tag, atom->nspecial, atom->special, eflag, vflag, eflag_atom,
+                              vflag_atom, host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    colloid_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                        ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                        vflag_atom, host_start, cpu_time, success);
+    colloid_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                        eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -140,10 +129,9 @@ void PairColloidGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -152,32 +140,29 @@ void PairColloidGPU::init_style()
  double cell_size = sqrt(maxcut) + neighbor->skin;

  int **_form = nullptr;
-  int n=atom->ntypes;
-  memory->create(_form,n+1,n+1,"colloid/gpu:_form");
+  int n = atom->ntypes;
+  memory->create(_form, n + 1, n + 1, "colloid/gpu:_form");
  for (int i = 1; i <= n; i++) {
    for (int j = 1; j <= n; j++) {
-      if (form[i][j] == SMALL_SMALL) _form[i][j] = 0;
-      else if (form[i][j] == SMALL_LARGE) _form[i][j] = 1;
-      else if (form[i][j] == LARGE_LARGE) _form[i][j] = 2;
+      if (form[i][j] == SMALL_SMALL)
+        _form[i][j] = 0;
+      else if (form[i][j] == SMALL_LARGE)
+        _form[i][j] = 1;
+      else if (form[i][j] == LARGE_LARGE)
+        _form[i][j] = 2;
    }
  }
-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = colloid_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
-                                 offset, force->special_lj, a12, a1, a2,
-                                 d1, d2, sigma3, sigma6, _form, atom->nlocal,
-                                 atom->nlocal+atom->nghost, mnf, maxspecial,
-                                 cell_size, gpu_mode, screen);
+  int success =
+      colloid_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj, a12,
+                       a1, a2, d1, d2, sigma3, sigma6, _form, atom->nlocal,
+                       atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen);
  memory->destroy(_form);
-  GPU_EXTRA::check_flag(success,error,world);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -190,15 +175,14 @@ double PairColloidGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairColloidGPU::cpu_compute(int start, int inum, int eflag,
-                                 int /* vflag */, int *ilist,
+void PairColloidGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
                                 int *numneigh, int **firstneigh)
 {
  // Host-side pair evaluation. In the code visible here the force is added to f[i] only and
  // energy/virial are reported through ev_tally_full(i, ...); nothing is written for atom j.
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double r,rsq,r2inv,r6inv,forcelj,factor_lj;
-  double c1,c2,fR,dUR,dUA;
-  double K[9],h[4],g[4];
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
+  double r, rsq, r2inv, r6inv, forcelj, factor_lj;
+  double c1, c2, fR, dUR, dUA;
+  double K[9], h[4], g[4];
  int *jlist;

  double **x = atom->x;
@ -225,90 +209,91 @@ void PairColloidGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq >= cutsq[itype][jtype]) continue;

      // form[itype][jtype] selects one of three closed-form expressions for this type pair.
      // SMALL_SMALL uses an r^-12 / r^-6 expression (lj1..lj4). The other two cases build
      // their terms in the scratch arrays K, g and h, and raise a fatal error for overlapping
      // particles only after fpair/evdwl have already been evaluated.
      switch (form[itype][jtype]) {
-      case SMALL_SMALL:
-        r2inv = 1.0/rsq;
-        r6inv = r2inv*r2inv*r2inv;
-        forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-        fpair = factor_lj*forcelj*r2inv;
-        if (eflag)
-          evdwl = r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) -
-            offset[itype][jtype];
-        break;
+        case SMALL_SMALL:
+          r2inv = 1.0 / rsq;
+          r6inv = r2inv * r2inv * r2inv;
+          forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
+          fpair = factor_lj * forcelj * r2inv;
+          if (eflag)
+            evdwl = r6inv * (r6inv * lj3[itype][jtype] - lj4[itype][jtype]) - offset[itype][jtype];
+          break;

-      case SMALL_LARGE:
-        c2 = a2[itype][jtype];
-        K[1] = c2*c2;
-        K[2] = rsq;
-        K[0] = K[1] - rsq;
-        K[4] = rsq*rsq;
-        K[3] = K[1] - K[2];
-        K[3] *= K[3]*K[3];
-        K[6] = K[3]*K[3];
-        fR = sigma3[itype][jtype]*a12[itype][jtype]*c2*K[1]/K[3];
-        fpair = 4.0/15.0*fR*factor_lj *
-          (2.0*(K[1]+K[2]) * (K[1]*(5.0*K[1]+22.0*K[2])+5.0*K[4]) *
-          sigma6[itype][jtype]/K[6]-5.0) / K[0];
-        if (eflag)
-          evdwl = 2.0/9.0*fR *
-            (1.0-(K[1]*(K[1]*(K[1]/3.0+3.0*K[2])+4.2*K[4])+K[2]*K[4]) *
-            sigma6[itype][jtype]/K[6]) - offset[itype][jtype];
-        if (rsq <= K[1])
-          error->one(FLERR,"Overlapping small/large in pair colloid");
-        break;
+        case SMALL_LARGE:
+          c2 = a2[itype][jtype];
+          K[1] = c2 * c2;
+          K[2] = rsq;
+          K[0] = K[1] - rsq;
+          K[4] = rsq * rsq;
+          K[3] = K[1] - K[2];
+          K[3] *= K[3] * K[3];
+          K[6] = K[3] * K[3];
+          fR = sigma3[itype][jtype] * a12[itype][jtype] * c2 * K[1] / K[3];
+          fpair = 4.0 / 15.0 * fR * factor_lj *
+              (2.0 * (K[1] + K[2]) * (K[1] * (5.0 * K[1] + 22.0 * K[2]) + 5.0 * K[4]) *
+                   sigma6[itype][jtype] / K[6] -
+               5.0) /
+              K[0];
+          if (eflag)
+            evdwl = 2.0 / 9.0 * fR *
+                    (1.0 -
+                     (K[1] * (K[1] * (K[1] / 3.0 + 3.0 * K[2]) + 4.2 * K[4]) + K[2] * K[4]) *
+                         sigma6[itype][jtype] / K[6]) -
+                offset[itype][jtype];
+          if (rsq <= K[1]) error->one(FLERR, "Overlapping small/large in pair colloid");
+          break;

      // LARGE_LARGE: evdwl is assigned even when eflag is 0 because dUR is derived from it;
      // the remaining energy terms and the offset are added only when eflag is set.
-      case LARGE_LARGE:
-        r = sqrt(rsq);
-        c1 = a1[itype][jtype];
-        c2 = a2[itype][jtype];
-        K[0] = c1*c2;
-        K[1] = c1+c2;
-        K[2] = c1-c2;
-        K[3] = K[1]+r;
-        K[4] = K[1]-r;
-        K[5] = K[2]+r;
-        K[6] = K[2]-r;
-        K[7] = 1.0/(K[3]*K[4]);
-        K[8] = 1.0/(K[5]*K[6]);
-        g[0] = pow(K[3],-7.0);
-        g[1] = pow(K[4],-7.0);
-        g[2] = pow(K[5],-7.0);
-        g[3] = pow(K[6],-7.0);
-        h[0] = ((K[3]+5.0*K[1])*K[3]+30.0*K[0])*g[0];
-        h[1] = ((K[4]+5.0*K[1])*K[4]+30.0*K[0])*g[1];
-        h[2] = ((K[5]+5.0*K[2])*K[5]-30.0*K[0])*g[2];
-        h[3] = ((K[6]+5.0*K[2])*K[6]-30.0*K[0])*g[3];
-        g[0] *= 42.0*K[0]/K[3]+6.0*K[1]+K[3];
-        g[1] *= 42.0*K[0]/K[4]+6.0*K[1]+K[4];
-        g[2] *= -42.0*K[0]/K[5]+6.0*K[2]+K[5];
-        g[3] *= -42.0*K[0]/K[6]+6.0*K[2]+K[6];
+        case LARGE_LARGE:
+          r = sqrt(rsq);
+          c1 = a1[itype][jtype];
+          c2 = a2[itype][jtype];
+          K[0] = c1 * c2;
+          K[1] = c1 + c2;
+          K[2] = c1 - c2;
+          K[3] = K[1] + r;
+          K[4] = K[1] - r;
+          K[5] = K[2] + r;
+          K[6] = K[2] - r;
+          K[7] = 1.0 / (K[3] * K[4]);
+          K[8] = 1.0 / (K[5] * K[6]);
+          g[0] = pow(K[3], -7.0);
+          g[1] = pow(K[4], -7.0);
+          g[2] = pow(K[5], -7.0);
+          g[3] = pow(K[6], -7.0);
+          h[0] = ((K[3] + 5.0 * K[1]) * K[3] + 30.0 * K[0]) * g[0];
+          h[1] = ((K[4] + 5.0 * K[1]) * K[4] + 30.0 * K[0]) * g[1];
+          h[2] = ((K[5] + 5.0 * K[2]) * K[5] - 30.0 * K[0]) * g[2];
+          h[3] = ((K[6] + 5.0 * K[2]) * K[6] - 30.0 * K[0]) * g[3];
+          g[0] *= 42.0 * K[0] / K[3] + 6.0 * K[1] + K[3];
+          g[1] *= 42.0 * K[0] / K[4] + 6.0 * K[1] + K[4];
+          g[2] *= -42.0 * K[0] / K[5] + 6.0 * K[2] + K[5];
+          g[3] *= -42.0 * K[0] / K[6] + 6.0 * K[2] + K[6];

-        fR = a12[itype][jtype]*sigma6[itype][jtype]/r/37800.0;
-        evdwl = fR * (h[0]-h[1]-h[2]+h[3]);
-        dUR = evdwl/r + 5.0*fR*(g[0]+g[1]-g[2]-g[3]);
-        dUA = -a12[itype][jtype]/3.0*r*((2.0*K[0]*K[7]+1.0)*K[7] +
-          (2.0*K[0]*K[8]-1.0)*K[8]);
-        fpair = factor_lj * (dUR+dUA)/r;
-        if (eflag)
-          evdwl += a12[itype][jtype]/6.0 *
-            (2.0*K[0]*(K[7]+K[8])-log(K[8]/K[7])) - offset[itype][jtype];
-        if (r <= K[1])
-          error->one(FLERR,"Overlapping large/large in pair colloid");
-        break;
+          fR = a12[itype][jtype] * sigma6[itype][jtype] / r / 37800.0;
+          evdwl = fR * (h[0] - h[1] - h[2] + h[3]);
+          dUR = evdwl / r + 5.0 * fR * (g[0] + g[1] - g[2] - g[3]);
+          dUA = -a12[itype][jtype] / 3.0 * r *
+              ((2.0 * K[0] * K[7] + 1.0) * K[7] + (2.0 * K[0] * K[8] - 1.0) * K[8]);
+          fpair = factor_lj * (dUR + dUA) / r;
+          if (eflag)
+            evdwl += a12[itype][jtype] / 6.0 * (2.0 * K[0] * (K[7] + K[8]) - log(K[8] / K[7])) -
+                offset[itype][jtype];
+          if (r <= K[1]) error->one(FLERR, "Overlapping large/large in pair colloid");
+          break;
      }

      if (eflag) evdwl *= factor_lj;

-      f[i][0] += delx*fpair;
-      f[i][1] += dely*fpair;
-      f[i][2] += delz*fpair;
+      f[i][0] += delx * fpair;
+      f[i][1] += dely * fpair;
+      f[i][2] += delz * fpair;

-      if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+      if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
    }
  }
 }
--- a/src/GPU/pair_coul_cut_gpu.cpp
+++ b/src/GPU/pair_coul_cut_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,27 +32,21 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition
 // Only prototypes appear here. In the code shown further down, init_style() calls
 // coul_gpu_init(), reinit() calls coul_gpu_reinit(), and compute() calls
 // coul_gpu_compute_n() when gpu_mode != GPU_FORCE and coul_gpu_compute() otherwise.

-int coul_gpu_init(const int ntypes, double **host_scale, double **cutsq,
-                  double *special_coul, const int nlocal,
-                  const int nall, const int max_nbors, const int maxspecial,
-                  const double cell_size, int &gpu_mode, FILE *screen,
-                  const double qqrd2e);
+int coul_gpu_init(const int ntypes, double **host_scale, double **cutsq, double *special_coul,
+                  const int nlocal, const int nall, const int max_nbors, const int maxspecial,
+                  const double cell_size, int &gpu_mode, FILE *screen, const double qqrd2e);
 void coul_gpu_reinit(const int ntypes, double **host_scale);
 void coul_gpu_clear();
-int ** coul_gpu_compute_n(const int ago, const int inum, const int nall,
-                          double **host_x, int *host_type, double *sublo,
-                          double *subhi, tagint *tag, int **nspecial,
-                          tagint **special, const bool eflag, const bool vflag,
-                          const bool eatom, const bool vatom, int &host_start,
-                          int **ilist, int **jnum, const double cpu_time,
-                          bool &success, double *host_q, double *boxlo,
-                          double *prd);
-void coul_gpu_compute(const int ago, const int inum,
-                      const int nall, double **host_x, int *host_type,
-                      int *ilist, int *numj, int **firstneigh,
-                      const bool eflag, const bool vflag, const bool eatom,
-                      const bool vatom, int &host_start, const double cpu_time,
-                      bool &success, double *host_q, const int nlocal,
+int **coul_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                         int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                         const bool vatom, int &host_start, int **ilist, int **jnum,
+                         const double cpu_time, bool &success, double *host_q, double *boxlo,
+                         double *prd);
+void coul_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                      int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                      const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                      const double cpu_time, bool &success, double *host_q, const int nlocal,
                      double *boxlo, double *prd);
 double coul_gpu_bytes();

@ -81,7 +73,7 @@ PairCoulCutGPU::~PairCoulCutGPU()

 void PairCoulCutGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -89,7 +81,7 @@ void PairCoulCutGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  // Two paths. When gpu_mode != GPU_FORCE the library call receives the sub-domain bounds and
  // special-neighbor data, and ilist/numneigh/firstneigh are taken from what it hands back.
  // Otherwise the arrays of this->list are passed in.
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -98,30 +90,25 @@ void PairCoulCutGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = coul_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                   atom->type, sublo, subhi,
-                                   atom->tag, atom->nspecial, atom->special,
-                                   eflag, vflag, eflag_atom, vflag_atom,
-                                   host_start, &ilist, &numneigh, cpu_time,
-                                   success, atom->q, domain->boxlo,
-                                   domain->prd);
+    firstneigh = coul_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                    atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                    eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
+                                    success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    coul_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                    vflag_atom, host_start, cpu_time, success, atom->q,
-                    atom->nlocal, domain->boxlo, domain->prd);
+    coul_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                     eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
+                     atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

  // host_start is set by the library call above. Entries from host_start up to inum are
  // evaluated by cpu_compute(), and the wall time it takes is stored in cpu_time, which is
  // passed to the library on the next call.
-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -134,9 +121,7 @@ void PairCoulCutGPU::compute(int eflag, int vflag)

 void PairCoulCutGPU::init_style()
 {
-  if (!atom->q_flag)
-    error->all(FLERR,"Pair style coul/cut/gpu requires atom attribute q");
-
+  if (!atom->q_flag) error->all(FLERR, "Pair style coul/cut/gpu requires atom attribute q");

  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
@ -144,10 +129,9 @@ void PairCoulCutGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      // A type pair is active when it was set explicitly or when both of its diagonal
      // entries were set; inactive pairs get a squared cutoff of 0.
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -155,21 +139,15 @@ void PairCoulCutGPU::init_style()
  }
  // maxcut holds the largest squared cutoff; cell_size is that cutoff plus the neighbor skin.
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  // 5% of neighbor->oneatom, truncated to int; passed in the max_nbors position of coul_gpu_init().
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = coul_gpu_init(atom->ntypes+1, scale, cutsq,
-                             force->special_coul, atom->nlocal,
-                             atom->nlocal+atom->nghost, mnf, maxspecial,
-                             cell_size, gpu_mode, screen, force->qqrd2e);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success = coul_gpu_init(atom->ntypes + 1, scale, cutsq, force->special_coul, atom->nlocal,
+                              atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
+                              screen, force->qqrd2e);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
  // compute() reads this->list only when gpu_mode == GPU_FORCE, so a full neighbor list is
  // requested only in that mode.
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -178,7 +156,7 @@ void PairCoulCutGPU::reinit()
 {
  // Run the base-class reinit first, then pass the scale table to the GPU library again.
  // The type count is ntypes + 1, the same value init_style() gives to coul_gpu_init().
  Pair::reinit();

-  coul_gpu_reinit(atom->ntypes+1, scale);
+  coul_gpu_reinit(atom->ntypes + 1, scale);
 }

 /* ---------------------------------------------------------------------- */
@ -191,13 +169,12 @@ double PairCoulCutGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairCoulCutGPU::cpu_compute(int start, int inum, int eflag,
-                                 int /* vflag */, int *ilist,
+void PairCoulCutGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
                                 int *numneigh, int **firstneigh)
 {
  // Host-side Coulomb loop. In the code visible here the force is added to f[i] only and the
  // tally goes through ev_tally_full(i, ...); nothing is written for atom j.
-  int i,j,ii,jj,jnum,itype,jtype;
-  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
-  double rsq,r2inv,forcecoul,factor_coul;
+  int i, j, ii, jj, jnum, itype, jtype;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, ecoul, fpair;
+  double rsq, r2inv, forcecoul, factor_coul;
  int *jlist;

  ecoul = 0.0;
@ -229,23 +206,21 @@ void PairCoulCutGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      // NOTE(review): init_style() and reinit() hand the scale table to the GPU library, but
      // neither forcecoul nor ecoul below multiplies by scale[itype][jtype]. Confirm whether
      // the host path is meant to ignore it.
      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
-        forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
-        fpair = factor_coul*forcecoul * r2inv;
+        r2inv = 1.0 / rsq;
+        forcecoul = qqrd2e * qtmp * q[j] * sqrt(r2inv);
+        fpair = factor_coul * forcecoul * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

-        if (eflag) {
-          ecoul = factor_coul * qqrd2e * qtmp*q[j]*sqrt(r2inv);
-        }
+        if (eflag) { ecoul = factor_coul * qqrd2e * qtmp * q[j] * sqrt(r2inv); }

-        if (evflag) ev_tally_full(i,0.0,ecoul,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, 0.0, ecoul, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_coul_debye_gpu.cpp
+++ b/src/GPU/pair_coul_debye_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,34 +32,28 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition
 // Only prototypes appear here. In the code shown further down, init_style() calls
 // cdebye_gpu_init(), reinit() calls cdebye_gpu_reinit(), and compute() calls
 // cdebye_gpu_compute_n() when gpu_mode != GPU_FORCE and cdebye_gpu_compute() otherwise.

-int cdebye_gpu_init(const int ntypes, double **host_scale, double **cutsq,
-                    double *special_coul, const int nlocal, const int nall,
-                    const int max_nbors, const int maxspecial,
-                    const double cell_size, int &gpu_mode, FILE *screen,
-                    const double qqrd2e, const double kappa);
+int cdebye_gpu_init(const int ntypes, double **host_scale, double **cutsq, double *special_coul,
+                    const int nlocal, const int nall, const int max_nbors, const int maxspecial,
+                    const double cell_size, int &gpu_mode, FILE *screen, const double qqrd2e,
+                    const double kappa);
 void cdebye_gpu_reinit(const int ntypes, double **host_scale);
 void cdebye_gpu_clear();
-int ** cdebye_gpu_compute_n(const int ago, const int inum, const int nall,
-                            double **host_x, int *host_type, double *sublo,
-                            double *subhi, tagint *tag, int **nspecial,
-                            tagint **special, const bool eflag,
-                            const bool vflag, const bool eatom,
-                            const bool vatom, int &host_start, int **ilist,
-                            int **jnum, const double cpu_time, bool &success,
-                            double *host_q, double *boxlo, double *prd);
-void cdebye_gpu_compute(const int ago, const int inum, const int nall,
-                        double **host_x, int *host_type, int *ilist,
-                        int *numj, int **firstneigh, const bool eflag,
-                        const bool vflag, const bool eatom, const bool vatom,
-                        int &host_start, const double cpu_time, bool &success,
-                        double *host_q, const int nlocal, double *boxlo,
-                        double *prd);
+int **cdebye_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                           int *host_type, double *sublo, double *subhi, tagint *tag,
+                           int **nspecial, tagint **special, const bool eflag, const bool vflag,
+                           const bool eatom, const bool vatom, int &host_start, int **ilist,
+                           int **jnum, const double cpu_time, bool &success, double *host_q,
+                           double *boxlo, double *prd);
+void cdebye_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                        int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                        const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                        const double cpu_time, bool &success, double *host_q, const int nlocal,
+                        double *boxlo, double *prd);
 double cdebye_gpu_bytes();

 /* ---------------------------------------------------------------------- */

-PairCoulDebyeGPU::PairCoulDebyeGPU(LAMMPS *lmp) :
-  PairCoulDebye(lmp), gpu_mode(GPU_FORCE)
+PairCoulDebyeGPU::PairCoulDebyeGPU(LAMMPS *lmp) : PairCoulDebye(lmp), gpu_mode(GPU_FORCE)
 {
  // gpu_mode starts as GPU_FORCE. It is later passed by non-const reference to
  // cdebye_gpu_init(), so that call is able to change it.
  respa_enable = 0;
  cpu_time = 0.0;
@ -82,7 +74,7 @@ PairCoulDebyeGPU::~PairCoulDebyeGPU()

 void PairCoulDebyeGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -90,7 +82,7 @@ void PairCoulDebyeGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  // Two paths. When gpu_mode != GPU_FORCE the library call receives the sub-domain bounds and
  // special-neighbor data, and ilist/numneigh/firstneigh are taken from what it hands back.
  // Otherwise the arrays of this->list are passed in.
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -99,30 +91,25 @@ void PairCoulDebyeGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = cdebye_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                    atom->type, sublo, subhi,
-                                    atom->tag, atom->nspecial, atom->special,
-                                    eflag, vflag, eflag_atom, vflag_atom,
-                                    host_start, &ilist, &numneigh, cpu_time,
-                                    success, atom->q, domain->boxlo,
-                                    domain->prd);
+    firstneigh = cdebye_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                      atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                      eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
+                                      cpu_time, success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    cdebye_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                     vflag_atom, host_start, cpu_time, success, atom->q,
-                     atom->nlocal, domain->boxlo, domain->prd);
+    cdebye_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                       eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
+                       atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

  // host_start is set by the library call above. Entries from host_start up to inum are
  // evaluated by cpu_compute(), and the wall time it takes is stored in cpu_time, which is
  // passed to the library on the next call.
-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -135,9 +122,7 @@ void PairCoulDebyeGPU::compute(int eflag, int vflag)

 void PairCoulDebyeGPU::init_style()
 {
-  if (!atom->q_flag)
-    error->all(FLERR,"Pair style coul/debye/gpu requires atom attribute q");
-
+  if (!atom->q_flag) error->all(FLERR, "Pair style coul/debye/gpu requires atom attribute q");

  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
@ -145,10 +130,9 @@ void PairCoulDebyeGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      // A type pair is active when it was set explicitly or when both of its diagonal
      // entries were set; inactive pairs get a squared cutoff of 0.
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -156,22 +140,15 @@ void PairCoulDebyeGPU::init_style()
  }
  // maxcut holds the largest squared cutoff; cell_size is that cutoff plus the neighbor skin.
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  // 5% of neighbor->oneatom, truncated to int; passed in the max_nbors position of cdebye_gpu_init().
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = cdebye_gpu_init(atom->ntypes+1, scale, cutsq,
-                                force->special_coul, atom->nlocal,
-                                atom->nlocal+atom->nghost, mnf, maxspecial,
-                                cell_size, gpu_mode, screen,
-                                force->qqrd2e, kappa);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success = cdebye_gpu_init(atom->ntypes + 1, scale, cutsq, force->special_coul, atom->nlocal,
+                                atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
+                                screen, force->qqrd2e, kappa);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
  // compute() reads this->list only when gpu_mode == GPU_FORCE, so a full neighbor list is
  // requested only in that mode.
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -180,7 +157,7 @@ void PairCoulDebyeGPU::reinit()
 {
  // Run the base-class reinit first, then pass the scale table to the GPU library again.
  // The type count is ntypes + 1, the same value init_style() gives to cdebye_gpu_init().
  Pair::reinit();

-  cdebye_gpu_reinit(atom->ntypes+1, scale);
+  cdebye_gpu_reinit(atom->ntypes + 1, scale);
 }

 /* ---------------------------------------------------------------------- */
@ -193,14 +170,13 @@ double PairCoulDebyeGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairCoulDebyeGPU::cpu_compute(int start, int inum, int eflag,
-                                   int /* vflag */, int *ilist,
+void PairCoulDebyeGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
                                   int *numneigh, int **firstneigh)
 {
  // Host-side screened-Coulomb loop. In the code visible here the force is added to f[i] only
  // and the tally goes through ev_tally_full(i, ...); nothing is written for atom j.
-  int i,j,ii,jj,jnum,itype,jtype;
-  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
-  double rsq,r2inv,forcecoul,factor_coul;
-  double r,rinv,screening;
+  int i, j, ii, jj, jnum, itype, jtype;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, ecoul, fpair;
+  double rsq, r2inv, forcecoul, factor_coul;
+  double r, rinv, screening;
  int *jlist;

  ecoul = 0.0;
@ -232,28 +208,26 @@ void PairCoulDebyeGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      // Inside the cutoff the interaction is damped by screening = exp(-kappa * r). The energy
      // is proportional to screening / r and the force factor carries (kappa + 1/r); both are
      // multiplied by scale[itype][jtype].
      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;
        r = sqrt(rsq);
-        rinv = 1.0/r;
-        screening = exp(-kappa*r);
-        forcecoul = qqrd2e * scale[itype][jtype] *
-          qtmp*q[j] * screening * (kappa + rinv);
-        fpair = factor_coul*forcecoul * r2inv;
+        rinv = 1.0 / r;
+        screening = exp(-kappa * r);
+        forcecoul = qqrd2e * scale[itype][jtype] * qtmp * q[j] * screening * (kappa + rinv);
+        fpair = factor_coul * forcecoul * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
-          ecoul = factor_coul * qqrd2e * scale[itype][jtype] *
-            qtmp*q[j] * rinv * screening;
+          ecoul = factor_coul * qqrd2e * scale[itype][jtype] * qtmp * q[j] * rinv * screening;
        }

-        if (evflag) ev_tally_full(i,0.0,ecoul,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, 0.0, ecoul, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_coul_dsf_gpu.cpp
+++ b/src/GPU/pair_coul_dsf_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,54 +23,45 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

 #include <cmath>

 // MY_PIS is sqrt(pi) and EWALD_F is 2/sqrt(pi). EWALD_P and A1..A5 equal the constants of
 // the Abramowitz-Stegun 7.1.26 rational approximation of erfc(); where they are used is not
 // visible in this part of the file.
 #define MY_PIS 1.77245385090551602729
-#define EWALD_F   1.12837917
-#define EWALD_P   0.3275911
-#define A1        0.254829592
-#define A2       -0.284496736
-#define A3        1.421413741
-#define A4       -1.453152027
-#define A5        1.061405429
+#define EWALD_F 1.12837917
+#define EWALD_P 0.3275911
+#define A1 0.254829592
+#define A2 -0.284496736
+#define A3 1.421413741
+#define A4 -1.453152027
+#define A5 1.061405429

 using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition
 // Only prototypes appear here. In the code shown further down, compute() calls
 // cdsf_gpu_compute_n() when gpu_mode != GPU_FORCE and cdsf_gpu_compute() otherwise.

-int cdsf_gpu_init(const int ntypes, const int nlocal, const int nall,
-                  const int max_nbors, const int maxspecial,
-                  const double cell_size, int &gpu_mode, FILE *screen,
-                  const double host_cut_coulsq,
-                  double *host_special_coul, const double qqrd2e,
-                  const double e_shift, const double f_shift,
-                  const double alpha);
+int cdsf_gpu_init(const int ntypes, const int nlocal, const int nall, const int max_nbors,
+                  const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen,
+                  const double host_cut_coulsq, double *host_special_coul, const double qqrd2e,
+                  const double e_shift, const double f_shift, const double alpha);
 void cdsf_gpu_clear();
-int ** cdsf_gpu_compute_n(const int ago, const int inum, const int nall,
-                          double **host_x, int *host_type, double *sublo,
-                          double *subhi, tagint *tag, int **nspecial,
-                          tagint **special, const bool eflag, const bool vflag,
-                          const bool eatom, const bool vatom, int &host_start,
-                          int **ilist, int **jnum, const double cpu_time,
-                          bool &success, double *host_q, double *boxlo,
-                          double *prd);
-void cdsf_gpu_compute(const int ago, const int inum,
-                      const int nall, double **host_x, int *host_type,
-                      int *ilist, int *numj, int **firstneigh,
-                      const bool eflag, const bool vflag, const bool eatom,
-                      const bool vatom, int &host_start, const double cpu_time,
-                      bool &success, double *host_q, const int nlocal,
+int **cdsf_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                         int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                         const bool vatom, int &host_start, int **ilist, int **jnum,
+                         const double cpu_time, bool &success, double *host_q, double *boxlo,
+                         double *prd);
+void cdsf_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                      int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                      const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                      const double cpu_time, bool &success, double *host_q, const int nlocal,
                      double *boxlo, double *prd);
 double cdsf_gpu_bytes();

 /* ---------------------------------------------------------------------- */

-PairCoulDSFGPU::PairCoulDSFGPU(LAMMPS *lmp) : PairCoulDSF(lmp),
-  gpu_mode(GPU_FORCE)
+PairCoulDSFGPU::PairCoulDSFGPU(LAMMPS *lmp) : PairCoulDSF(lmp), gpu_mode(GPU_FORCE)
 {
  // gpu_mode starts as GPU_FORCE. The cdsf_gpu_init() prototype takes it by non-const
  // reference, so that call is able to change it.
  respa_enable = 0;
  reinitflag = 0;
@ -93,7 +83,7 @@ PairCoulDSFGPU::~PairCoulDSFGPU()

 void PairCoulDSFGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -101,7 +91,7 @@ void PairCoulDSFGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -110,30 +100,25 @@ void PairCoulDSFGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = cdsf_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                    atom->type, sublo, subhi,
-                                    atom->tag, atom->nspecial, atom->special,
-                                    eflag, vflag, eflag_atom, vflag_atom,
-                                    host_start, &ilist, &numneigh, cpu_time,
-                                    success, atom->q, domain->boxlo,
-                                    domain->prd);
+    firstneigh = cdsf_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                    atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                    eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
+                                    success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    cdsf_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                     vflag_atom, host_start, cpu_time, success, atom->q,
+    cdsf_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                     eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
                     atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -146,9 +131,7 @@ void PairCoulDSFGPU::compute(int eflag, int vflag)

 void PairCoulDSFGPU::init_style()
 {
-  if (!atom->q_flag)
-    error->all(FLERR,"Pair style coul/dsf/gpu requires atom attribute q");
-
+  if (!atom->q_flag) error->all(FLERR, "Pair style coul/dsf/gpu requires atom attribute q");

  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
@ -156,10 +139,9 @@ void PairCoulDSFGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -168,27 +150,20 @@ void PairCoulDSFGPU::init_style()
  double cell_size = sqrt(maxcut) + neighbor->skin;

  cut_coulsq = cut_coul * cut_coul;
-  double erfcc = erfc(alpha*cut_coul);
-  double erfcd = exp(-alpha*alpha*cut_coul*cut_coul);
-  f_shift = -(erfcc/cut_coulsq + 2.0/MY_PIS*alpha*erfcd/cut_coul);
-  e_shift = erfcc/cut_coul - f_shift*cut_coul;
+  double erfcc = erfc(alpha * cut_coul);
+  double erfcd = exp(-alpha * alpha * cut_coul * cut_coul);
+  f_shift = -(erfcc / cut_coulsq + 2.0 / MY_PIS * alpha * erfcd / cut_coul);
+  e_shift = erfcc / cut_coul - f_shift * cut_coul;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = cdsf_gpu_init(atom->ntypes+1, atom->nlocal,
-                              atom->nlocal+atom->nghost, mnf, maxspecial,
-                              cell_size, gpu_mode, screen, cut_coulsq,
-                              force->special_coul, force->qqrd2e, e_shift,
-                              f_shift, alpha);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success = cdsf_gpu_init(atom->ntypes + 1, atom->nlocal, atom->nlocal + atom->nghost, mnf,
+                              maxspecial, cell_size, gpu_mode, screen, cut_coulsq,
+                              force->special_coul, force->qqrd2e, e_shift, f_shift, alpha);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -201,14 +176,13 @@ double PairCoulDSFGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairCoulDSFGPU::cpu_compute(int start, int inum, int eflag,
-                                 int /* vflag */, int *ilist,
+void PairCoulDSFGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
                                 int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum;
-  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
-  double r,rsq,r2inv,forcecoul,factor_coul;
-  double prefactor,erfcc,erfcd,t;
+  int i, j, ii, jj, jnum;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, ecoul, fpair;
+  double r, rsq, r2inv, forcecoul, factor_coul;
+  double prefactor, erfcc, erfcd, t;
  int *jlist;

  ecoul = 0.0;
@ -232,8 +206,8 @@ void PairCoulDSFGPU::cpu_compute(int start, int inum, int eflag,
    jnum = numneigh[i];

    if (evflag) {
-      double e_self = -(e_shift/2.0 + alpha/MY_PIS) * qtmp*qtmp*qqrd2e;
-      ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
+      double e_self = -(e_shift / 2.0 + alpha / MY_PIS) * qtmp * qtmp * qqrd2e;
+      ev_tally(i, i, nlocal, 0, 0.0, e_self, 0.0, 0.0, 0.0, 0.0);
    }

    for (jj = 0; jj < jnum; jj++) {
@ -244,32 +218,32 @@ void PairCoulDSFGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;

      if (rsq < cut_coulsq) {
-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;
        r = sqrt(rsq);
-        prefactor = qqrd2e*qtmp*q[j]/r;
-        erfcd = exp(-alpha*alpha*r*r);
-        t = 1.0 / (1.0 + EWALD_P*alpha*r);
-        erfcc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * erfcd;
-        forcecoul = prefactor * (erfcc/r + 2.0*alpha/MY_PIS * erfcd +
-          r*f_shift) * r;
-        if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+        prefactor = qqrd2e * qtmp * q[j] / r;
+        erfcd = exp(-alpha * alpha * r * r);
+        t = 1.0 / (1.0 + EWALD_P * alpha * r);
+        erfcc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * erfcd;
+        forcecoul = prefactor * (erfcc / r + 2.0 * alpha / MY_PIS * erfcd + r * f_shift) * r;
+        if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;

        fpair = forcecoul * r2inv;
-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          if (rsq < cut_coulsq) {
-            ecoul = prefactor * (erfcc - r*e_shift - rsq*f_shift);
-            if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
-          } else ecoul = 0.0;
+            ecoul = prefactor * (erfcc - r * e_shift - rsq * f_shift);
+            if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
+          } else
+            ecoul = 0.0;
        }

-        if (evflag) ev_tally_full(i,0.0,ecoul,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, 0.0, ecoul, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_coul_long_cs_gpu.cpp
+++ b/src/GPU/pair_coul_long_cs_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -25,7 +24,6 @@
 #include "gpu_extra.h"
 #include "kspace.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -33,14 +31,14 @@

 using namespace LAMMPS_NS;

-#define EWALD_F   1.12837917
-#define EWALD_P   9.95473818e-1
-#define B0       -0.1335096380159268
-#define B1       -2.57839507e-1
-#define B2       -1.37203639e-1
-#define B3       -8.88822059e-3
-#define B4       -5.80844129e-3
-#define B5        1.14652755e-1
+#define EWALD_F 1.12837917
+#define EWALD_P 9.95473818e-1
+#define B0 -0.1335096380159268
+#define B1 -2.57839507e-1
+#define B2 -1.37203639e-1
+#define B3 -8.88822059e-3
+#define B4 -5.80844129e-3
+#define B5 1.14652755e-1

 #define EPSILON 1.0e-20
 #define EPS_EWALD 1.0e-6
@ -48,33 +46,28 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int clcs_gpu_init(const int ntypes, double **scale, const int nlocal,
-                  const int nall, const int max_nbors, const int maxspecial,
-                  const double cell_size, int &gpu_mode, FILE *screen,
-                  double host_cut_coulsq, double *host_special_coul,
+int clcs_gpu_init(const int ntypes, double **scale, const int nlocal, const int nall,
+                  const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode,
+                  FILE *screen, double host_cut_coulsq, double *host_special_coul,
                  const double qqrd2e, const double g_ewald);
 void clcs_gpu_reinit(const int ntypes, double **scale);
 void clcs_gpu_clear();
-int ** clcs_gpu_compute_n(const int ago, const int inum,
-                          const int nall, double **host_x, int *host_type,
-                          double *sublo, double *subhi, tagint *tag,
-                          int **nspecial, tagint **special, const bool eflag,
-                          const bool vflag, const bool eatom, const bool vatom,
-                          int &host_start, int **ilist, int **jnum,
-                          const double cpu_time, bool &success, double *host_q,
-                          double *boxlo, double *prd);
-void clcs_gpu_compute(const int ago, const int inum, const int nall,
-                      double **host_x, int *host_type, int *ilist, int *numj,
-                      int **firstneigh, const bool eflag, const bool vflag,
-                      const bool eatom, const bool vatom, int &host_start,
-                      const double cpu_time, bool &success, double *host_q,
-                      const int nlocal, double *boxlo, double *prd);
+int **clcs_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                         int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                         const bool vatom, int &host_start, int **ilist, int **jnum,
+                         const double cpu_time, bool &success, double *host_q, double *boxlo,
+                         double *prd);
+void clcs_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                      int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                      const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                      const double cpu_time, bool &success, double *host_q, const int nlocal,
+                      double *boxlo, double *prd);
 double clcs_gpu_bytes();

 /* ---------------------------------------------------------------------- */

-PairCoulLongCSGPU::PairCoulLongCSGPU(LAMMPS *lmp) :
-  PairCoulLongCS(lmp), gpu_mode(GPU_FORCE)
+PairCoulLongCSGPU::PairCoulLongCSGPU(LAMMPS *lmp) : PairCoulLongCS(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  cpu_time = 0.0;
@ -95,7 +88,7 @@ PairCoulLongCSGPU::~PairCoulLongCSGPU()

 void PairCoulLongCSGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -103,7 +96,7 @@ void PairCoulLongCSGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -112,30 +105,25 @@ void PairCoulLongCSGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = clcs_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                  atom->type, sublo, subhi,
-                                  atom->tag, atom->nspecial, atom->special,
-                                  eflag, vflag, eflag_atom, vflag_atom,
-                                  host_start, &ilist, &numneigh, cpu_time,
-                                  success, atom->q, domain->boxlo,
-                                  domain->prd);
+    firstneigh = clcs_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                    atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                    eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
+                                    success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    clcs_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                   ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                   vflag_atom, host_start, cpu_time, success, atom->q,
-                   atom->nlocal, domain->boxlo, domain->prd);
+    clcs_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                     eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
+                     atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -150,15 +138,12 @@ void PairCoulLongCSGPU::init_style()
 {
  cut_respa = nullptr;

-  if (!atom->q_flag)
-    error->all(FLERR,"Pair style coul/long/cs/gpu requires atom attribute q");
+  if (!atom->q_flag) error->all(FLERR, "Pair style coul/long/cs/gpu requires atom attribute q");

  // Call init_one calculation make sure scale is correct
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
-      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        init_one(i,j);
-      }
+      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { init_one(i, j); }
    }
  }
  double cell_size = cut_coul + neighbor->skin;
@ -167,30 +152,23 @@ void PairCoulLongCSGPU::init_style()

  // insure use of KSpace long-range solver, set g_ewald

-  if (force->kspace == nullptr)
-    error->all(FLERR,"Pair style requires a KSpace style");
+  if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
  g_ewald = force->kspace->g_ewald;

  // setup force tables

-  if (ncoultablebits) init_tables(cut_coul,cut_respa);
+  if (ncoultablebits) init_tables(cut_coul, cut_respa);

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = clcs_gpu_init(atom->ntypes+1, scale,
-                            atom->nlocal, atom->nlocal+atom->nghost, mnf,
-                            maxspecial, cell_size, gpu_mode, screen, cut_coulsq,
-                            force->special_coul, force->qqrd2e, g_ewald);
+  int success = clcs_gpu_init(atom->ntypes + 1, scale, atom->nlocal, atom->nlocal + atom->nghost,
+                              mnf, maxspecial, cell_size, gpu_mode, screen, cut_coulsq,
+                              force->special_coul, force->qqrd2e, g_ewald);

-  GPU_EXTRA::check_flag(success,error,world);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -199,7 +177,7 @@ void PairCoulLongCSGPU::reinit()
 {
  Pair::reinit();

-  clcs_gpu_reinit(atom->ntypes+1, scale);
+  clcs_gpu_reinit(atom->ntypes + 1, scale);
 }

 /* ---------------------------------------------------------------------- */
@ -212,15 +190,14 @@ double PairCoulLongCSGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairCoulLongCSGPU::cpu_compute(int start, int inum, int eflag,
-                                  int /* vflag */, int *ilist,
-                                  int *numneigh, int **firstneigh)
+void PairCoulLongCSGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                                    int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itable,itype,jtype;
-  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
-  double fraction,table;
-  double r,r2inv,forcecoul,factor_coul;
-  double grij,expm2,prefactor,t,erfc,u;
+  int i, j, ii, jj, jnum, itable, itype, jtype;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, ecoul, fpair;
+  double fraction, table;
+  double r, r2inv, forcecoul, factor_coul;
+  double grij, expm2, prefactor, t, erfc, u;
  int *jlist;
  double rsq;

@ -253,37 +230,38 @@ void PairCoulLongCSGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cut_coulsq) {
-        rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
-        r2inv = 1.0/rsq;
+        // Add EPSILON for the case r = 0; that interaction must be removed by a special bond
+        rsq += EPSILON;
+        r2inv = 1.0 / rsq;
        if (!ncoultablebits || rsq <= tabinnersq) {
          r = sqrt(rsq);
-          prefactor = qqrd2e * scale[itype][jtype] * qtmp*q[j];
+          prefactor = qqrd2e * scale[itype][jtype] * qtmp * q[j];
          if (factor_coul < 1.0) {
            // When bonded parts are being calculated a minimal distance (EPS_EWALD)
            // has to be added to the prefactor and erfc in order to make the
            // used approximation functions for the Ewald correction valid
-            grij = g_ewald * (r+EPS_EWALD);
-            expm2 = exp(-grij*grij);
-            t = 1.0 / (1.0 + EWALD_P*grij);
+            grij = g_ewald * (r + EPS_EWALD);
+            expm2 = exp(-grij * grij);
+            t = 1.0 / (1.0 + EWALD_P * grij);
            u = 1.0 - t;
-            erfc = t * (1.+u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
-            prefactor /= (r+EPS_EWALD);
-            forcecoul = prefactor * (erfc + EWALD_F*grij*expm2 - (1.0-factor_coul));
+            erfc = t * (1. + u * (B0 + u * (B1 + u * (B2 + u * (B3 + u * (B4 + u * B5)))))) * expm2;
+            prefactor /= (r + EPS_EWALD);
+            forcecoul = prefactor * (erfc + EWALD_F * grij * expm2 - (1.0 - factor_coul));
            // Additionally r2inv needs to be accordingly modified since the later
            // scaling of the overall force shall be consistent
-            r2inv = 1.0/(rsq + EPS_EWALD_SQR);
+            r2inv = 1.0 / (rsq + EPS_EWALD_SQR);
          } else {
            grij = g_ewald * r;
-            expm2 = exp(-grij*grij);
-            t = 1.0 / (1.0 + EWALD_P*grij);
+            expm2 = exp(-grij * grij);
+            t = 1.0 / (1.0 + EWALD_P * grij);
            u = 1.0 - t;
-            erfc = t * (1.+u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
+            erfc = t * (1. + u * (B0 + u * (B1 + u * (B2 + u * (B3 + u * (B4 + u * B5)))))) * expm2;
            prefactor /= r;
-            forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+            forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
          }
        } else {
          union_int_float_t rsq_lookup;
@ -291,34 +269,35 @@ void PairCoulLongCSGPU::cpu_compute(int start, int inum, int eflag,
          itable = rsq_lookup.i & ncoulmask;
          itable >>= ncoulshiftbits;
          fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
-          table = ftable[itable] + fraction*dftable[itable];
-          forcecoul = scale[itype][jtype] * qtmp*q[j] * table;
+          table = ftable[itable] + fraction * dftable[itable];
+          forcecoul = scale[itype][jtype] * qtmp * q[j] * table;
          if (factor_coul < 1.0) {
-            table = ctable[itable] + fraction*dctable[itable];
-            prefactor = scale[itype][jtype] * qtmp*q[j] * table;
-            forcecoul -= (1.0-factor_coul)*prefactor;
+            table = ctable[itable] + fraction * dctable[itable];
+            prefactor = scale[itype][jtype] * qtmp * q[j] * table;
+            forcecoul -= (1.0 - factor_coul) * prefactor;
          }
        }

        fpair = forcecoul * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          if (rsq < cut_coulsq) {
            if (!ncoultablebits || rsq <= tabinnersq)
-              ecoul = prefactor*erfc;
+              ecoul = prefactor * erfc;
            else {
-              table = etable[itable] + fraction*detable[itable];
-              ecoul = scale[itype][jtype] * qtmp*q[j] * table;
+              table = etable[itable] + fraction * detable[itable];
+              ecoul = scale[itype][jtype] * qtmp * q[j] * table;
            }
-            if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
-          } else ecoul = 0.0;
+            if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
+          } else
+            ecoul = 0.0;
        }

-        if (evflag) ev_tally_full(i,0.0,ecoul,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, 0.0, ecoul, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_coul_long_gpu.cpp
+++ b/src/GPU/pair_coul_long_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -25,51 +24,44 @@
 #include "gpu_extra.h"
 #include "kspace.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

 #include <cmath>

-#define EWALD_F   1.12837917
-#define EWALD_P   0.3275911
-#define A1        0.254829592
-#define A2       -0.284496736
-#define A3        1.421413741
-#define A4       -1.453152027
-#define A5        1.061405429
+#define EWALD_F 1.12837917
+#define EWALD_P 0.3275911
+#define A1 0.254829592
+#define A2 -0.284496736
+#define A3 1.421413741
+#define A4 -1.453152027
+#define A5 1.061405429

 using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int cl_gpu_init(const int ntypes, double **scale,
-                const int nlocal, const int nall, const int max_nbors,
-                const int maxspecial, const double cell_size, int &gpu_mode,
+int cl_gpu_init(const int ntypes, double **scale, const int nlocal, const int nall,
+                const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode,
                FILE *screen, double host_cut_coulsq, double *host_special_coul,
                const double qqrd2e, const double g_ewald);
 void cl_gpu_reinit(const int ntypes, double **scale);
 void cl_gpu_clear();
-int ** cl_gpu_compute_n(const int ago, const int inum,
-                        const int nall, double **host_x, int *host_type,
-                        double *sublo, double *subhi, tagint *tag,
-                        int **nspecial, tagint **special, const bool eflag,
-                        const bool vflag, const bool eatom, const bool vatom,
-                        int &host_start, int **ilist, int **jnum,
-                        const double cpu_time, bool &success, double *host_q,
-                        double *boxlo, double *prd);
-void cl_gpu_compute(const int ago, const int inum, const int nall,
-                    double **host_x, int *host_type, int *ilist, int *numj,
-                    int **firstneigh, const bool eflag, const bool vflag,
-                    const bool eatom, const bool vatom, int &host_start,
-                    const double cpu_time, bool &success, double *host_q,
-                    const int nlocal, double *boxlo, double *prd);
+int **cl_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                       int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                       tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                       const bool vatom, int &host_start, int **ilist, int **jnum,
+                       const double cpu_time, bool &success, double *host_q, double *boxlo,
+                       double *prd);
+void cl_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
+                    int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
+                    const bool eatom, const bool vatom, int &host_start, const double cpu_time,
+                    bool &success, double *host_q, const int nlocal, double *boxlo, double *prd);
 double cl_gpu_bytes();

 /* ---------------------------------------------------------------------- */

-PairCoulLongGPU::PairCoulLongGPU(LAMMPS *lmp) :
-  PairCoulLong(lmp), gpu_mode(GPU_FORCE)
+PairCoulLongGPU::PairCoulLongGPU(LAMMPS *lmp) : PairCoulLong(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  cpu_time = 0.0;
@ -90,7 +82,7 @@ PairCoulLongGPU::~PairCoulLongGPU()

 void PairCoulLongGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -98,7 +90,7 @@ void PairCoulLongGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -107,30 +99,25 @@ void PairCoulLongGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = cl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                  atom->type, sublo, subhi,
-                                  atom->tag, atom->nspecial, atom->special,
-                                  eflag, vflag, eflag_atom, vflag_atom,
-                                  host_start, &ilist, &numneigh, cpu_time,
-                                  success, atom->q, domain->boxlo,
-                                  domain->prd);
+    firstneigh = cl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                  atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                  eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
+                                  success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    cl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                   ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                   vflag_atom, host_start, cpu_time, success, atom->q,
+    cl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                   eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
                   atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -145,15 +132,12 @@ void PairCoulLongGPU::init_style()
 {
  cut_respa = nullptr;

-  if (!atom->q_flag)
-    error->all(FLERR,"Pair style coul/long/gpu requires atom attribute q");
+  if (!atom->q_flag) error->all(FLERR, "Pair style coul/long/gpu requires atom attribute q");

  // Call init_one calculation make sure scale is correct
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
-      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        init_one(i,j);
-      }
+      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { init_one(i, j); }
    }
  }
  double cell_size = cut_coul + neighbor->skin;
@ -162,30 +146,23 @@ void PairCoulLongGPU::init_style()

  // insure use of KSpace long-range solver, set g_ewald

-  if (force->kspace == nullptr)
-    error->all(FLERR,"Pair style requires a KSpace style");
+  if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
  g_ewald = force->kspace->g_ewald;

  // setup force tables

-  if (ncoultablebits) init_tables(cut_coul,cut_respa);
+  if (ncoultablebits) init_tables(cut_coul, cut_respa);

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = cl_gpu_init(atom->ntypes+1, scale,
-                            atom->nlocal, atom->nlocal+atom->nghost, mnf,
+  int success = cl_gpu_init(atom->ntypes + 1, scale, atom->nlocal, atom->nlocal + atom->nghost, mnf,
                            maxspecial, cell_size, gpu_mode, screen, cut_coulsq,
                            force->special_coul, force->qqrd2e, g_ewald);

-  GPU_EXTRA::check_flag(success,error,world);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -194,7 +171,7 @@ void PairCoulLongGPU::reinit()
 {
  Pair::reinit();

-  cl_gpu_reinit(atom->ntypes+1, scale);
+  cl_gpu_reinit(atom->ntypes + 1, scale);
 }

 /* ---------------------------------------------------------------------- */
@ -207,15 +184,14 @@ double PairCoulLongGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairCoulLongGPU::cpu_compute(int start, int inum, int eflag,
-                                  int /* vflag */, int *ilist,
+void PairCoulLongGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
                                  int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itable;
-  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
-  double fraction,table;
-  double r,r2inv,forcecoul,factor_coul;
-  double grij,expm2,prefactor,t,erfc;
+  int i, j, ii, jj, jnum, itable;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, ecoul, fpair;
+  double fraction, table;
+  double r, r2inv, forcecoul, factor_coul;
+  double grij, expm2, prefactor, t, erfc;
  int *jlist;
  double rsq;

@ -246,54 +222,55 @@ void PairCoulLongGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;

-      r2inv = 1.0/rsq;
+      r2inv = 1.0 / rsq;

      if (rsq < cut_coulsq) {
        if (!ncoultablebits || rsq <= tabinnersq) {
          r = sqrt(rsq);
          grij = g_ewald * r;
-          expm2 = exp(-grij*grij);
-          t = 1.0 / (1.0 + EWALD_P*grij);
-          erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
-          prefactor = qqrd2e * qtmp*q[j]/r;
-          forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
-          if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+          expm2 = exp(-grij * grij);
+          t = 1.0 / (1.0 + EWALD_P * grij);
+          erfc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * expm2;
+          prefactor = qqrd2e * qtmp * q[j] / r;
+          forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
+          if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
        } else {
          union_int_float_t rsq_lookup;
          rsq_lookup.f = rsq;
          itable = rsq_lookup.i & ncoulmask;
          itable >>= ncoulshiftbits;
          fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
-          table = ftable[itable] + fraction*dftable[itable];
-          forcecoul = qtmp*q[j] * table;
+          table = ftable[itable] + fraction * dftable[itable];
+          forcecoul = qtmp * q[j] * table;
          if (factor_coul < 1.0) {
-            table = ctable[itable] + fraction*dctable[itable];
-            prefactor = qtmp*q[j] * table;
-            forcecoul -= (1.0-factor_coul)*prefactor;
+            table = ctable[itable] + fraction * dctable[itable];
+            prefactor = qtmp * q[j] * table;
+            forcecoul -= (1.0 - factor_coul) * prefactor;
          }
        }

        fpair = forcecoul * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          if (rsq < cut_coulsq) {
            if (!ncoultablebits || rsq <= tabinnersq)
-              ecoul = prefactor*erfc;
+              ecoul = prefactor * erfc;
            else {
-              table = etable[itable] + fraction*detable[itable];
-              ecoul = qtmp*q[j] * table;
+              table = etable[itable] + fraction * detable[itable];
+              ecoul = qtmp * q[j] * table;
            }
-            if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
-          } else ecoul = 0.0;
+            if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
+          } else
+            ecoul = 0.0;
        }

-        if (evflag) ev_tally_full(i,0.0,ecoul,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, 0.0, ecoul, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_dpd_gpu.cpp
+++ b/src/GPU/pair_dpd_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"
 #include "update.h"
@ -35,29 +33,24 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int dpd_gpu_init(const int ntypes, double **cutsq, double **host_a0,
-                 double **host_gamma, double **host_sigma, double **host_cut,
-                 double *special_lj, const int inum,
-                 const int nall, const int max_nbors,  const int maxspecial,
-                 const double cell_size, int &gpu_mode, FILE *screen);
+int dpd_gpu_init(const int ntypes, double **cutsq, double **host_a0, double **host_gamma,
+                 double **host_sigma, double **host_cut, double *special_lj, const int inum,
+                 const int nall, const int max_nbors, const int maxspecial, const double cell_size,
+                 int &gpu_mode, FILE *screen);
 void dpd_gpu_clear();
-int ** dpd_gpu_compute_n(const int ago, const int inum_full, const int nall,
-                         double **host_x, int *host_type, double *sublo,
-                         double *subhi, tagint *tag, int **nspecial,
-                         tagint **special, const bool eflag, const bool vflag,
-                         const bool eatom, const bool vatom, int &host_start,
-                         int **ilist, int **jnum, const double cpu_time,
-                         bool &success, double **host_v, const double dtinvsqrt,
-                         const int seed, const int timestep,
-                         double *boxlo, double *prd);
-void dpd_gpu_compute(const int ago, const int inum_full, const int nall,
-                     double **host_x, int *host_type, int *ilist, int *numj,
-                     int **firstneigh, const bool eflag, const bool vflag,
-                     const bool eatom, const bool vatom, int &host_start,
-                     const double cpu_time, bool &success, tagint *tag,
-                     double **host_v, const double dtinvsqrt,
-                     const int seed, const int timestep,
-                     const int nlocal, double *boxlo, double *prd);
+int **dpd_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
+                        int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                        tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                        const bool vatom, int &host_start, int **ilist, int **jnum,
+                        const double cpu_time, bool &success, double **host_v,
+                        const double dtinvsqrt, const int seed, const int timestep, double *boxlo,
+                        double *prd);
+void dpd_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
+                     int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                     const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                     const double cpu_time, bool &success, tagint *tag, double **host_v,
+                     const double dtinvsqrt, const int seed, const int timestep, const int nlocal,
+                     double *boxlo, double *prd);
 double dpd_gpu_bytes();

 #define EPSILON 1.0e-10
@ -66,7 +59,8 @@ double dpd_gpu_bytes();
 //#define _USE_UNIFORM_SARU_TEA8
 //#define _USE_GAUSSIAN_SARU_LCG

-#if !defined(_USE_UNIFORM_SARU_LCG) && !defined(_USE_UNIFORM_SARU_TEA8) && !defined(_USE_GAUSSIAN_SARU_LCG)
+#if !defined(_USE_UNIFORM_SARU_LCG) && !defined(_USE_UNIFORM_SARU_TEA8) && \
+    !defined(_USE_GAUSSIAN_SARU_LCG)
 #define _USE_UNIFORM_SARU_LCG
 #endif

@ -75,9 +69,9 @@ double dpd_gpu_bytes();
 // 2. C. L. Phillips, J. A. Anderson, S. C. Glotzer, J. Comput. Phys. 230 (2011), 7191-7201.
 // PRNG period = 3666320093*2^32 ~ 2^64 ~ 10^19

-#define LCGA 0x4beb5d59 // Full period 32 bit LCG
+#define LCGA 0x4beb5d59    // Full period 32 bit LCG
 #define LCGC 0x2600e1f7
-#define oWeylPeriod 0xda879add // Prime period 3666320093
+#define oWeylPeriod 0xda879add    // Prime period 3666320093
 #define oWeylOffset 0x8009d14b
 #define TWO_N32 0.232830643653869628906250e-9f /* 2^-32 */

@ -89,28 +83,29 @@ double dpd_gpu_bytes();
 // Curly brackets to make variables local to the scope.
 #ifdef _USE_UNIFORM_SARU_LCG
 #define numtyp double
-#define SQRT3 (numtyp)1.7320508075688772935274463
-#define saru(seed1, seed2, seed, timestep, randnum) {                         \
-  unsigned int seed3 = seed + timestep;                                       \
-  seed3^=(seed1<<7)^(seed2>>6);                                               \
-  seed2+=(seed1>>4)^(seed3>>15);                                              \
-  seed1^=(seed2<<9)+(seed3<<8);                                               \
-  seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1));                               \
-  seed2+=0x72BE1579*((seed1<<4)  ^ (seed3>>16));                              \
-  seed1^=0x3F38A6ED*((seed3>>5)  ^ (((signed int)seed2)>>22));                \
-  seed2+=seed1*seed3;                                                         \
-  seed1+=seed3 ^ (seed2>>2);                                                  \
-  seed2^=((signed int)seed2)>>17;                                             \
-  unsigned int state  = 0x79dedea3*(seed1^(((signed int)seed1)>>14));         \
-  unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8);           \
-  state  = state + (wstate*(wstate^0xdddf97f5));                              \
-  wstate = 0xABCB96F7 + (wstate>>1);                                          \
-  state = LCGA*state + LCGC;                                                  \
-  wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod);   \
-  unsigned int v = (state ^ (state>>26)) + wstate;                            \
-  unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7);                      \
-  randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0);                        \
-}
+#define SQRT3 (numtyp) 1.7320508075688772935274463
+#define saru(seed1, seed2, seed, timestep, randnum)                                \
+  {                                                                                \
+    unsigned int seed3 = seed + timestep;                                          \
+    seed3 ^= (seed1 << 7) ^ (seed2 >> 6);                                          \
+    seed2 += (seed1 >> 4) ^ (seed3 >> 15);                                         \
+    seed1 ^= (seed2 << 9) + (seed3 << 8);                                          \
+    seed3 ^= 0xA5366B4D * ((seed2 >> 11) ^ (seed1 << 1));                          \
+    seed2 += 0x72BE1579 * ((seed1 << 4) ^ (seed3 >> 16));                          \
+    seed1 ^= 0x3F38A6ED * ((seed3 >> 5) ^ (((signed int) seed2) >> 22));           \
+    seed2 += seed1 * seed3;                                                        \
+    seed1 += seed3 ^ (seed2 >> 2);                                                 \
+    seed2 ^= ((signed int) seed2) >> 17;                                           \
+    unsigned int state = 0x79dedea3 * (seed1 ^ (((signed int) seed1) >> 14));      \
+    unsigned int wstate = (state + seed2) ^ (((signed int) state) >> 8);           \
+    state = state + (wstate * (wstate ^ 0xdddf97f5));                              \
+    wstate = 0xABCB96F7 + (wstate >> 1);                                           \
+    state = LCGA * state + LCGC;                                                   \
+    wstate = wstate + oWeylOffset + ((((signed int) wstate) >> 31) & oWeylPeriod); \
+    unsigned int v = (state ^ (state >> 26)) + wstate;                             \
+    unsigned int s = (signed int) ((v ^ (v >> 20)) * 0x6957f5a7);                  \
+    randnum = SQRT3 * (s * TWO_N32 * (numtyp) 2.0 - (numtyp) 1.0);                 \
+  }
 #endif

 // specifically implemented for steps = 1; high = 1.0; low = -1.0
@ -119,38 +114,39 @@ double dpd_gpu_bytes();
 // Afshar et al. multiplies u in [-0.5;0.5] with sqrt(12)
 #ifdef _USE_UNIFORM_SARU_TEA8
 #define numtyp double
-#define SQRT3 (numtyp)1.7320508075688772935274463
+#define SQRT3 (numtyp) 1.7320508075688772935274463
 #define k0 0xA341316C
 #define k1 0xC8013EA4
 #define k2 0xAD90777D
 #define k3 0x7E95761E
 #define delta 0x9e3779b9
 #define rounds 8
-#define saru(seed1, seed2, seed, timestep, randnum) {                         \
-  unsigned int seed3 = seed + timestep;                                       \
-  seed3^=(seed1<<7)^(seed2>>6);                                               \
-  seed2+=(seed1>>4)^(seed3>>15);                                              \
-  seed1^=(seed2<<9)+(seed3<<8);                                               \
-  seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1));                               \
-  seed2+=0x72BE1579*((seed1<<4)  ^ (seed3>>16));                              \
-  seed1^=0x3F38A6ED*((seed3>>5)  ^ (((signed int)seed2)>>22));                \
-  seed2+=seed1*seed3;                                                         \
-  seed1+=seed3 ^ (seed2>>2);                                                  \
-  seed2^=((signed int)seed2)>>17;                                             \
-  unsigned int state  = 0x79dedea3*(seed1^(((signed int)seed1)>>14));         \
-  unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8);           \
-  state  = state + (wstate*(wstate^0xdddf97f5));                              \
-  wstate = 0xABCB96F7 + (wstate>>1);                                          \
-  unsigned int sum = 0;                                                       \
-  for (int i=0; i < rounds; i++) {                                            \
-    sum += delta;                                                             \
-    state += ((wstate<<4) + k0)^(wstate + sum)^((wstate>>5) + k1);            \
-    wstate += ((state<<4) + k2)^(state + sum)^((state>>5) + k3);              \
-  }                                                                           \
-  unsigned int v = (state ^ (state>>26)) + wstate;                            \
-  unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7);                      \
-  randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0);                        \
-}
+#define saru(seed1, seed2, seed, timestep, randnum)                           \
+  {                                                                           \
+    unsigned int seed3 = seed + timestep;                                     \
+    seed3 ^= (seed1 << 7) ^ (seed2 >> 6);                                     \
+    seed2 += (seed1 >> 4) ^ (seed3 >> 15);                                    \
+    seed1 ^= (seed2 << 9) + (seed3 << 8);                                     \
+    seed3 ^= 0xA5366B4D * ((seed2 >> 11) ^ (seed1 << 1));                     \
+    seed2 += 0x72BE1579 * ((seed1 << 4) ^ (seed3 >> 16));                     \
+    seed1 ^= 0x3F38A6ED * ((seed3 >> 5) ^ (((signed int) seed2) >> 22));      \
+    seed2 += seed1 * seed3;                                                   \
+    seed1 += seed3 ^ (seed2 >> 2);                                            \
+    seed2 ^= ((signed int) seed2) >> 17;                                      \
+    unsigned int state = 0x79dedea3 * (seed1 ^ (((signed int) seed1) >> 14)); \
+    unsigned int wstate = (state + seed2) ^ (((signed int) state) >> 8);      \
+    state = state + (wstate * (wstate ^ 0xdddf97f5));                         \
+    wstate = 0xABCB96F7 + (wstate >> 1);                                      \
+    unsigned int sum = 0;                                                     \
+    for (int i = 0; i < rounds; i++) {                                        \
+      sum += delta;                                                           \
+      state += ((wstate << 4) + k0) ^ (wstate + sum) ^ ((wstate >> 5) + k1);  \
+      wstate += ((state << 4) + k2) ^ (state + sum) ^ ((state >> 5) + k3);    \
+    }                                                                         \
+    unsigned int v = (state ^ (state >> 26)) + wstate;                        \
+    unsigned int s = (signed int) ((v ^ (v >> 20)) * 0x6957f5a7);             \
+    randnum = SQRT3 * (s * TWO_N32 * (numtyp) 2.0 - (numtyp) 1.0);            \
+  }
 #endif

 // specifically implemented for steps = 1; high = 1.0; low = -1.0
@ -159,42 +155,43 @@ double dpd_gpu_bytes();
 // This is used to compare with CPU DPD using RandMars::gaussian()
 #ifdef _USE_GAUSSIAN_SARU_LCG
 #define numtyp double
-#define saru(seed1, seed2, seed, timestep, randnum) {                         \
-  unsigned int seed3 = seed + timestep;                                       \
-  seed3^=(seed1<<7)^(seed2>>6);                                               \
-  seed2+=(seed1>>4)^(seed3>>15);                                              \
-  seed1^=(seed2<<9)+(seed3<<8);                                               \
-  seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1));                               \
-  seed2+=0x72BE1579*((seed1<<4)  ^ (seed3>>16));                              \
-  seed1^=0x3F38A6ED*((seed3>>5)  ^ (((signed int)seed2)>>22));                \
-  seed2+=seed1*seed3;                                                         \
-  seed1+=seed3 ^ (seed2>>2);                                                  \
-  seed2^=((signed int)seed2)>>17;                                             \
-  unsigned int state=0x12345678;                                              \
-  unsigned int wstate=12345678;                                               \
-  state  = 0x79dedea3*(seed1^(((signed int)seed1)>>14));                      \
-  wstate = (state + seed2) ^ (((signed int)state)>>8);                        \
-  state  = state + (wstate*(wstate^0xdddf97f5));                              \
-  wstate = 0xABCB96F7 + (wstate>>1);                                          \
-  unsigned int v, s;                                                          \
-  numtyp r1, r2, rsq;                                                         \
-  while (1) {                                                                 \
-    state = LCGA*state + LCGC;                                                \
-    wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \
-    v = (state ^ (state>>26)) + wstate;                                       \
-    s = (signed int)((v^(v>>20))*0x6957f5a7);                                 \
-    r1 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0;                                   \
-    state = LCGA*state + LCGC;                                                \
-    wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \
-    v = (state ^ (state>>26)) + wstate;                                       \
-    s = (signed int)((v^(v>>20))*0x6957f5a7);                                 \
-    r2 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0;                                   \
-    rsq = r1 * r1 + r2 * r2;                                                  \
-    if (rsq < (numtyp)1.0) break;                                             \
-  }                                                                           \
-  numtyp fac = sqrt((numtyp)-2.0*log(rsq)/rsq);                               \
-  randnum = r2*fac;                                                           \
-}
+#define saru(seed1, seed2, seed, timestep, randnum)                                  \
+  {                                                                                  \
+    unsigned int seed3 = seed + timestep;                                            \
+    seed3 ^= (seed1 << 7) ^ (seed2 >> 6);                                            \
+    seed2 += (seed1 >> 4) ^ (seed3 >> 15);                                           \
+    seed1 ^= (seed2 << 9) + (seed3 << 8);                                            \
+    seed3 ^= 0xA5366B4D * ((seed2 >> 11) ^ (seed1 << 1));                            \
+    seed2 += 0x72BE1579 * ((seed1 << 4) ^ (seed3 >> 16));                            \
+    seed1 ^= 0x3F38A6ED * ((seed3 >> 5) ^ (((signed int) seed2) >> 22));             \
+    seed2 += seed1 * seed3;                                                          \
+    seed1 += seed3 ^ (seed2 >> 2);                                                   \
+    seed2 ^= ((signed int) seed2) >> 17;                                             \
+    unsigned int state = 0x12345678;                                                 \
+    unsigned int wstate = 12345678;                                                  \
+    state = 0x79dedea3 * (seed1 ^ (((signed int) seed1) >> 14));                     \
+    wstate = (state + seed2) ^ (((signed int) state) >> 8);                          \
+    state = state + (wstate * (wstate ^ 0xdddf97f5));                                \
+    wstate = 0xABCB96F7 + (wstate >> 1);                                             \
+    unsigned int v, s;                                                               \
+    numtyp r1, r2, rsq;                                                              \
+    while (1) {                                                                      \
+      state = LCGA * state + LCGC;                                                   \
+      wstate = wstate + oWeylOffset + ((((signed int) wstate) >> 31) & oWeylPeriod); \
+      v = (state ^ (state >> 26)) + wstate;                                          \
+      s = (signed int) ((v ^ (v >> 20)) * 0x6957f5a7);                               \
+      r1 = s * TWO_N32 * (numtyp) 2.0 - (numtyp) 1.0;                                \
+      state = LCGA * state + LCGC;                                                   \
+      wstate = wstate + oWeylOffset + ((((signed int) wstate) >> 31) & oWeylPeriod); \
+      v = (state ^ (state >> 26)) + wstate;                                          \
+      s = (signed int) ((v ^ (v >> 20)) * 0x6957f5a7);                               \
+      r2 = s * TWO_N32 * (numtyp) 2.0 - (numtyp) 1.0;                                \
+      rsq = r1 * r1 + r2 * r2;                                                       \
+      if (rsq < (numtyp) 1.0) break;                                                 \
+    }                                                                                \
+    numtyp fac = sqrt((numtyp) -2.0 * log(rsq) / rsq);                               \
+    randnum = r2 * fac;                                                              \
+  }
 #endif

 /* ---------------------------------------------------------------------- */
@ -221,17 +218,17 @@ PairDPDGPU::~PairDPDGPU()

 void PairDPDGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;

-  double dtinvsqrt = 1.0/sqrt(update->dt);
+  double dtinvsqrt = 1.0 / sqrt(update->dt);

  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -240,33 +237,26 @@ void PairDPDGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = dpd_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                   atom->type, sublo, subhi,
-                                   atom->tag, atom->nspecial, atom->special,
-                                   eflag, vflag, eflag_atom, vflag_atom,
-                                   host_start, &ilist, &numneigh, cpu_time,
-                                   success, atom->v, dtinvsqrt, seed,
-                                   update->ntimestep,
-                                   domain->boxlo, domain->prd);
+    firstneigh = dpd_gpu_compute_n(
+        neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial,
+        atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
+        cpu_time, success, atom->v, dtinvsqrt, seed, update->ntimestep, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    dpd_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                    vflag_atom, host_start, cpu_time, success,
-                    atom->tag, atom->v, dtinvsqrt, seed,
-                    update->ntimestep,
-                    atom->nlocal, domain->boxlo, domain->prd);
+    dpd_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                    eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->tag,
+                    atom->v, dtinvsqrt, seed, update->ntimestep, atom->nlocal, domain->boxlo,
+                    domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -286,10 +276,9 @@ void PairDPDGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        mcut = init_one(i,j);
+        mcut = init_one(i, j);
        mcut *= mcut;
-        if (mcut > maxcut)
-          maxcut = mcut;
+        if (mcut > maxcut) maxcut = mcut;
        cutsq[i][j] = cutsq[j][i] = mcut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -297,21 +286,15 @@ void PairDPDGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = dpd_gpu_init(atom->ntypes+1, cutsq, a0, gamma, sigma,
-                             cut, force->special_lj, atom->nlocal,
-                             atom->nlocal+atom->nghost, mnf, maxspecial,
-                             cell_size, gpu_mode, screen);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      dpd_gpu_init(atom->ntypes + 1, cutsq, a0, gamma, sigma, cut, force->special_lj, atom->nlocal,
+                   atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -324,14 +307,15 @@ double PairDPDGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairDPDGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
-                             int *ilist, int *numneigh, int **firstneigh) {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double vxtmp,vytmp,vztmp,delvx,delvy,delvz;
-  double rsq,r,rinv,dot,wd,randnum,factor_dpd;
+void PairDPDGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                             int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
+  double vxtmp, vytmp, vztmp, delvx, delvy, delvz;
+  double rsq, r, rinv, dot, wd, randnum, factor_dpd;
  int *jlist;
-  tagint itag,jtag;
+  tagint itag, jtag;

  double **x = atom->x;
  double **v = atom->v;
@ -339,8 +323,8 @@ void PairDPDGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
  int *type = atom->type;
  tagint *tag = atom->tag;
  double *special_lj = force->special_lj;
-  double dtinvsqrt = 1.0/sqrt(update->dt);
-  int timestep = (int)update->ntimestep;
+  double dtinvsqrt = 1.0 / sqrt(update->dt);
+  int timestep = (int) update->ntimestep;

  // loop over neighbors of my atoms

@ -365,23 +349,24 @@ void PairDPDGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];
      jtag = tag[j];

      if (rsq < cutsq[itype][jtype]) {
        r = sqrt(rsq);
-        if (r < EPSILON) continue;     // r can be 0.0 in DPD systems
-        rinv = 1.0/r;
+        if (r < EPSILON) continue;    // r can be 0.0 in DPD systems
+        rinv = 1.0 / r;
        delvx = vxtmp - v[j][0];
        delvy = vytmp - v[j][1];
        delvz = vztmp - v[j][2];
-        dot = delx*delvx + dely*delvy + delz*delvz;
-        wd = 1.0 - r/cut[itype][jtype];
+        dot = delx * delvx + dely * delvy + delz * delvz;
+        wd = 1.0 - r / cut[itype][jtype];

-        unsigned int tag1=itag, tag2=jtag;
+        unsigned int tag1 = itag, tag2 = jtag;
        if (tag1 > tag2) {
-          tag1 = jtag; tag2 = itag;
+          tag1 = jtag;
+          tag2 = itag;
        }

        randnum = 0.0;
@ -391,24 +376,24 @@ void PairDPDGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
        // drag force = -gamma * wd^2 * (delx dot delv) / r
        // random force = sigma * wd * rnd * dtinvsqrt;

-        fpair = a0[itype][jtype]*wd;
-        fpair -= gamma[itype][jtype]*wd*wd*dot*rinv;
-        fpair += sigma[itype][jtype]*wd*randnum*dtinvsqrt;
-        fpair *= factor_dpd*rinv;
+        fpair = a0[itype][jtype] * wd;
+        fpair -= gamma[itype][jtype] * wd * wd * dot * rinv;
+        fpair += sigma[itype][jtype] * wd * randnum * dtinvsqrt;
+        fpair *= factor_dpd * rinv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          // unshifted eng of conservative term:
          // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]);
          // eng shifted to 0.0 at cutoff
-          evdwl = 0.5*a0[itype][jtype]*cut[itype][jtype] * wd*wd;
+          evdwl = 0.5 * a0[itype][jtype] * cut[itype][jtype] * wd * wd;
          evdwl *= factor_dpd;
        }

-        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_dpd_tstat_gpu.cpp
+++ b/src/GPU/pair_dpd_tstat_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"
 #include "update.h"
@ -35,33 +33,26 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int dpd_tstat_gpu_init(const int ntypes, double **cutsq, double **host_a0,
-                 double **host_gamma, double **host_sigma, double **host_cut,
-                 double *special_lj, const int inum,
-                 const int nall, const int max_nbors,  const int maxspecial,
-                 const double cell_size, int &gpu_mode, FILE *screen);
+int dpd_tstat_gpu_init(const int ntypes, double **cutsq, double **host_a0, double **host_gamma,
+                       double **host_sigma, double **host_cut, double *special_lj, const int inum,
+                       const int nall, const int max_nbors, const int maxspecial,
+                       const double cell_size, int &gpu_mode, FILE *screen);
 void dpd_tstat_gpu_clear();
-int ** dpd_tstat_gpu_compute_n(const int ago, const int inum_full,
-                         const int nall, double **host_x, int *host_type,
-                         double *sublo, double *subhi, tagint *tag,
-                         int **nspecial, tagint **special, const bool eflag,
-                         const bool vflag, const bool eatom, const bool vatom,
-                         int &host_start, int **ilist, int **jnum,
-                         const double cpu_time, bool &success,
-                         double **host_v, const double dtinvsqrt,
-                         const int seed, const int timestep,
-                         double *boxlo, double *prd);
-void dpd_tstat_gpu_compute(const int ago, const int inum_full, const int nall,
-                     double **host_x, int *host_type, int *ilist, int *numj,
-                     int **firstneigh, const bool eflag, const bool vflag,
-                     const bool eatom, const bool vatom, int &host_start,
-                     const double cpu_time, bool &success, tagint *tag,
-                     double **host_v, const double dtinvsqrt,
-                     const int seed, const int timestep,
-                     const int nlocal, double *boxlo, double *prd);
-void dpd_tstat_gpu_update_coeff(int ntypes, double **host_a0,
-                                double **host_gamma, double **host_sigma,
-                                double **host_cut);
+int **dpd_tstat_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
+                              int *host_type, double *sublo, double *subhi, tagint *tag,
+                              int **nspecial, tagint **special, const bool eflag, const bool vflag,
+                              const bool eatom, const bool vatom, int &host_start, int **ilist,
+                              int **jnum, const double cpu_time, bool &success, double **host_v,
+                              const double dtinvsqrt, const int seed, const int timestep,
+                              double *boxlo, double *prd);
+void dpd_tstat_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
+                           int *host_type, int *ilist, int *numj, int **firstneigh,
+                           const bool eflag, const bool vflag, const bool eatom, const bool vatom,
+                           int &host_start, const double cpu_time, bool &success, tagint *tag,
+                           double **host_v, const double dtinvsqrt, const int seed,
+                           const int timestep, const int nlocal, double *boxlo, double *prd);
+void dpd_tstat_gpu_update_coeff(int ntypes, double **host_a0, double **host_gamma,
+                                double **host_sigma, double **host_cut);
 double dpd_tstat_gpu_bytes();

 #define EPSILON 1.0e-10
@ -70,7 +61,8 @@ double dpd_tstat_gpu_bytes();
 //#define _USE_UNIFORM_SARU_TEA8
 //#define _USE_GAUSSIAN_SARU_LCG

-#if !defined(_USE_UNIFORM_SARU_LCG) && !defined(_USE_UNIFORM_SARU_TEA8) && !defined(_USE_GAUSSIAN_SARU_LCG)
+#if !defined(_USE_UNIFORM_SARU_LCG) && !defined(_USE_UNIFORM_SARU_TEA8) && \
+    !defined(_USE_GAUSSIAN_SARU_LCG)
 #define _USE_UNIFORM_SARU_LCG
 #endif

@ -79,9 +71,9 @@ double dpd_tstat_gpu_bytes();
 // 2. C. L. Phillips, J. A. Anderson, S. C. Glotzer, J. Comput. Phys. 230 (2011), 7191-7201.
 // PRNG period = 3666320093*2^32 ~ 2^64 ~ 10^19

-#define LCGA 0x4beb5d59 // Full period 32 bit LCG
+#define LCGA 0x4beb5d59    // Full period 32 bit LCG
 #define LCGC 0x2600e1f7
-#define oWeylPeriod 0xda879add // Prime period 3666320093
+#define oWeylPeriod 0xda879add    // Prime period 3666320093
 #define oWeylOffset 0x8009d14b
 #define TWO_N32 0.232830643653869628906250e-9f /* 2^-32 */

@ -93,28 +85,29 @@ double dpd_tstat_gpu_bytes();
 // Curly brackets to make variables local to the scope.
 #ifdef _USE_UNIFORM_SARU_LCG
 #define numtyp double
-#define SQRT3 (numtyp)1.7320508075688772935274463
-#define saru(seed1, seed2, seed, timestep, randnum) {                         \
-  unsigned int seed3 = seed + timestep;                                       \
-  seed3^=(seed1<<7)^(seed2>>6);                                               \
-  seed2+=(seed1>>4)^(seed3>>15);                                              \
-  seed1^=(seed2<<9)+(seed3<<8);                                               \
-  seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1));                               \
-  seed2+=0x72BE1579*((seed1<<4)  ^ (seed3>>16));                              \
-  seed1^=0x3F38A6ED*((seed3>>5)  ^ (((signed int)seed2)>>22));                \
-  seed2+=seed1*seed3;                                                         \
-  seed1+=seed3 ^ (seed2>>2);                                                  \
-  seed2^=((signed int)seed2)>>17;                                             \
-  unsigned int state  = 0x79dedea3*(seed1^(((signed int)seed1)>>14));         \
-  unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8);           \
-  state  = state + (wstate*(wstate^0xdddf97f5));                              \
-  wstate = 0xABCB96F7 + (wstate>>1);                                          \
-  state = LCGA*state + LCGC;                                                  \
-  wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod);   \
-  unsigned int v = (state ^ (state>>26)) + wstate;                            \
-  unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7);                      \
-  randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0);                        \
-}
+#define SQRT3 (numtyp) 1.7320508075688772935274463
+#define saru(seed1, seed2, seed, timestep, randnum)                                \
+  {                                                                                \
+    unsigned int seed3 = seed + timestep;                                          \
+    seed3 ^= (seed1 << 7) ^ (seed2 >> 6);                                          \
+    seed2 += (seed1 >> 4) ^ (seed3 >> 15);                                         \
+    seed1 ^= (seed2 << 9) + (seed3 << 8);                                          \
+    seed3 ^= 0xA5366B4D * ((seed2 >> 11) ^ (seed1 << 1));                          \
+    seed2 += 0x72BE1579 * ((seed1 << 4) ^ (seed3 >> 16));                          \
+    seed1 ^= 0x3F38A6ED * ((seed3 >> 5) ^ (((signed int) seed2) >> 22));           \
+    seed2 += seed1 * seed3;                                                        \
+    seed1 += seed3 ^ (seed2 >> 2);                                                 \
+    seed2 ^= ((signed int) seed2) >> 17;                                           \
+    unsigned int state = 0x79dedea3 * (seed1 ^ (((signed int) seed1) >> 14));      \
+    unsigned int wstate = (state + seed2) ^ (((signed int) state) >> 8);           \
+    state = state + (wstate * (wstate ^ 0xdddf97f5));                              \
+    wstate = 0xABCB96F7 + (wstate >> 1);                                           \
+    state = LCGA * state + LCGC;                                                   \
+    wstate = wstate + oWeylOffset + ((((signed int) wstate) >> 31) & oWeylPeriod); \
+    unsigned int v = (state ^ (state >> 26)) + wstate;                             \
+    unsigned int s = (signed int) ((v ^ (v >> 20)) * 0x6957f5a7);                  \
+    randnum = SQRT3 * (s * TWO_N32 * (numtyp) 2.0 - (numtyp) 1.0);                 \
+  }
 #endif

 // specifically implemented for steps = 1; high = 1.0; low = -1.0
@ -123,38 +116,39 @@ double dpd_tstat_gpu_bytes();
 // Afshar et al. multiplies u in [-0.5;0.5] with sqrt(12)
 #ifdef _USE_UNIFORM_SARU_TEA8
 #define numtyp double
-#define SQRT3 (numtyp)1.7320508075688772935274463
+#define SQRT3 (numtyp) 1.7320508075688772935274463
 #define k0 0xA341316C
 #define k1 0xC8013EA4
 #define k2 0xAD90777D
 #define k3 0x7E95761E
 #define delta 0x9e3779b9
 #define rounds 8
-#define saru(seed1, seed2, seed, timestep, randnum) {                         \
-  unsigned int seed3 = seed + timestep;                                       \
-  seed3^=(seed1<<7)^(seed2>>6);                                               \
-  seed2+=(seed1>>4)^(seed3>>15);                                              \
-  seed1^=(seed2<<9)+(seed3<<8);                                               \
-  seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1));                               \
-  seed2+=0x72BE1579*((seed1<<4)  ^ (seed3>>16));                              \
-  seed1^=0x3F38A6ED*((seed3>>5)  ^ (((signed int)seed2)>>22));                \
-  seed2+=seed1*seed3;                                                         \
-  seed1+=seed3 ^ (seed2>>2);                                                  \
-  seed2^=((signed int)seed2)>>17;                                             \
-  unsigned int state  = 0x79dedea3*(seed1^(((signed int)seed1)>>14));         \
-  unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8);           \
-  state  = state + (wstate*(wstate^0xdddf97f5));                              \
-  wstate = 0xABCB96F7 + (wstate>>1);                                          \
-  unsigned int sum = 0;                                                       \
-  for (int i=0; i < rounds; i++) {                                            \
-    sum += delta;                                                             \
-    state += ((wstate<<4) + k0)^(wstate + sum)^((wstate>>5) + k1);            \
-    wstate += ((state<<4) + k2)^(state + sum)^((state>>5) + k3);              \
-  }                                                                           \
-  unsigned int v = (state ^ (state>>26)) + wstate;                            \
-  unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7);                      \
-  randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0);                        \
-}
+#define saru(seed1, seed2, seed, timestep, randnum)                           \
+  {                                                                           \
+    unsigned int seed3 = seed + timestep;                                     \
+    seed3 ^= (seed1 << 7) ^ (seed2 >> 6);                                     \
+    seed2 += (seed1 >> 4) ^ (seed3 >> 15);                                    \
+    seed1 ^= (seed2 << 9) + (seed3 << 8);                                     \
+    seed3 ^= 0xA5366B4D * ((seed2 >> 11) ^ (seed1 << 1));                     \
+    seed2 += 0x72BE1579 * ((seed1 << 4) ^ (seed3 >> 16));                     \
+    seed1 ^= 0x3F38A6ED * ((seed3 >> 5) ^ (((signed int) seed2) >> 22));      \
+    seed2 += seed1 * seed3;                                                   \
+    seed1 += seed3 ^ (seed2 >> 2);                                            \
+    seed2 ^= ((signed int) seed2) >> 17;                                      \
+    unsigned int state = 0x79dedea3 * (seed1 ^ (((signed int) seed1) >> 14)); \
+    unsigned int wstate = (state + seed2) ^ (((signed int) state) >> 8);      \
+    state = state + (wstate * (wstate ^ 0xdddf97f5));                         \
+    wstate = 0xABCB96F7 + (wstate >> 1);                                      \
+    unsigned int sum = 0;                                                     \
+    for (int i = 0; i < rounds; i++) {                                        \
+      sum += delta;                                                           \
+      state += ((wstate << 4) + k0) ^ (wstate + sum) ^ ((wstate >> 5) + k1);  \
+      wstate += ((state << 4) + k2) ^ (state + sum) ^ ((state >> 5) + k3);    \
+    }                                                                         \
+    unsigned int v = (state ^ (state >> 26)) + wstate;                        \
+    unsigned int s = (signed int) ((v ^ (v >> 20)) * 0x6957f5a7);             \
+    randnum = SQRT3 * (s * TWO_N32 * (numtyp) 2.0 - (numtyp) 1.0);            \
+  }
 #endif

 // specifically implemented for steps = 1; high = 1.0; low = -1.0
@ -163,48 +157,48 @@ double dpd_tstat_gpu_bytes();
 // This is used to compare with CPU DPD using RandMars::gaussian()
 #ifdef _USE_GAUSSIAN_SARU_LCG
 #define numtyp double
-#define saru(seed1, seed2, seed, timestep, randnum) {                         \
-  unsigned int seed3 = seed + timestep;                                       \
-  seed3^=(seed1<<7)^(seed2>>6);                                               \
-  seed2+=(seed1>>4)^(seed3>>15);                                              \
-  seed1^=(seed2<<9)+(seed3<<8);                                               \
-  seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1));                               \
-  seed2+=0x72BE1579*((seed1<<4)  ^ (seed3>>16));                              \
-  seed1^=0x3F38A6ED*((seed3>>5)  ^ (((signed int)seed2)>>22));                \
-  seed2+=seed1*seed3;                                                         \
-  seed1+=seed3 ^ (seed2>>2);                                                  \
-  seed2^=((signed int)seed2)>>17;                                             \
-  unsigned int state=0x12345678;                                              \
-  unsigned int wstate=12345678;                                               \
-  state  = 0x79dedea3*(seed1^(((signed int)seed1)>>14));                      \
-  wstate = (state + seed2) ^ (((signed int)state)>>8);                        \
-  state  = state + (wstate*(wstate^0xdddf97f5));                              \
-  wstate = 0xABCB96F7 + (wstate>>1);                                          \
-  unsigned int v, s;                                                          \
-  numtyp r1, r2, rsq;                                                         \
-  while (1) {                                                                 \
-    state = LCGA*state + LCGC;                                                \
-    wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \
-    v = (state ^ (state>>26)) + wstate;                                       \
-    s = (signed int)((v^(v>>20))*0x6957f5a7);                                 \
-    r1 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0;                                   \
-    state = LCGA*state + LCGC;                                                \
-    wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \
-    v = (state ^ (state>>26)) + wstate;                                       \
-    s = (signed int)((v^(v>>20))*0x6957f5a7);                                 \
-    r2 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0;                                   \
-    rsq = r1 * r1 + r2 * r2;                                                  \
-    if (rsq < (numtyp)1.0) break;                                             \
-  }                                                                           \
-  numtyp fac = sqrt((numtyp)-2.0*log(rsq)/rsq);                               \
-  randnum = r2*fac;                                                           \
-}
+#define saru(seed1, seed2, seed, timestep, randnum)                                  \
+  {                                                                                  \
+    unsigned int seed3 = seed + timestep;                                            \
+    seed3 ^= (seed1 << 7) ^ (seed2 >> 6);                                            \
+    seed2 += (seed1 >> 4) ^ (seed3 >> 15);                                           \
+    seed1 ^= (seed2 << 9) + (seed3 << 8);                                            \
+    seed3 ^= 0xA5366B4D * ((seed2 >> 11) ^ (seed1 << 1));                            \
+    seed2 += 0x72BE1579 * ((seed1 << 4) ^ (seed3 >> 16));                            \
+    seed1 ^= 0x3F38A6ED * ((seed3 >> 5) ^ (((signed int) seed2) >> 22));             \
+    seed2 += seed1 * seed3;                                                          \
+    seed1 += seed3 ^ (seed2 >> 2);                                                   \
+    seed2 ^= ((signed int) seed2) >> 17;                                             \
+    unsigned int state = 0x12345678;                                                 \
+    unsigned int wstate = 12345678;                                                  \
+    state = 0x79dedea3 * (seed1 ^ (((signed int) seed1) >> 14));                     \
+    wstate = (state + seed2) ^ (((signed int) state) >> 8);                          \
+    state = state + (wstate * (wstate ^ 0xdddf97f5));                                \
+    wstate = 0xABCB96F7 + (wstate >> 1);                                             \
+    unsigned int v, s;                                                               \
+    numtyp r1, r2, rsq;                                                              \
+    while (1) {                                                                      \
+      state = LCGA * state + LCGC;                                                   \
+      wstate = wstate + oWeylOffset + ((((signed int) wstate) >> 31) & oWeylPeriod); \
+      v = (state ^ (state >> 26)) + wstate;                                          \
+      s = (signed int) ((v ^ (v >> 20)) * 0x6957f5a7);                               \
+      r1 = s * TWO_N32 * (numtyp) 2.0 - (numtyp) 1.0;                                \
+      state = LCGA * state + LCGC;                                                   \
+      wstate = wstate + oWeylOffset + ((((signed int) wstate) >> 31) & oWeylPeriod); \
+      v = (state ^ (state >> 26)) + wstate;                                          \
+      s = (signed int) ((v ^ (v >> 20)) * 0x6957f5a7);                               \
+      r2 = s * TWO_N32 * (numtyp) 2.0 - (numtyp) 1.0;                                \
+      rsq = r1 * r1 + r2 * r2;                                                       \
+      if (rsq < (numtyp) 1.0) break;                                                 \
+    }                                                                                \
+    numtyp fac = sqrt((numtyp) -2.0 * log(rsq) / rsq);                               \
+    randnum = r2 * fac;                                                              \
+  }
 #endif

 /* ---------------------------------------------------------------------- */

-PairDPDTstatGPU::PairDPDTstatGPU(LAMMPS *lmp) : PairDPDTstat(lmp),
-  gpu_mode(GPU_FORCE)
+PairDPDTstatGPU::PairDPDTstatGPU(LAMMPS *lmp) : PairDPDTstat(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  reinitflag = 0;
@ -226,31 +220,31 @@ PairDPDTstatGPU::~PairDPDTstatGPU()

 void PairDPDTstatGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  // adjust sigma if target T is changing

  if (t_start != t_stop) {
    double delta = update->ntimestep - update->beginstep;
    if (delta != 0.0) delta /= update->endstep - update->beginstep;
-    temperature = t_start + delta * (t_stop-t_start);
+    temperature = t_start + delta * (t_stop - t_start);
    double boltz = force->boltz;
    for (int i = 1; i <= atom->ntypes; i++)
      for (int j = i; j <= atom->ntypes; j++)
-        sigma[i][j] = sigma[j][i] = sqrt(2.0*boltz*temperature*gamma[i][j]);
+        sigma[i][j] = sigma[j][i] = sqrt(2.0 * boltz * temperature * gamma[i][j]);

-    dpd_tstat_gpu_update_coeff(atom->ntypes+1, a0, gamma, sigma, cut);
+    dpd_tstat_gpu_update_coeff(atom->ntypes + 1, a0, gamma, sigma, cut);
  }

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;

-  double dtinvsqrt = 1.0/sqrt(update->dt);
+  double dtinvsqrt = 1.0 / sqrt(update->dt);

  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -259,33 +253,26 @@ void PairDPDTstatGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = dpd_tstat_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                   atom->type, sublo, subhi,
-                                   atom->tag, atom->nspecial, atom->special,
-                                   eflag, vflag, eflag_atom, vflag_atom,
-                                   host_start, &ilist, &numneigh, cpu_time,
-                                   success, atom->v, dtinvsqrt, seed,
-                                   update->ntimestep,
-                                   domain->boxlo, domain->prd);
+    firstneigh = dpd_tstat_gpu_compute_n(
+        neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial,
+        atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
+        cpu_time, success, atom->v, dtinvsqrt, seed, update->ntimestep, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    dpd_tstat_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                    vflag_atom, host_start, cpu_time, success,
-                    atom->tag, atom->v, dtinvsqrt, seed,
-                    update->ntimestep,
-                    atom->nlocal, domain->boxlo, domain->prd);
+    dpd_tstat_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh,
+                          firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
+                          success, atom->tag, atom->v, dtinvsqrt, seed, update->ntimestep,
+                          atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -305,10 +292,9 @@ void PairDPDTstatGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        mcut = init_one(i,j);
+        mcut = init_one(i, j);
        mcut *= mcut;
-        if (mcut > maxcut)
-          maxcut = mcut;
+        if (mcut > maxcut) maxcut = mcut;
        cutsq[i][j] = cutsq[j][i] = mcut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -316,21 +302,15 @@ void PairDPDTstatGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = dpd_tstat_gpu_init(atom->ntypes+1, cutsq, a0, gamma, sigma,
-                                   cut, force->special_lj, atom->nlocal,
-                                   atom->nlocal+atom->nghost, mnf, maxspecial,
-                                   cell_size, gpu_mode, screen);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success = dpd_tstat_gpu_init(atom->ntypes + 1, cutsq, a0, gamma, sigma, cut,
+                                   force->special_lj, atom->nlocal, atom->nlocal + atom->nghost,
+                                   mnf, maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -343,15 +323,15 @@ double PairDPDTstatGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairDPDTstatGPU::cpu_compute(int start, int inum, int /* eflag */,
-                                  int /* vflag */, int *ilist,
-                                  int *numneigh, int **firstneigh) {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,fpair;
-  double vxtmp,vytmp,vztmp,delvx,delvy,delvz;
-  double rsq,r,rinv,dot,wd,randnum,factor_dpd;
+void PairDPDTstatGPU::cpu_compute(int start, int inum, int /* eflag */, int /* vflag */, int *ilist,
+                                  int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, fpair;
+  double vxtmp, vytmp, vztmp, delvx, delvy, delvz;
+  double rsq, r, rinv, dot, wd, randnum, factor_dpd;
  int *jlist;
-  tagint itag,jtag;
+  tagint itag, jtag;

  double **x = atom->x;
  double **v = atom->v;
@ -359,8 +339,8 @@ void PairDPDTstatGPU::cpu_compute(int start, int inum, int /* eflag */,
  int *type = atom->type;
  tagint *tag = atom->tag;
  double *special_lj = force->special_lj;
-  double dtinvsqrt = 1.0/sqrt(update->dt);
-  int timestep = (int)update->ntimestep;
+  double dtinvsqrt = 1.0 / sqrt(update->dt);
+  int timestep = (int) update->ntimestep;

  // loop over neighbors of my atoms

@ -385,23 +365,24 @@ void PairDPDTstatGPU::cpu_compute(int start, int inum, int /* eflag */,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];
      jtag = tag[j];

      if (rsq < cutsq[itype][jtype]) {
        r = sqrt(rsq);
-        if (r < EPSILON) continue;     // r can be 0.0 in DPD systems
-        rinv = 1.0/r;
+        if (r < EPSILON) continue;    // r can be 0.0 in DPD systems
+        rinv = 1.0 / r;
        delvx = vxtmp - v[j][0];
        delvy = vytmp - v[j][1];
        delvz = vztmp - v[j][2];
-        dot = delx*delvx + dely*delvy + delz*delvz;
-        wd = 1.0 - r/cut[itype][jtype];
+        dot = delx * delvx + dely * delvy + delz * delvz;
+        wd = 1.0 - r / cut[itype][jtype];

-        unsigned int tag1=itag, tag2=jtag;
+        unsigned int tag1 = itag, tag2 = jtag;
        if (tag1 > tag2) {
-          tag1 = jtag; tag2 = itag;
+          tag1 = jtag;
+          tag2 = itag;
        }

        randnum = 0.0;
@ -411,15 +392,15 @@ void PairDPDTstatGPU::cpu_compute(int start, int inum, int /* eflag */,
        // drag force = -gamma * wd^2 * (delx dot delv) / r
        // random force = sigma * wd * rnd * dtinvsqrt;

-        fpair = -gamma[itype][jtype]*wd*wd*dot*rinv;
-        fpair += sigma[itype][jtype]*wd*randnum*dtinvsqrt;
-        fpair *= factor_dpd*rinv;
+        fpair = -gamma[itype][jtype] * wd * wd * dot * rinv;
+        fpair += sigma[itype][jtype] * wd * randnum * dtinvsqrt;
+        fpair *= factor_dpd * rinv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

-        if (evflag) ev_tally_full(i,0.0,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, 0.0, 0.0, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_eam_alloy_gpu.cpp
+++ b/src/GPU/pair_eam_alloy_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -26,7 +25,6 @@
 #include "gpu_extra.h"
 #include "memory.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "potential_file_reader.h"
 #include "suffix.h"
@ -39,32 +37,26 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int eam_alloy_gpu_init(const int ntypes, double host_cutforcesq,
-                       int **host_type2rhor, int **host_type2z2r,
-                       int *host_type2frho, double ***host_rhor_spline,
-                       double ***host_z2r_spline, double ***host_frho_spline,
-                       double** host_cutsq, double rdr, double rdrho, double rhomax,
-                       int nrhor, int nrho, int nz2r, int nfrho, int nr,
-                       const int nlocal, const int nall, const int max_nbors,
-                       const int maxspecial, const double cell_size,
-                       int &gpu_mode, FILE *screen, int &fp_size);
+int eam_alloy_gpu_init(const int ntypes, double host_cutforcesq, int **host_type2rhor,
+                       int **host_type2z2r, int *host_type2frho, double ***host_rhor_spline,
+                       double ***host_z2r_spline, double ***host_frho_spline, double **host_cutsq,
+                       double rdr, double rdrho, double rhomax, int nrhor, int nrho, int nz2r,
+                       int nfrho, int nr, const int nlocal, const int nall, const int max_nbors,
+                       const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen,
+                       int &fp_size);
 void eam_alloy_gpu_clear();
-int** eam_alloy_gpu_compute_n(const int ago, const int inum_full,
-                        const int nall, double **host_x, int *host_type,
-                        double *sublo, double *subhi, tagint *tag,
-                        int **nspecial, tagint **special, const bool eflag,
-                        const bool vflag, const bool eatom, const bool vatom,
-                        int &host_start, int **ilist, int **jnum,
-                        const double cpu_time, bool &success,
-                        int &inum, void **fp_ptr);
-void eam_alloy_gpu_compute(const int ago, const int inum_full, const int nlocal,
-                     const int nall,double **host_x, int *host_type,
-                     int *ilist, int *numj, int **firstneigh,
-                     const bool eflag, const bool vflag,
-                     const bool eatom, const bool vatom, int &host_start,
-                     const double cpu_time, bool &success, void **fp_ptr);
-void eam_alloy_gpu_compute_force(int *ilist, const bool eflag, const bool vflag,
-                           const bool eatom, const bool vatom);
+int **eam_alloy_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
+                              int *host_type, double *sublo, double *subhi, tagint *tag,
+                              int **nspecial, tagint **special, const bool eflag, const bool vflag,
+                              const bool eatom, const bool vatom, int &host_start, int **ilist,
+                              int **jnum, const double cpu_time, bool &success, int &inum,
+                              void **fp_ptr);
+void eam_alloy_gpu_compute(const int ago, const int inum_full, const int nlocal, const int nall,
+                           double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh,
+                           const bool eflag, const bool vflag, const bool eatom, const bool vatom,
+                           int &host_start, const double cpu_time, bool &success, void **fp_ptr);
+void eam_alloy_gpu_compute_force(int *ilist, const bool eflag, const bool vflag, const bool eatom,
+                                 const bool vatom);
 double eam_alloy_gpu_bytes();

 /* ---------------------------------------------------------------------- */
@ -98,7 +90,7 @@ double PairEAMAlloyGPU::memory_usage()

 void PairEAMAlloyGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  // compute density on each atom on GPU

@ -109,7 +101,7 @@ void PairEAMAlloyGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -118,27 +110,24 @@ void PairEAMAlloyGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = eam_alloy_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                   atom->type, sublo, subhi,
-                                   atom->tag, atom->nspecial, atom->special,
-                                   eflag, vflag, eflag_atom, vflag_atom,
-                                   host_start, &ilist, &numneigh, cpu_time,
-                                   success, inum_dev, &fp_pinned);
-  } else { // gpu_mode == GPU_FORCE
+    firstneigh = eam_alloy_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo,
+                                         subhi, atom->tag, atom->nspecial, atom->special, eflag,
+                                         vflag, eflag_atom, vflag_atom, host_start, &ilist,
+                                         &numneigh, cpu_time, success, inum_dev, &fp_pinned);
+  } else {    // gpu_mode == GPU_FORCE
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    eam_alloy_gpu_compute(neighbor->ago, inum, nlocal, nall, atom->x, atom->type,
-                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                    vflag_atom, host_start, cpu_time, success, &fp_pinned);
+    eam_alloy_gpu_compute(neighbor->ago, inum, nlocal, nall, atom->x, atom->type, ilist, numneigh,
+                          firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
+                          success, &fp_pinned);
  }

-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

  // communicate derivative of embedding function

@ -169,10 +158,9 @@ void PairEAMAlloyGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -180,23 +168,17 @@ void PairEAMAlloyGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int fp_size;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = eam_alloy_gpu_init(atom->ntypes+1, cutforcesq, type2rhor, type2z2r,
-                             type2frho, rhor_spline, z2r_spline, frho_spline,
-                             cutsq, rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr,
-                             atom->nlocal, atom->nlocal+atom->nghost, mnf,
-                             maxspecial, cell_size, gpu_mode, screen, fp_size);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success = eam_alloy_gpu_init(
+      atom->ntypes + 1, cutforcesq, type2rhor, type2z2r, type2frho, rhor_spline, z2r_spline,
+      frho_spline, cutsq, rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr, atom->nlocal,
+      atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen, fp_size);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
  if (fp_size == sizeof(double))
    fp_single = false;
  else
@ -207,64 +189,63 @@ void PairEAMAlloyGPU::init_style()

 /* ---------------------------------------------------------------------- */

-double PairEAMAlloyGPU::single(int i, int j, int itype, int jtype,
-                               double rsq, double /* factor_coul */,
-                               double /* factor_lj */, double &fforce)
+double PairEAMAlloyGPU::single(int i, int j, int itype, int jtype, double rsq,
+                               double /* factor_coul */, double /* factor_lj */, double &fforce)
 {
  int m;
-  double r,p,rhoip,rhojp,z2,z2p,recip,phi,phip,psip;
+  double r, p, rhoip, rhojp, z2, z2p, recip, phi, phip, psip;
  double *coeff;

  r = sqrt(rsq);
-  p = r*rdr + 1.0;
-  m = static_cast<int> (p);
-  m = MIN(m,nr-1);
+  p = r * rdr + 1.0;
+  m = static_cast<int>(p);
+  m = MIN(m, nr - 1);
  p -= m;
-  p = MIN(p,1.0);
+  p = MIN(p, 1.0);

  coeff = rhor_spline[type2rhor[itype][jtype]][m];
-  rhoip = (coeff[0]*p + coeff[1])*p + coeff[2];
+  rhoip = (coeff[0] * p + coeff[1]) * p + coeff[2];
  coeff = rhor_spline[type2rhor[jtype][itype]][m];
-  rhojp = (coeff[0]*p + coeff[1])*p + coeff[2];
+  rhojp = (coeff[0] * p + coeff[1]) * p + coeff[2];
  coeff = z2r_spline[type2z2r[itype][jtype]][m];
-  z2p = (coeff[0]*p + coeff[1])*p + coeff[2];
-  z2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+  z2p = (coeff[0] * p + coeff[1]) * p + coeff[2];
+  z2 = ((coeff[3] * p + coeff[4]) * p + coeff[5]) * p + coeff[6];

-  double fp_i,fp_j;
+  double fp_i, fp_j;
  if (fp_single == false) {
-    fp_i = ((double*)fp_pinned)[i];
-    fp_j = ((double*)fp_pinned)[j];
+    fp_i = ((double *) fp_pinned)[i];
+    fp_j = ((double *) fp_pinned)[j];
  } else {
-    fp_i = ((float*)fp_pinned)[i];
-    fp_j = ((float*)fp_pinned)[j];
+    fp_i = ((float *) fp_pinned)[i];
+    fp_j = ((float *) fp_pinned)[j];
  }

-  recip = 1.0/r;
-  phi = z2*recip;
-  phip = z2p*recip - phi*recip;
-  psip = fp_i*rhojp + fp_j*rhoip + phip;
-  fforce = -psip*recip;
+  recip = 1.0 / r;
+  phi = z2 * recip;
+  phip = z2p * recip - phi * recip;
+  psip = fp_i * rhojp + fp_j * rhoip + phip;
+  fforce = -psip * recip;

  return phi;
 }

 /* ---------------------------------------------------------------------- */

-int PairEAMAlloyGPU::pack_forward_comm(int n, int *list, double *buf,
-                                       int /* pbc_flag */, int * /* pbc */)
+int PairEAMAlloyGPU::pack_forward_comm(int n, int *list, double *buf, int /* pbc_flag */,
+                                       int * /* pbc */)
 {
-  int i,j,m;
+  int i, j, m;

  m = 0;

  if (fp_single) {
-    float *fp_ptr = (float *)fp_pinned;
+    float *fp_ptr = (float *) fp_pinned;
    for (i = 0; i < n; i++) {
      j = list[i];
      buf[m++] = static_cast<double>(fp_ptr[j]);
    }
  } else {
-    double *fp_ptr = (double *)fp_pinned;
+    double *fp_ptr = (double *) fp_pinned;
    for (i = 0; i < n; i++) {
      j = list[i];
      buf[m++] = fp_ptr[j];
@ -278,15 +259,15 @@ int PairEAMAlloyGPU::pack_forward_comm(int n, int *list, double *buf,

 void PairEAMAlloyGPU::unpack_forward_comm(int n, int first, double *buf)
 {
-  int i,m,last;
+  int i, m, last;

  m = 0;
  last = first + n;
  if (fp_single) {
-    float *fp_ptr = (float *)fp_pinned;
+    float *fp_ptr = (float *) fp_pinned;
    for (i = first; i < last; i++) fp_ptr[i] = buf[m++];
  } else {
-    double *fp_ptr = (double *)fp_pinned;
+    double *fp_ptr = (double *) fp_pinned;
    for (i = first; i < last; i++) fp_ptr[i] = buf[m++];
  }
 }
@ -298,23 +279,22 @@ void PairEAMAlloyGPU::unpack_forward_comm(int n, int first, double *buf)

 void PairEAMAlloyGPU::coeff(int narg, char **arg)
 {
-  int i,j;
+  int i, j;

  if (!allocated) allocate();

-  if (narg != 3 + atom->ntypes)
-    error->all(FLERR,"Incorrect args for pair coefficients");
+  if (narg != 3 + atom->ntypes) error->all(FLERR, "Incorrect args for pair coefficients");

  // insure I,J args are * *

-  if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
-    error->all(FLERR,"Incorrect args for pair coefficients");
+  if (strcmp(arg[0], "*") != 0 || strcmp(arg[1], "*") != 0)
+    error->all(FLERR, "Incorrect args for pair coefficients");

  // read EAM setfl file

  if (setfl) {
-    for (i = 0; i < setfl->nelements; i++) delete [] setfl->elements[i];
-    delete [] setfl->elements;
+    for (i = 0; i < setfl->nelements; i++) delete[] setfl->elements[i];
+    delete[] setfl->elements;
    memory->destroy(setfl->mass);
    memory->destroy(setfl->frho);
    memory->destroy(setfl->rhor);
@ -328,22 +308,23 @@ void PairEAMAlloyGPU::coeff(int narg, char **arg)
  // map[i] = which element the Ith atom type is, -1 if "NULL"

  for (i = 3; i < narg; i++) {
-    if (strcmp(arg[i],"NULL") == 0) {
-      map[i-2] = -1;
+    if (strcmp(arg[i], "NULL") == 0) {
+      map[i - 2] = -1;
      continue;
    }
    for (j = 0; j < setfl->nelements; j++)
-      if (strcmp(arg[i],setfl->elements[j]) == 0) break;
-    if (j < setfl->nelements) map[i-2] = j;
-    else error->all(FLERR,"No matching element in EAM potential file");
+      if (strcmp(arg[i], setfl->elements[j]) == 0) break;
+    if (j < setfl->nelements)
+      map[i - 2] = j;
+    else
+      error->all(FLERR, "No matching element in EAM potential file");
  }

  // clear setflag since coeff() called once with I,J = * *

  int n = atom->ntypes;
  for (i = 1; i <= n; i++)
-    for (j = i; j <= n; j++)
-      setflag[i][j] = 0;
+    for (j = i; j <= n; j++) setflag[i][j] = 0;

  // set setflag i,j for type pairs where both are mapped to elements
  // set mass of atom type if i = j
@ -353,14 +334,14 @@ void PairEAMAlloyGPU::coeff(int narg, char **arg)
    for (j = i; j <= n; j++) {
      if (map[i] >= 0 && map[j] >= 0) {
        setflag[i][j] = 1;
-        if (i == j) atom->set_mass(FLERR,i,setfl->mass[map[i]]);
+        if (i == j) atom->set_mass(FLERR, i, setfl->mass[map[i]]);
        count++;
      }
      scale[i][j] = 1.0;
    }
  }

-  if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
+  if (count == 0) error->all(FLERR, "Incorrect args for pair coefficients");
 }

 /* ----------------------------------------------------------------------
@ -378,8 +359,7 @@ void PairEAMAlloyGPU::read_file(char *filename)
    // transparently convert units for supported conversions

    int unit_convert = reader.get_unit_convert();
-    double conversion_factor = utils::get_conversion_factor(utils::ENERGY,
-                                                            unit_convert);
+    double conversion_factor = utils::get_conversion_factor(utils::ENERGY, unit_convert);
    try {
      reader.skip_line();
      reader.skip_line();
@ -389,10 +369,10 @@ void PairEAMAlloyGPU::read_file(char *filename)
      ValueTokenizer values = reader.next_values(1);
      file->nelements = values.next_int();

-      if ((int)values.count() != file->nelements + 1)
-        error->one(FLERR,"Incorrect element names in EAM potential file");
+      if ((int) values.count() != file->nelements + 1)
+        error->one(FLERR, "Incorrect element names in EAM potential file");

-      file->elements = new char*[file->nelements];
+      file->elements = new char *[file->nelements];
      for (int i = 0; i < file->nelements; i++)
        file->elements[i] = utils::strdup(values.next_string());

@ -401,12 +381,12 @@ void PairEAMAlloyGPU::read_file(char *filename)
      values = reader.next_values(5);
      file->nrho = values.next_int();
      file->drho = values.next_double();
-      file->nr   = values.next_int();
-      file->dr   = values.next_double();
-      file->cut  = values.next_double();
+      file->nr = values.next_int();
+      file->dr = values.next_double();
+      file->cut = values.next_double();

      if ((file->nrho <= 0) || (file->nr <= 0) || (file->dr <= 0.0))
-        error->one(FLERR,"Invalid EAM potential file");
+        error->one(FLERR, "Invalid EAM potential file");

      memory->create(file->mass, file->nelements, "pair:mass");
      memory->create(file->frho, file->nelements, file->nrho + 1, "pair:frho");
@ -415,14 +395,13 @@ void PairEAMAlloyGPU::read_file(char *filename)

      for (int i = 0; i < file->nelements; i++) {
        values = reader.next_values(2);
-        values.next_int(); // ignore
+        values.next_int();    // ignore
        file->mass[i] = values.next_double();

        reader.next_dvector(&file->frho[i][1], file->nrho);
        reader.next_dvector(&file->rhor[i][1], file->nr);
        if (unit_convert) {
-          for (int j = 1; j < file->nrho; ++j)
-            file->frho[i][j] *= conversion_factor;
+          for (int j = 1; j < file->nrho; ++j) file->frho[i][j] *= conversion_factor;
        }
      }

@ -430,8 +409,7 @@ void PairEAMAlloyGPU::read_file(char *filename)
        for (int j = 0; j <= i; j++) {
          reader.next_dvector(&file->z2r[i][j][1], file->nr);
          if (unit_convert) {
-            for (int k = 1; k < file->nr; ++k)
-              file->z2r[i][j][k] *= conversion_factor;
+            for (int k = 1; k < file->nr; ++k) file->z2r[i][j][k] *= conversion_factor;
          }
        }
      }
@ -451,7 +429,7 @@ void PairEAMAlloyGPU::read_file(char *filename)

  // allocate memory on other procs
  if (comm->me != 0) {
-    file->elements = new char*[file->nelements];
+    file->elements = new char *[file->nelements];
    for (int i = 0; i < file->nelements; i++) file->elements[i] = nullptr;
    memory->create(file->mass, file->nelements, "pair:mass");
    memory->create(file->frho, file->nelements, file->nrho + 1, "pair:frho");
@ -477,9 +455,7 @@ void PairEAMAlloyGPU::read_file(char *filename)

  // broadcast file->z2r
  for (int i = 0; i < file->nelements; i++) {
-    for (int j = 0; j <= i; j++) {
-      MPI_Bcast(&file->z2r[i][j][1], file->nr, MPI_DOUBLE, 0, world);
-    }
+    for (int j = 0; j <= i; j++) { MPI_Bcast(&file->z2r[i][j][1], file->nr, MPI_DOUBLE, 0, world); }
  }
 }

@ -489,7 +465,7 @@ void PairEAMAlloyGPU::read_file(char *filename)

 void PairEAMAlloyGPU::file2array()
 {
-  int i,j,m,n;
+  int i, j, m, n;
  int ntypes = atom->ntypes;

  // set function params directly from setfl file
@ -498,7 +474,7 @@ void PairEAMAlloyGPU::file2array()
  nr = setfl->nr;
  drho = setfl->drho;
  dr = setfl->dr;
-  rhomax = (nrho-1) * drho;
+  rhomax = (nrho - 1) * drho;

  // ------------------------------------------------------------------
  // setup frho arrays
@ -509,7 +485,7 @@ void PairEAMAlloyGPU::file2array()

  nfrho = setfl->nelements + 1;
  memory->destroy(frho);
-  memory->create(frho,nfrho,nrho+1,"pair:frho");
+  memory->create(frho, nfrho, nrho + 1, "pair:frho");

  // copy each element's frho to global frho

@ -519,15 +495,17 @@ void PairEAMAlloyGPU::file2array()
  // add extra frho of zeroes for non-EAM types to point to (pair hybrid)
  // this is necessary b/c fp is still computed for non-EAM atoms

-  for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0;
+  for (m = 1; m <= nrho; m++) frho[nfrho - 1][m] = 0.0;

  // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
  // if atom type doesn't point to element (non-EAM atom in pair hybrid)
  // then map it to last frho array of zeroes

  for (i = 1; i <= ntypes; i++)
-    if (map[i] >= 0) type2frho[i] = map[i];
-    else type2frho[i] = nfrho-1;
+    if (map[i] >= 0)
+      type2frho[i] = map[i];
+    else
+      type2frho[i] = nfrho - 1;

  // ------------------------------------------------------------------
  // setup rhor arrays
@ -538,7 +516,7 @@ void PairEAMAlloyGPU::file2array()

  nrhor = setfl->nelements;
  memory->destroy(rhor);
-  memory->create(rhor,nrhor,nr+1,"pair:rhor");
+  memory->create(rhor, nrhor, nr + 1, "pair:rhor");

  // copy each element's rhor to global rhor

@ -550,8 +528,7 @@ void PairEAMAlloyGPU::file2array()
  // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used

  for (i = 1; i <= ntypes; i++)
-    for (j = 1; j <= ntypes; j++)
-      type2rhor[i][j] = map[i];
+    for (j = 1; j <= ntypes; j++) type2rhor[i][j] = map[i];

  // ------------------------------------------------------------------
  // setup z2r arrays
@ -560,9 +537,9 @@ void PairEAMAlloyGPU::file2array()
  // allocate z2r arrays
  // nz2r = N*(N+1)/2 where N = # of setfl elements

-  nz2r = setfl->nelements * (setfl->nelements+1) / 2;
+  nz2r = setfl->nelements * (setfl->nelements + 1) / 2;
  memory->destroy(z2r);
-  memory->create(z2r,nz2r,nr+1,"pair:z2r");
+  memory->create(z2r, nz2r, nr + 1, "pair:z2r");

  // copy each element pair z2r to global z2r, only for I >= J

@ -581,7 +558,7 @@ void PairEAMAlloyGPU::file2array()
  //   type2z2r is not used by non-opt
  //   but set type2z2r to 0 since accessed by opt

-  int irow,icol;
+  int irow, icol;
  for (i = 1; i <= ntypes; i++) {
    for (j = 1; j <= ntypes; j++) {
      irow = map[i];
--- a/src/GPU/pair_eam_fs_gpu.cpp
+++ b/src/GPU/pair_eam_fs_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -26,7 +25,6 @@
 #include "gpu_extra.h"
 #include "memory.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "potential_file_reader.h"
 #include "suffix.h"
@ -39,31 +37,26 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int eam_fs_gpu_init(const int ntypes, double host_cutforcesq,
-                 int **host_type2rhor, int **host_type2z2r,
-                 int *host_type2frho, double ***host_rhor_spline,
-                 double ***host_z2r_spline, double ***host_frho_spline,
-                 double** host_cutsq, double rdr, double rdrho, double rhomax,
-                 int nrhor, int nrho, int nz2r, int nfrho, int nr,
-                 const int nlocal, const int nall, const int max_nbors,
-                 const int maxspecial, const double cell_size, int &gpu_mode,
-                 FILE *screen, int &fp_size);
+int eam_fs_gpu_init(const int ntypes, double host_cutforcesq, int **host_type2rhor,
+                    int **host_type2z2r, int *host_type2frho, double ***host_rhor_spline,
+                    double ***host_z2r_spline, double ***host_frho_spline, double **host_cutsq,
+                    double rdr, double rdrho, double rhomax, int nrhor, int nrho, int nz2r,
+                    int nfrho, int nr, const int nlocal, const int nall, const int max_nbors,
+                    const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen,
+                    int &fp_size);
 void eam_fs_gpu_clear();
-int** eam_fs_gpu_compute_n(const int ago, const int inum_full, const int nall,
-                        double **host_x, int *host_type, double *sublo,
-                        double *subhi, tagint *tag, int **nspecial,
-                        tagint **special, const bool eflag, const bool vflag,
-                        const bool eatom, const bool vatom, int &host_start,
-                        int **ilist, int **jnum,  const double cpu_time,
-                        bool &success, int &inum, void **fp_ptr);
-void eam_fs_gpu_compute(const int ago, const int inum_full, const int nlocal,
-                        const int nall,double **host_x, int *host_type,
-                        int *ilist, int *numj, int **firstneigh,
-                        const bool eflag, const bool vflag,
-                        const bool eatom, const bool vatom, int &host_start,
-                        const double cpu_time, bool &success, void **fp_ptr);
-void eam_fs_gpu_compute_force(int *ilist, const bool eflag, const bool vflag,
-                              const bool eatom, const bool vatom);
+int **eam_fs_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
+                           int *host_type, double *sublo, double *subhi, tagint *tag,
+                           int **nspecial, tagint **special, const bool eflag, const bool vflag,
+                           const bool eatom, const bool vatom, int &host_start, int **ilist,
+                           int **jnum, const double cpu_time, bool &success, int &inum,
+                           void **fp_ptr);
+void eam_fs_gpu_compute(const int ago, const int inum_full, const int nlocal, const int nall,
+                        double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh,
+                        const bool eflag, const bool vflag, const bool eatom, const bool vatom,
+                        int &host_start, const double cpu_time, bool &success, void **fp_ptr);
+void eam_fs_gpu_compute_force(int *ilist, const bool eflag, const bool vflag, const bool eatom,
+                              const bool vatom);
 double eam_fs_gpu_bytes();

 /* ---------------------------------------------------------------------- */
@ -97,7 +90,7 @@ double PairEAMFSGPU::memory_usage()

 void PairEAMFSGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  // compute density on each atom on GPU

@ -108,7 +101,7 @@ void PairEAMFSGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -117,27 +110,24 @@ void PairEAMFSGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = eam_fs_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                   atom->type, sublo, subhi,
-                                   atom->tag, atom->nspecial, atom->special,
-                                   eflag, vflag, eflag_atom, vflag_atom,
-                                   host_start, &ilist, &numneigh, cpu_time,
-                                   success, inum_dev, &fp_pinned);
-  } else { // gpu_mode == GPU_FORCE
+    firstneigh = eam_fs_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                      atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                      eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
+                                      cpu_time, success, inum_dev, &fp_pinned);
+  } else {    // gpu_mode == GPU_FORCE
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    eam_fs_gpu_compute(neighbor->ago, inum, nlocal, nall, atom->x, atom->type,
-                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                    vflag_atom, host_start, cpu_time, success, &fp_pinned);
+    eam_fs_gpu_compute(neighbor->ago, inum, nlocal, nall, atom->x, atom->type, ilist, numneigh,
+                       firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
+                       success, &fp_pinned);
  }

-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

  // communicate derivative of embedding function

@ -168,10 +158,9 @@ void PairEAMFSGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -179,23 +168,17 @@ void PairEAMFSGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int fp_size;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = eam_fs_gpu_init(atom->ntypes+1, cutforcesq, type2rhor, type2z2r,
-                             type2frho, rhor_spline, z2r_spline, frho_spline,
-                             cutsq, rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr,
-                             atom->nlocal, atom->nlocal+atom->nghost, mnf,
-                             maxspecial, cell_size, gpu_mode, screen, fp_size);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success = eam_fs_gpu_init(
+      atom->ntypes + 1, cutforcesq, type2rhor, type2z2r, type2frho, rhor_spline, z2r_spline,
+      frho_spline, cutsq, rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr, atom->nlocal,
+      atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen, fp_size);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
  if (fp_size == sizeof(double))
    fp_single = false;
  else
@ -206,64 +189,63 @@ void PairEAMFSGPU::init_style()

 /* ---------------------------------------------------------------------- */

-double PairEAMFSGPU::single(int i, int j, int itype, int jtype,
-                            double rsq, double /* factor_coul */,
-                            double /* factor_lj */, double &fforce)
+double PairEAMFSGPU::single(int i, int j, int itype, int jtype, double rsq,
+                            double /* factor_coul */, double /* factor_lj */, double &fforce)
 {
  int m;
-  double r,p,rhoip,rhojp,z2,z2p,recip,phi,phip,psip;
+  double r, p, rhoip, rhojp, z2, z2p, recip, phi, phip, psip;
  double *coeff;

  r = sqrt(rsq);
-  p = r*rdr + 1.0;
-  m = static_cast<int> (p);
-  m = MIN(m,nr-1);
+  p = r * rdr + 1.0;
+  m = static_cast<int>(p);
+  m = MIN(m, nr - 1);
  p -= m;
-  p = MIN(p,1.0);
+  p = MIN(p, 1.0);

  coeff = rhor_spline[type2rhor[itype][jtype]][m];
-  rhoip = (coeff[0]*p + coeff[1])*p + coeff[2];
+  rhoip = (coeff[0] * p + coeff[1]) * p + coeff[2];
  coeff = rhor_spline[type2rhor[jtype][itype]][m];
-  rhojp = (coeff[0]*p + coeff[1])*p + coeff[2];
+  rhojp = (coeff[0] * p + coeff[1]) * p + coeff[2];
  coeff = z2r_spline[type2z2r[itype][jtype]][m];
-  z2p = (coeff[0]*p + coeff[1])*p + coeff[2];
-  z2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+  z2p = (coeff[0] * p + coeff[1]) * p + coeff[2];
+  z2 = ((coeff[3] * p + coeff[4]) * p + coeff[5]) * p + coeff[6];

-  double fp_i,fp_j;
+  double fp_i, fp_j;
  if (fp_single == false) {
-    fp_i = ((double*)fp_pinned)[i];
-    fp_j = ((double*)fp_pinned)[j];
+    fp_i = ((double *) fp_pinned)[i];
+    fp_j = ((double *) fp_pinned)[j];
  } else {
-    fp_i = ((float*)fp_pinned)[i];
-    fp_j = ((float*)fp_pinned)[j];
+    fp_i = ((float *) fp_pinned)[i];
+    fp_j = ((float *) fp_pinned)[j];
  }

-  recip = 1.0/r;
-  phi = z2*recip;
-  phip = z2p*recip - phi*recip;
-  psip = fp_i*rhojp + fp_j*rhoip + phip;
-  fforce = -psip*recip;
+  recip = 1.0 / r;
+  phi = z2 * recip;
+  phip = z2p * recip - phi * recip;
+  psip = fp_i * rhojp + fp_j * rhoip + phip;
+  fforce = -psip * recip;

  return phi;
 }

 /* ---------------------------------------------------------------------- */

-int PairEAMFSGPU::pack_forward_comm(int n, int *list, double *buf,
-                                    int /* pbc_flag */, int * /* pbc */)
+int PairEAMFSGPU::pack_forward_comm(int n, int *list, double *buf, int /* pbc_flag */,
+                                    int * /* pbc */)
 {
-  int i,j,m;
+  int i, j, m;

  m = 0;

  if (fp_single) {
-    float *fp_ptr = (float *)fp_pinned;
+    float *fp_ptr = (float *) fp_pinned;
    for (i = 0; i < n; i++) {
      j = list[i];
      buf[m++] = static_cast<double>(fp_ptr[j]);
    }
  } else {
-    double *fp_ptr = (double *)fp_pinned;
+    double *fp_ptr = (double *) fp_pinned;
    for (i = 0; i < n; i++) {
      j = list[i];
      buf[m++] = fp_ptr[j];
@ -277,15 +259,15 @@ int PairEAMFSGPU::pack_forward_comm(int n, int *list, double *buf,

 void PairEAMFSGPU::unpack_forward_comm(int n, int first, double *buf)
 {
-  int i,m,last;
+  int i, m, last;

  m = 0;
  last = first + n;
  if (fp_single) {
-    float *fp_ptr = (float *)fp_pinned;
+    float *fp_ptr = (float *) fp_pinned;
    for (i = first; i < last; i++) fp_ptr[i] = buf[m++];
  } else {
-    double *fp_ptr = (double *)fp_pinned;
+    double *fp_ptr = (double *) fp_pinned;
    for (i = first; i < last; i++) fp_ptr[i] = buf[m++];
  }
 }
@ -297,23 +279,22 @@ void PairEAMFSGPU::unpack_forward_comm(int n, int first, double *buf)

 void PairEAMFSGPU::coeff(int narg, char **arg)
 {
-  int i,j;
+  int i, j;

  if (!allocated) allocate();

-  if (narg != 3 + atom->ntypes)
-    error->all(FLERR,"Incorrect args for pair coefficients");
+  if (narg != 3 + atom->ntypes) error->all(FLERR, "Incorrect args for pair coefficients");

  // insure I,J args are * *

-  if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
-    error->all(FLERR,"Incorrect args for pair coefficients");
+  if (strcmp(arg[0], "*") != 0 || strcmp(arg[1], "*") != 0)
+    error->all(FLERR, "Incorrect args for pair coefficients");

  // read EAM Finnis-Sinclair file

  if (fs) {
-    for (i = 0; i < fs->nelements; i++) delete [] fs->elements[i];
-    delete [] fs->elements;
+    for (i = 0; i < fs->nelements; i++) delete[] fs->elements[i];
+    delete[] fs->elements;
    memory->destroy(fs->mass);
    memory->destroy(fs->frho);
    memory->destroy(fs->rhor);
@ -327,22 +308,23 @@ void PairEAMFSGPU::coeff(int narg, char **arg)
  // map[i] = which element the Ith atom type is, -1 if "NULL"

  for (i = 3; i < narg; i++) {
-    if (strcmp(arg[i],"NULL") == 0) {
-      map[i-2] = -1;
+    if (strcmp(arg[i], "NULL") == 0) {
+      map[i - 2] = -1;
      continue;
    }
    for (j = 0; j < fs->nelements; j++)
-      if (strcmp(arg[i],fs->elements[j]) == 0) break;
-    if (j < fs->nelements) map[i-2] = j;
-    else error->all(FLERR,"No matching element in EAM potential file");
+      if (strcmp(arg[i], fs->elements[j]) == 0) break;
+    if (j < fs->nelements)
+      map[i - 2] = j;
+    else
+      error->all(FLERR, "No matching element in EAM potential file");
  }

  // clear setflag since coeff() called once with I,J = * *

  int n = atom->ntypes;
  for (i = 1; i <= n; i++)
-    for (j = i; j <= n; j++)
-      setflag[i][j] = 0;
+    for (j = i; j <= n; j++) setflag[i][j] = 0;

  // set setflag i,j for type pairs where both are mapped to elements
  // set mass of atom type if i = j
@ -352,14 +334,14 @@ void PairEAMFSGPU::coeff(int narg, char **arg)
    for (j = i; j <= n; j++) {
      if (map[i] >= 0 && map[j] >= 0) {
        setflag[i][j] = 1;
-        if (i == j) atom->set_mass(FLERR,i,fs->mass[map[i]]);
+        if (i == j) atom->set_mass(FLERR, i, fs->mass[map[i]]);
        count++;
      }
      scale[i][j] = 1.0;
    }
  }

-  if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
+  if (count == 0) error->all(FLERR, "Incorrect args for pair coefficients");
 }

 /* ----------------------------------------------------------------------
@ -372,14 +354,12 @@ void PairEAMFSGPU::read_file(char *filename)

  // read potential file
  if (comm->me == 0) {
-    PotentialFileReader reader(PairEAM::lmp, filename, "eam/fs",
-                               unit_convert_flag);
+    PotentialFileReader reader(PairEAM::lmp, filename, "eam/fs", unit_convert_flag);

    // transparently convert units for supported conversions

    int unit_convert = reader.get_unit_convert();
-    double conversion_factor = utils::get_conversion_factor(utils::ENERGY,
-                                                            unit_convert);
+    double conversion_factor = utils::get_conversion_factor(utils::ENERGY, unit_convert);
    try {
      reader.skip_line();
      reader.skip_line();
@ -389,10 +369,10 @@ void PairEAMFSGPU::read_file(char *filename)
      ValueTokenizer values = reader.next_values(1);
      file->nelements = values.next_int();

-      if ((int)values.count() != file->nelements + 1)
-        error->one(FLERR,"Incorrect element names in EAM potential file");
+      if ((int) values.count() != file->nelements + 1)
+        error->one(FLERR, "Incorrect element names in EAM potential file");

-      file->elements = new char*[file->nelements];
+      file->elements = new char *[file->nelements];
      for (int i = 0; i < file->nelements; i++) {
        const std::string word = values.next_string();
        file->elements[i] = utils::strdup(word);
@ -403,13 +383,13 @@ void PairEAMFSGPU::read_file(char *filename)
      values = reader.next_values(5);
      file->nrho = values.next_int();
      file->drho = values.next_double();
-      file->nr   = values.next_int();
-      file->dr   = values.next_double();
-      file->cut  = values.next_double();
+      file->nr = values.next_int();
+      file->dr = values.next_double();
+      file->cut = values.next_double();
      rhomax = 0.0;

      if ((file->nrho <= 0) || (file->nr <= 0) || (file->dr <= 0.0))
-        error->one(FLERR,"Invalid EAM potential file");
+        error->one(FLERR, "Invalid EAM potential file");

      memory->create(file->mass, file->nelements, "pair:mass");
      memory->create(file->frho, file->nelements, file->nrho + 1, "pair:frho");
@ -418,13 +398,12 @@ void PairEAMFSGPU::read_file(char *filename)

      for (int i = 0; i < file->nelements; i++) {
        values = reader.next_values(2);
-        values.next_int(); // ignore
+        values.next_int();    // ignore
        file->mass[i] = values.next_double();

        reader.next_dvector(&file->frho[i][1], file->nrho);
        if (unit_convert) {
-          for (int j = 1; j <= file->nrho; ++j)
-            file->frho[i][j] *= conversion_factor;
+          for (int j = 1; j <= file->nrho; ++j) file->frho[i][j] *= conversion_factor;
        }

        for (int j = 0; j < file->nelements; j++) {
@ -436,8 +415,7 @@ void PairEAMFSGPU::read_file(char *filename)
        for (int j = 0; j <= i; j++) {
          reader.next_dvector(&file->z2r[i][j][1], file->nr);
          if (unit_convert) {
-            for (int k = 1; k <= file->nr; ++k)
-              file->z2r[i][j][k] *= conversion_factor;
+            for (int k = 1; k <= file->nr; ++k) file->z2r[i][j][k] *= conversion_factor;
          }
        }
      }
@ -458,7 +436,7 @@ void PairEAMFSGPU::read_file(char *filename)

  // allocate memory on other procs
  if (comm->me != 0) {
-    file->elements = new char*[file->nelements];
+    file->elements = new char *[file->nelements];
    for (int i = 0; i < file->nelements; i++) file->elements[i] = nullptr;
    memory->create(file->mass, file->nelements, "pair:mass");
    memory->create(file->frho, file->nelements, file->nrho + 1, "pair:frho");
@ -487,9 +465,7 @@ void PairEAMFSGPU::read_file(char *filename)

  // broadcast file->z2r
  for (int i = 0; i < file->nelements; i++) {
-    for (int j = 0; j <= i; j++) {
-      MPI_Bcast(&file->z2r[i][j][1], file->nr, MPI_DOUBLE, 0, world);
-    }
+    for (int j = 0; j <= i; j++) { MPI_Bcast(&file->z2r[i][j][1], file->nr, MPI_DOUBLE, 0, world); }
  }
 }

@ -499,7 +475,7 @@ void PairEAMFSGPU::read_file(char *filename)

 void PairEAMFSGPU::file2array()
 {
-  int i,j,m,n;
+  int i, j, m, n;
  int ntypes = atom->ntypes;

  // set function params directly from fs file
@ -508,7 +484,7 @@ void PairEAMFSGPU::file2array()
  nr = fs->nr;
  drho = fs->drho;
  dr = fs->dr;
-  rhomax = (nrho-1) * drho;
+  rhomax = (nrho - 1) * drho;

  // ------------------------------------------------------------------
  // setup frho arrays
@ -519,7 +495,7 @@ void PairEAMFSGPU::file2array()

  nfrho = fs->nelements + 1;
  memory->destroy(frho);
-  memory->create(frho,nfrho,nrho+1,"pair:frho");
+  memory->create(frho, nfrho, nrho + 1, "pair:frho");

  // copy each element's frho to global frho

@ -529,15 +505,17 @@ void PairEAMFSGPU::file2array()
  // add extra frho of zeroes for non-EAM types to point to (pair hybrid)
  // this is necessary b/c fp is still computed for non-EAM atoms

-  for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0;
+  for (m = 1; m <= nrho; m++) frho[nfrho - 1][m] = 0.0;

  // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
  // if atom type doesn't point to element (non-EAM atom in pair hybrid)
  // then map it to last frho array of zeroes

  for (i = 1; i <= ntypes; i++)
-    if (map[i] >= 0) type2frho[i] = map[i];
-    else type2frho[i] = nfrho-1;
+    if (map[i] >= 0)
+      type2frho[i] = map[i];
+    else
+      type2frho[i] = nfrho - 1;

  // ------------------------------------------------------------------
  // setup rhor arrays
@ -548,7 +526,7 @@ void PairEAMFSGPU::file2array()

  nrhor = fs->nelements * fs->nelements;
  memory->destroy(rhor);
-  memory->create(rhor,nrhor,nr+1,"pair:rhor");
+  memory->create(rhor, nrhor, nr + 1, "pair:rhor");

  // copy each element pair rhor to global rhor

@ -564,8 +542,7 @@ void PairEAMFSGPU::file2array()
  // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used

  for (i = 1; i <= ntypes; i++)
-    for (j = 1; j <= ntypes; j++)
-      type2rhor[i][j] = map[i] * fs->nelements + map[j];
+    for (j = 1; j <= ntypes; j++) type2rhor[i][j] = map[i] * fs->nelements + map[j];

  // ------------------------------------------------------------------
  // setup z2r arrays
@ -574,9 +551,9 @@ void PairEAMFSGPU::file2array()
  // allocate z2r arrays
  // nz2r = N*(N+1)/2 where N = # of fs elements

-  nz2r = fs->nelements * (fs->nelements+1) / 2;
+  nz2r = fs->nelements * (fs->nelements + 1) / 2;
  memory->destroy(z2r);
-  memory->create(z2r,nz2r,nr+1,"pair:z2r");
+  memory->create(z2r, nz2r, nr + 1, "pair:z2r");

  // copy each element pair z2r to global z2r, only for I >= J

@ -595,7 +572,7 @@ void PairEAMFSGPU::file2array()
  //   type2z2r is not used by non-opt
  //   but set type2z2r to 0 since accessed by opt

-  int irow,icol;
+  int irow, icol;
  for (i = 1; i <= ntypes; i++) {
    for (j = 1; j <= ntypes; j++) {
      irow = map[i];
--- a/src/GPU/pair_eam_gpu.cpp
+++ b/src/GPU/pair_eam_gpu.cpp
@ -24,7 +24,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -36,31 +35,25 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int eam_gpu_init(const int ntypes, double host_cutforcesq,
-                 int **host_type2rhor, int **host_type2z2r,
-                 int *host_type2frho, double ***host_rhor_spline,
-                 double ***host_z2r_spline, double ***host_frho_spline,
-                 double** host_cutsq, double rdr, double rdrho, double rhomax,
-                 int nrhor, int nrho, int nz2r, int nfrho, int nr,
-                 const int nlocal, const int nall, const int max_nbors,
-                 const int maxspecial, const double cell_size, int &gpu_mode,
-                 FILE *screen, int &fp_size);
+int eam_gpu_init(const int ntypes, double host_cutforcesq, int **host_type2rhor,
+                 int **host_type2z2r, int *host_type2frho, double ***host_rhor_spline,
+                 double ***host_z2r_spline, double ***host_frho_spline, double **host_cutsq,
+                 double rdr, double rdrho, double rhomax, int nrhor, int nrho, int nz2r, int nfrho,
+                 int nr, const int nlocal, const int nall, const int max_nbors,
+                 const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen,
+                 int &fp_size);
 void eam_gpu_clear();
-int** eam_gpu_compute_n(const int ago, const int inum_full, const int nall,
-                        double **host_x, int *host_type, double *sublo,
-                        double *subhi, tagint *tag, int **nspecial,
-                        tagint **special, const bool eflag, const bool vflag,
-                        const bool eatom, const bool vatom, int &host_start,
-                        int **ilist, int **jnum,  const double cpu_time,
-                        bool &success, int &inum, void **fp_ptr);
-void eam_gpu_compute(const int ago, const int inum_full, const int nlocal,
-                     const int nall,double **host_x, int *host_type,
-                     int *ilist, int *numj, int **firstneigh,
-                     const bool eflag, const bool vflag,
-                     const bool eatom, const bool vatom, int &host_start,
-                     const double cpu_time, bool &success, void **fp_ptr);
-void eam_gpu_compute_force(int *ilist, const bool eflag, const bool vflag,
-                           const bool eatom, const bool vatom);
+int **eam_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
+                        int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                        tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                        const bool vatom, int &host_start, int **ilist, int **jnum,
+                        const double cpu_time, bool &success, int &inum, void **fp_ptr);
+void eam_gpu_compute(const int ago, const int inum_full, const int nlocal, const int nall,
+                     double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh,
+                     const bool eflag, const bool vflag, const bool eatom, const bool vatom,
+                     int &host_start, const double cpu_time, bool &success, void **fp_ptr);
+void eam_gpu_compute_force(int *ilist, const bool eflag, const bool vflag, const bool eatom,
+                           const bool vatom);
 double eam_gpu_bytes();

 /* ---------------------------------------------------------------------- */
@ -95,7 +88,7 @@ double PairEAMGPU::memory_usage()

 void PairEAMGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  // compute density on each atom on GPU

@ -106,7 +99,7 @@ void PairEAMGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -115,27 +108,24 @@ void PairEAMGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = eam_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                   atom->type, sublo, subhi,
-                                   atom->tag, atom->nspecial, atom->special,
-                                   eflag, vflag, eflag_atom, vflag_atom,
-                                   host_start, &ilist, &numneigh, cpu_time,
-                                   success, inum_dev, &fp_pinned);
-  } else { // gpu_mode == GPU_FORCE
+    firstneigh =
+        eam_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+                          atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
+                          host_start, &ilist, &numneigh, cpu_time, success, inum_dev, &fp_pinned);
+  } else {    // gpu_mode == GPU_FORCE
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    eam_gpu_compute(neighbor->ago, inum, nlocal, nall, atom->x, atom->type,
-                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                    vflag_atom, host_start, cpu_time, success, &fp_pinned);
+    eam_gpu_compute(neighbor->ago, inum, nlocal, nall, atom->x, atom->type, ilist, numneigh,
+                    firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success,
+                    &fp_pinned);
  }

-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

  // communicate derivative of embedding function

@ -165,10 +155,9 @@ void PairEAMGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -176,23 +165,17 @@ void PairEAMGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int fp_size;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = eam_gpu_init(atom->ntypes+1, cutforcesq, type2rhor, type2z2r,
-                             type2frho, rhor_spline, z2r_spline, frho_spline,
-                             cutsq, rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr,
-                             atom->nlocal, atom->nlocal+atom->nghost, mnf,
+  int success = eam_gpu_init(atom->ntypes + 1, cutforcesq, type2rhor, type2z2r, type2frho,
+                             rhor_spline, z2r_spline, frho_spline, cutsq, rdr, rdrho, rhomax, nrhor,
+                             nrho, nz2r, nfrho, nr, atom->nlocal, atom->nlocal + atom->nghost, mnf,
                             maxspecial, cell_size, gpu_mode, screen, fp_size);
-  GPU_EXTRA::check_flag(success,error,world);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
  if (fp_size == sizeof(double))
    fp_single = false;
  else
@ -203,64 +186,63 @@ void PairEAMGPU::init_style()

 /* ---------------------------------------------------------------------- */

-double PairEAMGPU::single(int i, int j, int itype, int jtype,
-                          double rsq, double /* factor_coul */,
+double PairEAMGPU::single(int i, int j, int itype, int jtype, double rsq, double /* factor_coul */,
                          double /* factor_lj */, double &fforce)
 {
  int m;
-  double r,p,rhoip,rhojp,z2,z2p,recip,phi,phip,psip;
+  double r, p, rhoip, rhojp, z2, z2p, recip, phi, phip, psip;
  double *coeff;

  r = sqrt(rsq);
-  p = r*rdr + 1.0;
-  m = static_cast<int> (p);
-  m = MIN(m,nr-1);
+  p = r * rdr + 1.0;
+  m = static_cast<int>(p);
+  m = MIN(m, nr - 1);
  p -= m;
-  p = MIN(p,1.0);
+  p = MIN(p, 1.0);

  coeff = rhor_spline[type2rhor[itype][jtype]][m];
-  rhoip = (coeff[0]*p + coeff[1])*p + coeff[2];
+  rhoip = (coeff[0] * p + coeff[1]) * p + coeff[2];
  coeff = rhor_spline[type2rhor[jtype][itype]][m];
-  rhojp = (coeff[0]*p + coeff[1])*p + coeff[2];
+  rhojp = (coeff[0] * p + coeff[1]) * p + coeff[2];
  coeff = z2r_spline[type2z2r[itype][jtype]][m];
-  z2p = (coeff[0]*p + coeff[1])*p + coeff[2];
-  z2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+  z2p = (coeff[0] * p + coeff[1]) * p + coeff[2];
+  z2 = ((coeff[3] * p + coeff[4]) * p + coeff[5]) * p + coeff[6];

-  double fp_i,fp_j;
+  double fp_i, fp_j;
  if (fp_single == false) {
-    fp_i = ((double*)fp_pinned)[i];
-    fp_j = ((double*)fp_pinned)[j];
+    fp_i = ((double *) fp_pinned)[i];
+    fp_j = ((double *) fp_pinned)[j];
  } else {
-    fp_i = ((float*)fp_pinned)[i];
-    fp_j = ((float*)fp_pinned)[j];
+    fp_i = ((float *) fp_pinned)[i];
+    fp_j = ((float *) fp_pinned)[j];
  }

-  recip = 1.0/r;
-  phi = z2*recip;
-  phip = z2p*recip - phi*recip;
-  psip = fp_i*rhojp + fp_j*rhoip + phip;
-  fforce = -psip*recip;
+  recip = 1.0 / r;
+  phi = z2 * recip;
+  phip = z2p * recip - phi * recip;
+  psip = fp_i * rhojp + fp_j * rhoip + phip;
+  fforce = -psip * recip;

  return phi;
 }

 /* ---------------------------------------------------------------------- */

-int PairEAMGPU::pack_forward_comm(int n, int *list, double *buf,
-                                  int /* pbc_flag */, int * /* pbc */)
+int PairEAMGPU::pack_forward_comm(int n, int *list, double *buf, int /* pbc_flag */,
+                                  int * /* pbc */)
 {
-  int i,j,m;
+  int i, j, m;

  m = 0;

  if (fp_single) {
-    float *fp_ptr = (float *)fp_pinned;
+    float *fp_ptr = (float *) fp_pinned;
    for (i = 0; i < n; i++) {
      j = list[i];
      buf[m++] = static_cast<double>(fp_ptr[j]);
    }
  } else {
-    double *fp_ptr = (double *)fp_pinned;
+    double *fp_ptr = (double *) fp_pinned;
    for (i = 0; i < n; i++) {
      j = list[i];
      buf[m++] = fp_ptr[j];
@ -274,15 +256,15 @@ int PairEAMGPU::pack_forward_comm(int n, int *list, double *buf,

 void PairEAMGPU::unpack_forward_comm(int n, int first, double *buf)
 {
-  int i,m,last;
+  int i, m, last;

  m = 0;
  last = first + n;
  if (fp_single) {
-    float *fp_ptr = (float *)fp_pinned;
+    float *fp_ptr = (float *) fp_pinned;
    for (i = first; i < last; i++) fp_ptr[i] = buf[m++];
  } else {
-    double *fp_ptr = (double *)fp_pinned;
+    double *fp_ptr = (double *) fp_pinned;
    for (i = first; i < last; i++) fp_ptr[i] = buf[m++];
  }
 }
--- a/src/GPU/pair_gauss_gpu.cpp
+++ b/src/GPU/pair_gauss_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,25 +32,20 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int gauss_gpu_init(const int ntypes, double **cutsq, double **host_a,
-                   double **b, double **offset, double *special_lj,
-                   const int nlocal, const int nall, const int max_nbors,
-                   const int maxspecial, const double cell_size,
-                   int &gpu_mode, FILE *screen);
-void gauss_gpu_reinit(const int ntypes, double **cutsq, double **host_a,
-                      double **b, double **offset);
+int gauss_gpu_init(const int ntypes, double **cutsq, double **host_a, double **b, double **offset,
+                   double *special_lj, const int nlocal, const int nall, const int max_nbors,
+                   const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen);
+void gauss_gpu_reinit(const int ntypes, double **cutsq, double **host_a, double **b,
+                      double **offset);
 void gauss_gpu_clear();
-int ** gauss_gpu_compute_n(const int ago, const int inum, const int nall,
-                           double **host_x, int *host_type, double *sublo,
-                           double *subhi, tagint *tag, int **nspecial,
-                           tagint **special, const bool eflag, const bool vflag,
-                           const bool eatom, const bool vatom, int &host_start,
-                           int **ilist, int **jnum,
-                           const double cpu_time, bool &success);
-void gauss_gpu_compute(const int ago, const int inum, const int nall,
-                       double **host_x, int *host_type, int *ilist, int *numj,
-                       int **firstneigh, const bool eflag, const bool vflag,
-                       const bool eatom, const bool vatom, int &host_start,
+int **gauss_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                          int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                          tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                          const bool vatom, int &host_start, int **ilist, int **jnum,
+                          const double cpu_time, bool &success);
+void gauss_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                       int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                       const bool vflag, const bool eatom, const bool vatom, int &host_start,
                       const double cpu_time, bool &success);
 double gauss_gpu_bytes();

@ -79,7 +72,7 @@ PairGaussGPU::~PairGaussGPU()

 void PairGaussGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -87,7 +80,7 @@ void PairGaussGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -96,28 +89,24 @@ void PairGaussGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = gauss_gpu_compute_n(neighbor->ago, inum, nall,
-                                     atom->x, atom->type, sublo,
-                                     subhi, atom->tag, atom->nspecial,
-                                     atom->special, eflag, vflag, eflag_atom,
-                                     vflag_atom, host_start,
-                                     &ilist, &numneigh, cpu_time, success);
+    firstneigh =
+        gauss_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+                            atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
+                            host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    gauss_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                      ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                      vflag_atom, host_start, cpu_time, success);
+    gauss_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                      eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -137,10 +126,9 @@ void PairGaussGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -148,21 +136,15 @@ void PairGaussGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = gauss_gpu_init(atom->ntypes+1, cutsq, a, b,
-                               offset, force->special_lj, atom->nlocal,
-                               atom->nlocal+atom->nghost, mnf, maxspecial,
-                               cell_size, gpu_mode, screen);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      gauss_gpu_init(atom->ntypes + 1, cutsq, a, b, offset, force->special_lj, atom->nlocal,
+                     atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -171,7 +153,7 @@ void PairGaussGPU::reinit()
 {
  Pair::reinit();

-  gauss_gpu_reinit(atom->ntypes+1, cutsq, a, b, offset);
+  gauss_gpu_reinit(atom->ntypes + 1, cutsq, a, b, offset);
 }

 /* ---------------------------------------------------------------------- */
@ -184,11 +166,12 @@ double PairGaussGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairGaussGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
-                               int *ilist, int *numneigh, int **firstneigh) {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,forcelj,factor_lj;
+void PairGaussGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                               int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
+  double rsq, r2inv, forcelj, factor_lj;
  int *jlist;

  double **x = atom->x;
@ -215,26 +198,24 @@ void PairGaussGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
-        forcelj = - 2.0*a[itype][jtype]*b[itype][jtype] * rsq *
-          exp(-b[itype][jtype]*rsq);
-        fpair = factor_lj*forcelj*r2inv;
+        r2inv = 1.0 / rsq;
+        forcelj = -2.0 * a[itype][jtype] * b[itype][jtype] * rsq * exp(-b[itype][jtype] * rsq);
+        fpair = factor_lj * forcelj * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
-          evdwl = -(a[itype][jtype]*exp(-b[itype][jtype]*rsq) -
-            offset[itype][jtype]);
+          evdwl = -(a[itype][jtype] * exp(-b[itype][jtype] * rsq) - offset[itype][jtype]);
          evdwl *= factor_lj;
        }

-        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_gayberne_gpu.cpp
+++ b/src/GPU/pair_gayberne_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -27,7 +26,6 @@
 #include "math_extra.h"
 #include "memory.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -37,35 +35,29 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int gb_gpu_init(const int ntypes, const double gamma, const double upsilon,
-                const double mu, double **shape, double **well, double **cutsq,
-                double **sigma, double **epsilon, double *host_lshape,
-                int **form, double **host_lj1, double **host_lj2,
-                double **host_lj3, double **host_lj4, double **offset,
-                double *special_lj, const int nlocal, const int nall,
-                const int max_nbors, const int maxspecial,
+int gb_gpu_init(const int ntypes, const double gamma, const double upsilon, const double mu,
+                double **shape, double **well, double **cutsq, double **sigma, double **epsilon,
+                double *host_lshape, int **form, double **host_lj1, double **host_lj2,
+                double **host_lj3, double **host_lj4, double **offset, double *special_lj,
+                const int nlocal, const int nall, const int max_nbors, const int maxspecial,
                const double cell_size, int &gpu_mode, FILE *screen);
 void gb_gpu_clear();
-int ** gb_gpu_compute_n(const int ago, const int inum, const int nall,
-                        double **host_x, int *host_type, double *sublo,
-                        double *subhi, tagint *tag, int **nspecial,
-                        tagint **special, const bool eflag, const bool vflag,
-                        const bool eatom, const bool vatom, int &host_start,
-                        int **ilist, int **jnum, const double cpu_time,
-                        bool &success, double **host_quat);
-int * gb_gpu_compute(const int ago, const int inum, const int nall,
-                     double **host_x, int *host_type, int *ilist, int *numj,
-                     int **firstneigh, const bool eflag, const bool vflag,
-                     const bool eatom, const bool vatom, int &host_start,
-                     const double cpu_time, bool &success, double **host_quat);
+int **gb_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                       int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                       tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                       const bool vatom, int &host_start, int **ilist, int **jnum,
+                       const double cpu_time, bool &success, double **host_quat);
+int *gb_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
+                    int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
+                    const bool eatom, const bool vatom, int &host_start, const double cpu_time,
+                    bool &success, double **host_quat);
 double gb_gpu_bytes();

-enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE};
+enum { SPHERE_SPHERE, SPHERE_ELLIPSE, ELLIPSE_SPHERE, ELLIPSE_ELLIPSE };

 /* ---------------------------------------------------------------------- */

-PairGayBerneGPU::PairGayBerneGPU(LAMMPS *lmp) : PairGayBerne(lmp),
-                                                gpu_mode(GPU_FORCE)
+PairGayBerneGPU::PairGayBerneGPU(LAMMPS *lmp) : PairGayBerne(lmp), gpu_mode(GPU_FORCE)
 {
  quat_nmax = 0;
  reinitflag = 0;
@ -89,7 +81,7 @@ PairGayBerneGPU::~PairGayBerneGPU()

 void PairGayBerneGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -103,7 +95,7 @@ void PairGayBerneGPU::compute(int eflag, int vflag)
  }
  AtomVecEllipsoid::Bonus *bonus = avec->bonus;
  int *ellipsoid = atom->ellipsoid;
-  for (int i=0; i<nall; i++) {
+  for (int i = 0; i < nall; i++) {
    int qi = ellipsoid[i];
    if (qi > -1) {
      quat[i][0] = bonus[qi].quat[0];
@ -114,7 +106,7 @@ void PairGayBerneGPU::compute(int eflag, int vflag)
  }

  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -123,26 +115,22 @@ void PairGayBerneGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = gb_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                  atom->type, sublo, subhi,
-                                  atom->tag, atom->nspecial, atom->special,
-                                  eflag, vflag, eflag_atom, vflag_atom,
-                                  host_start, &ilist, &numneigh, cpu_time,
-                                  success, quat);
+    firstneigh =
+        gb_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+                         atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
+                         host_start, &ilist, &numneigh, cpu_time, success, quat);
  } else {
    inum = list->inum;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    ilist = gb_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                           list->ilist, numneigh, firstneigh, eflag, vflag,
-                           eflag_atom, vflag_atom, host_start,
-                           cpu_time, success, quat);
+    ilist = gb_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, list->ilist, numneigh,
+                           firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
+                           success, quat);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

  if (host_start < inum) {
    cpu_time = platform::walltime();
@ -158,25 +146,22 @@ void PairGayBerneGPU::compute(int eflag, int vflag)
 void PairGayBerneGPU::init_style()
 {
  avec = (AtomVecEllipsoid *) atom->style_match("ellipsoid");
-  if (!avec)
-    error->all(FLERR,"Pair gayberne/gpu requires atom style ellipsoid");
-  if (!atom->ellipsoid_flag)
-    error->all(FLERR,"Pair gayberne/gpu requires atom style ellipsoid");
+  if (!avec) error->all(FLERR, "Pair gayberne/gpu requires atom style ellipsoid");
+  if (!atom->ellipsoid_flag) error->all(FLERR, "Pair gayberne/gpu requires atom style ellipsoid");

  // per-type shape precalculations
  // require that atom shapes are identical within each type
  // if shape = 0 for point particle, set shape = 1 as required by Gay-Berne

  for (int i = 1; i <= atom->ntypes; i++) {
-    if (!atom->shape_consistency(i,shape1[i][0],shape1[i][1],shape1[i][2]))
-      error->all(FLERR,"Pair gayberne/gpu requires atoms with same type have same shape");
-    if (shape1[i][0] == 0.0)
-      shape1[i][0] = shape1[i][1] = shape1[i][2] = 1.0;
-    shape2[i][0] = shape1[i][0]*shape1[i][0];
-    shape2[i][1] = shape1[i][1]*shape1[i][1];
-    shape2[i][2] = shape1[i][2]*shape1[i][2];
-    lshape[i] = (shape1[i][0]*shape1[i][1]+shape1[i][2]*shape1[i][2]) *
-      sqrt(shape1[i][0]*shape1[i][1]);
+    if (!atom->shape_consistency(i, shape1[i][0], shape1[i][1], shape1[i][2]))
+      error->all(FLERR, "Pair gayberne/gpu requires atoms with same type have same shape");
+    if (shape1[i][0] == 0.0) shape1[i][0] = shape1[i][1] = shape1[i][2] = 1.0;
+    shape2[i][0] = shape1[i][0] * shape1[i][0];
+    shape2[i][1] = shape1[i][1] * shape1[i][1];
+    shape2[i][2] = shape1[i][2] * shape1[i][2];
+    lshape[i] = (shape1[i][0] * shape1[i][1] + shape1[i][2] * shape1[i][2]) *
+        sqrt(shape1[i][0] * shape1[i][1]);
  }

  // Repeat cutsq calculation because done after call to init_style
@ -185,10 +170,9 @@ void PairGayBerneGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -197,22 +181,16 @@ void PairGayBerneGPU::init_style()

  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = gb_gpu_init(atom->ntypes+1, gamma, upsilon, mu,
-                            shape2, well, cutsq, sigma, epsilon, lshape, form,
-                            lj1, lj2, lj3, lj4, offset, force->special_lj,
-                            atom->nlocal, atom->nlocal+atom->nghost, mnf,
-                            maxspecial, cell_size, gpu_mode, screen);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      gb_gpu_init(atom->ntypes + 1, gamma, upsilon, mu, shape2, well, cutsq, sigma, epsilon, lshape,
+                  form, lj1, lj2, lj3, lj4, offset, force->special_lj, atom->nlocal,
+                  atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
  quat_nmax = static_cast<int>(1.1 * (atom->nlocal + atom->nghost));
  memory->grow(quat, quat_nmax, 4, "pair:quat");
 }
@ -222,21 +200,20 @@ void PairGayBerneGPU::init_style()
 double PairGayBerneGPU::memory_usage()
 {
  double bytes = Pair::memory_usage();
-  return bytes + memory->usage(quat,quat_nmax)+gb_gpu_bytes();
+  return bytes + memory->usage(quat, quat_nmax) + gb_gpu_bytes();
 }

 /* ---------------------------------------------------------------------- */

-void PairGayBerneGPU::cpu_compute(int start, int inum, int eflag,
-                                  int /* vflag */, int *ilist,
+void PairGayBerneGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
                                  int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double evdwl,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj;
-  double fforce[3],ttor[3],rtor[3],r12[3];
-  double a1[3][3],b1[3][3],g1[3][3],a2[3][3],b2[3][3],g2[3][3],temp[3][3];
+  int i, j, ii, jj, jnum, itype, jtype;
+  double evdwl, one_eng, rsq, r2inv, r6inv, forcelj, factor_lj;
+  double fforce[3], ttor[3], rtor[3], r12[3];
+  double a1[3][3], b1[3][3], g1[3][3], a2[3][3], b2[3][3], g2[3][3], temp[3][3];
  int *jlist;
-  double *iquat,*jquat;
+  double *iquat, *jquat;

  AtomVecEllipsoid::Bonus *bonus = avec->bonus;
  int *ellipsoid = atom->ellipsoid;
@ -254,11 +231,11 @@ void PairGayBerneGPU::cpu_compute(int start, int inum, int eflag,

    if (form[itype][itype] == ELLIPSE_ELLIPSE) {
      iquat = bonus[ellipsoid[i]].quat;
-      MathExtra::quat_to_mat_trans(iquat,a1);
-      MathExtra::diag_times3(well[itype],a1,temp);
-      MathExtra::transpose_times3(a1,temp,b1);
-      MathExtra::diag_times3(shape2[itype],a1,temp);
-      MathExtra::transpose_times3(a1,temp,g1);
+      MathExtra::quat_to_mat_trans(iquat, a1);
+      MathExtra::diag_times3(well[itype], a1, temp);
+      MathExtra::transpose_times3(a1, temp, b1);
+      MathExtra::diag_times3(shape2[itype], a1, temp);
+      MathExtra::transpose_times3(a1, temp, g1);
    }

    jlist = firstneigh[i];
@ -271,10 +248,10 @@ void PairGayBerneGPU::cpu_compute(int start, int inum, int eflag,

      // r12 = center to center vector

-      r12[0] = x[j][0]-x[i][0];
-      r12[1] = x[j][1]-x[i][1];
-      r12[2] = x[j][2]-x[i][2];
-      rsq = MathExtra::dot3(r12,r12);
+      r12[0] = x[j][0] - x[i][0];
+      r12[1] = x[j][1] - x[i][1];
+      r12[2] = x[j][2] - x[i][2];
+      rsq = MathExtra::dot3(r12, r12);
      jtype = type[j];

      // compute if less than cutoff
@ -282,47 +259,46 @@ void PairGayBerneGPU::cpu_compute(int start, int inum, int eflag,
      if (rsq < cutsq[itype][jtype]) {

        switch (form[itype][jtype]) {
-        case SPHERE_SPHERE:
-          r2inv = 1.0/rsq;
-          r6inv = r2inv*r2inv*r2inv;
-          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-          forcelj *= -r2inv;
-          if (eflag) one_eng =
-            r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) -
-            offset[itype][jtype];
-          fforce[0] = r12[0]*forcelj;
-          fforce[1] = r12[1]*forcelj;
-          fforce[2] = r12[2]*forcelj;
-          ttor[0] = ttor[1] = ttor[2] = 0.0;
-          rtor[0] = rtor[1] = rtor[2] = 0.0;
-          break;
+          case SPHERE_SPHERE:
+            r2inv = 1.0 / rsq;
+            r6inv = r2inv * r2inv * r2inv;
+            forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
+            forcelj *= -r2inv;
+            if (eflag)
+              one_eng =
+                  r6inv * (r6inv * lj3[itype][jtype] - lj4[itype][jtype]) - offset[itype][jtype];
+            fforce[0] = r12[0] * forcelj;
+            fforce[1] = r12[1] * forcelj;
+            fforce[2] = r12[2] * forcelj;
+            ttor[0] = ttor[1] = ttor[2] = 0.0;
+            rtor[0] = rtor[1] = rtor[2] = 0.0;
+            break;

-        case SPHERE_ELLIPSE:
-          jquat = bonus[ellipsoid[j]].quat;
-          MathExtra::quat_to_mat_trans(jquat,a2);
-          MathExtra::diag_times3(well[jtype],a2,temp);
-          MathExtra::transpose_times3(a2,temp,b2);
-          MathExtra::diag_times3(shape2[jtype],a2,temp);
-          MathExtra::transpose_times3(a2,temp,g2);
-          one_eng = gayberne_lj(j,i,a2,b2,g2,r12,rsq,fforce,rtor);
-          ttor[0] = ttor[1] = ttor[2] = 0.0;
-          break;
+          case SPHERE_ELLIPSE:
+            jquat = bonus[ellipsoid[j]].quat;
+            MathExtra::quat_to_mat_trans(jquat, a2);
+            MathExtra::diag_times3(well[jtype], a2, temp);
+            MathExtra::transpose_times3(a2, temp, b2);
+            MathExtra::diag_times3(shape2[jtype], a2, temp);
+            MathExtra::transpose_times3(a2, temp, g2);
+            one_eng = gayberne_lj(j, i, a2, b2, g2, r12, rsq, fforce, rtor);
+            ttor[0] = ttor[1] = ttor[2] = 0.0;
+            break;

-        case ELLIPSE_SPHERE:
-          one_eng = gayberne_lj(i,j,a1,b1,g1,r12,rsq,fforce,ttor);
-          rtor[0] = rtor[1] = rtor[2] = 0.0;
-          break;
+          case ELLIPSE_SPHERE:
+            one_eng = gayberne_lj(i, j, a1, b1, g1, r12, rsq, fforce, ttor);
+            rtor[0] = rtor[1] = rtor[2] = 0.0;
+            break;

-        default:
-          jquat = bonus[ellipsoid[j]].quat;
-          MathExtra::quat_to_mat_trans(jquat,a2);
-          MathExtra::diag_times3(well[jtype],a2,temp);
-          MathExtra::transpose_times3(a2,temp,b2);
-          MathExtra::diag_times3(shape2[jtype],a2,temp);
-          MathExtra::transpose_times3(a2,temp,g2);
-          one_eng = gayberne_analytic(i,j,a1,a2,b1,b2,g1,g2,r12,rsq,
-                                      fforce,ttor,rtor);
-          break;
+          default:
+            jquat = bonus[ellipsoid[j]].quat;
+            MathExtra::quat_to_mat_trans(jquat, a2);
+            MathExtra::diag_times3(well[jtype], a2, temp);
+            MathExtra::transpose_times3(a2, temp, b2);
+            MathExtra::diag_times3(shape2[jtype], a2, temp);
+            MathExtra::transpose_times3(a2, temp, g2);
+            one_eng = gayberne_analytic(i, j, a1, a2, b1, b2, g1, g2, r12, rsq, fforce, ttor, rtor);
+            break;
        }

        fforce[0] *= factor_lj;
@ -339,10 +315,11 @@ void PairGayBerneGPU::cpu_compute(int start, int inum, int eflag,
        tor[i][1] += ttor[1];
        tor[i][2] += ttor[2];

-        if (eflag) evdwl = factor_lj*one_eng;
+        if (eflag) evdwl = factor_lj * one_eng;

-        if (evflag) ev_tally_xyz_full(i,evdwl,0.0,fforce[0],fforce[1],fforce[2],
-                                      -r12[0],-r12[1],-r12[2]);
+        if (evflag)
+          ev_tally_xyz_full(i, evdwl, 0.0, fforce[0], fforce[1], fforce[2], -r12[0], -r12[1],
+                            -r12[2]);
      }
    }
  }
--- a/src/GPU/pair_lj96_cut_gpu.cpp
+++ b/src/GPU/pair_lj96_cut_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,23 +32,19 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
-                  double **host_lj2, double **host_lj3, double **host_lj4,
-                  double **offset, double *special_lj, const int nlocal,
-                  const int nall, const int max_nbors, const int maxspecial,
+int lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                  double **host_lj3, double **host_lj4, double **offset, double *special_lj,
+                  const int nlocal, const int nall, const int max_nbors, const int maxspecial,
                  const double cell_size, int &gpu_mode, FILE *screen);
 void lj96_gpu_clear();
-int ** lj96_gpu_compute_n(const int ago, const int inum, const int nall,
-                          double **host_x, int *host_type, double *sublo,
-                          double *subhi, tagint *tag, int **nspecial,
-                          tagint **special, const bool eflag, const bool vflag,
-                          const bool eatom, const bool vatom, int &host_start,
-                          int **ilist, int **jnum,
-                          const double cpu_time, bool &success);
-void lj96_gpu_compute(const int ago, const int inum, const int nall,
-                      double **host_x, int *host_type, int *ilist, int *numj,
-                      int **firstneigh, const bool eflag, const bool vflag,
-                      const bool eatom, const bool vatom, int &host_start,
+int **lj96_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                         int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                         const bool vatom, int &host_start, int **ilist, int **jnum,
+                         const double cpu_time, bool &success);
+void lj96_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                      int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                      const bool vflag, const bool eatom, const bool vatom, int &host_start,
                      const double cpu_time, bool &success);
 double lj96_gpu_bytes();

@ -78,7 +72,7 @@ PairLJ96CutGPU::~PairLJ96CutGPU()

 void PairLJ96CutGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -86,7 +80,7 @@ void PairLJ96CutGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -95,28 +89,24 @@ void PairLJ96CutGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = lj96_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                    atom->type, sublo, subhi,
-                                    atom->tag, atom->nspecial, atom->special,
-                                    eflag, vflag, eflag_atom, vflag_atom,
-                                    host_start, &ilist, &numneigh, cpu_time,
-                                    success);
+    firstneigh =
+        lj96_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+                           atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
+                           host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    lj96_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                     vflag_atom, host_start, cpu_time, success);
+    lj96_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                     eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -131,17 +121,15 @@ void PairLJ96CutGPU::init_style()
 {
  cut_respa = nullptr;

-
  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
  double cut;
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -149,21 +137,15 @@ void PairLJ96CutGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = lj96_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
-                              offset, force->special_lj, atom->nlocal,
-                              atom->nlocal+atom->nghost, mnf, maxspecial,
-                              cell_size, gpu_mode, screen);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success = lj96_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset,
+                              force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
+                              maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -176,13 +158,12 @@ double PairLJ96CutGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairLJ96CutGPU::cpu_compute(int start, int inum, int eflag,
-                                 int /* vflag */, int *ilist,
+void PairLJ96CutGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
                                 int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r3inv,r6inv,forcelj,factor_lj;
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
+  double rsq, r2inv, r3inv, r6inv, forcelj, factor_lj;
  int *jlist;

  double **x = atom->x;
@ -209,27 +190,26 @@ void PairLJ96CutGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
-        r6inv = r2inv*r2inv*r2inv;
+        r2inv = 1.0 / rsq;
+        r6inv = r2inv * r2inv * r2inv;
        r3inv = sqrt(r6inv);
-        forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
-        fpair = factor_lj*forcelj*r2inv;
+        forcelj = r6inv * (lj1[itype][jtype] * r3inv - lj2[itype][jtype]);
+        fpair = factor_lj * forcelj * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
-          evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
-            offset[itype][jtype];
+          evdwl = r6inv * (lj3[itype][jtype] * r3inv - lj4[itype][jtype]) - offset[itype][jtype];
          evdwl *= factor_lj;
        }

-        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_lj_charmm_coul_charmm_gpu.cpp
+++ b/src/GPU/pair_lj_charmm_coul_charmm_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"

 #include <cmath>
@ -33,38 +31,30 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int crm_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1,
-                 double **host_lj2, double **host_lj3, double **host_lj4,
-                 double *special_lj, const int nlocal,
-                 const int nall, const int max_nbors, const int maxspecial,
-                 const double cell_size, int &gpu_mode, FILE *screen,
-                 double host_cut_ljsq, double host_cut_coulsq,
-                 double *host_special_coul, const double qqrd2e,
-                 const double cut_lj_innersq, const double cut_coul_innersq,
-                 const double denom_lj, const double denom_coul,
-                 double **epsilon, double **sigma,
-                 const bool mix_arithmetic);
+int crm_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1, double **host_lj2,
+                 double **host_lj3, double **host_lj4, double *special_lj, const int nlocal,
+                 const int nall, const int max_nbors, const int maxspecial, const double cell_size,
+                 int &gpu_mode, FILE *screen, double host_cut_ljsq, double host_cut_coulsq,
+                 double *host_special_coul, const double qqrd2e, const double cut_lj_innersq,
+                 const double cut_coul_innersq, const double denom_lj, const double denom_coul,
+                 double **epsilon, double **sigma, const bool mix_arithmetic);
 void crm_gpu_clear();
-int ** crm_gpu_compute_n(const int ago, const int inum,
-                         const int nall, double **host_x, int *host_type,
-                         double *sublo, double *subhi, tagint *tag,
-                         int **nspecial, tagint **special, const bool eflag,
-                         const bool vflag, const bool eatom,
-                         const bool vatom, int &host_start, int **ilist,
-                         int **jnum, const double cpu_time, bool &success,
-                         double *host_q, double *boxlo, double *prd);
-void crm_gpu_compute(const int ago, const int inum, const int nall,
-                     double **host_x, int *host_type, int *ilist, int *numj,
-                     int **firstneigh, const bool eflag, const bool vflag,
-                     const bool eatom, const bool vatom, int &host_start,
-                     const double cpu_time, bool &success, double *host_q,
-                     const int nlocal, double *boxlo, double *prd);
+int **crm_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                        int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                        tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                        const bool vatom, int &host_start, int **ilist, int **jnum,
+                        const double cpu_time, bool &success, double *host_q, double *boxlo,
+                        double *prd);
+void crm_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
+                     int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
+                     const bool eatom, const bool vatom, int &host_start, const double cpu_time,
+                     bool &success, double *host_q, const int nlocal, double *boxlo, double *prd);
 double crm_gpu_bytes();

 /* ---------------------------------------------------------------------- */

 PairLJCharmmCoulCharmmGPU::PairLJCharmmCoulCharmmGPU(LAMMPS *lmp) :
-  PairLJCharmmCoulCharmm(lmp), gpu_mode(GPU_FORCE)
+    PairLJCharmmCoulCharmm(lmp), gpu_mode(GPU_FORCE)
 {
  reinitflag = 0;
  cpu_time = 0.0;
@ -84,8 +74,10 @@ PairLJCharmmCoulCharmmGPU::~PairLJCharmmCoulCharmmGPU()

 void PairLJCharmmCoulCharmmGPU::compute(int eflag, int vflag)
 {
-  if (eflag || vflag) ev_setup(eflag,vflag);
-  else evflag = vflag_fdotr = 0;
+  if (eflag || vflag)
+    ev_setup(eflag, vflag);
+  else
+    evflag = vflag_fdotr = 0;

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -94,27 +86,22 @@ void PairLJCharmmCoulCharmmGPU::compute(int eflag, int vflag)
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
    inum = atom->nlocal;
-    firstneigh = crm_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                   atom->type, domain->sublo, domain->subhi,
-                                   atom->tag, atom->nspecial, atom->special,
-                                   eflag, vflag, eflag_atom, vflag_atom,
-                                   host_start, &ilist, &numneigh, cpu_time,
-                                   success, atom->q, domain->boxlo,
-                                   domain->prd);
+    firstneigh = crm_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, domain->sublo,
+                                   domain->subhi, atom->tag, atom->nspecial, atom->special, eflag,
+                                   vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
+                                   cpu_time, success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    crm_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                    vflag_atom, host_start, cpu_time, success, atom->q,
+    crm_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                    eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
                    atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -134,8 +121,7 @@ void PairLJCharmmCoulCharmmGPU::init_style()

  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
-      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0))
-        init_one(i,j);
+      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) init_one(i, j);
    }
  }

@ -143,46 +129,37 @@ void PairLJCharmmCoulCharmmGPU::init_style()
  cut_coul_innersq = cut_coul_inner * cut_coul_inner;
  cut_ljsq = cut_lj * cut_lj;
  cut_coulsq = cut_coul * cut_coul;
-  cut_bothsq = MAX(cut_ljsq,cut_coulsq);
+  cut_bothsq = MAX(cut_ljsq, cut_coulsq);

-  denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) *
-    (cut_ljsq-cut_lj_innersq);
+  denom_lj =
+      (cut_ljsq - cut_lj_innersq) * (cut_ljsq - cut_lj_innersq) * (cut_ljsq - cut_lj_innersq);
  denom_lj = 1.0 / denom_lj;

-  denom_coul = (cut_coulsq-cut_coul_innersq) * (cut_coulsq-cut_coul_innersq) *
-    (cut_coulsq-cut_coul_innersq);
+  denom_coul = (cut_coulsq - cut_coul_innersq) * (cut_coulsq - cut_coul_innersq) *
+      (cut_coulsq - cut_coul_innersq);
  denom_coul = 1.0 / denom_coul;

  double cell_size = sqrt(cut_bothsq) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;

  bool arithmetic = true;
  for (int i = 1; i < atom->ntypes + 1; i++)
    for (int j = i + 1; j < atom->ntypes + 1; j++) {
-      if (epsilon[i][j] != sqrt(epsilon[i][i] * epsilon[j][j]))
-        arithmetic = false;
-      if (sigma[i][j] != 0.5 * (sigma[i][i] + sigma[j][j]))
-        arithmetic = false;
+      if (epsilon[i][j] != sqrt(epsilon[i][i] * epsilon[j][j])) arithmetic = false;
+      if (sigma[i][j] != 0.5 * (sigma[i][i] + sigma[j][j])) arithmetic = false;
    }

  int mnf = 5e-2 * neighbor->oneatom;
-  int success = crm_gpu_init(atom->ntypes+1, cut_bothsq, lj1, lj2, lj3, lj4,
-                             force->special_lj, atom->nlocal,
-                             atom->nlocal+atom->nghost, mnf, maxspecial,
-                             cell_size, gpu_mode, screen, cut_ljsq,
-                             cut_coulsq, force->special_coul, force->qqrd2e,
-                             cut_lj_innersq,cut_coul_innersq,denom_lj,
-                             denom_coul,epsilon,sigma,arithmetic);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      crm_gpu_init(atom->ntypes + 1, cut_bothsq, lj1, lj2, lj3, lj4, force->special_lj,
+                   atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
+                   screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e, cut_lj_innersq,
+                   cut_coul_innersq, denom_lj, denom_coul, epsilon, sigma, arithmetic);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -195,14 +172,13 @@ double PairLJCharmmCoulCharmmGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairLJCharmmCoulCharmmGPU::cpu_compute(int start, int inum, int eflag,
-                                            int /* vflag */, int *ilist,
-                                            int *numneigh, int **firstneigh)
+void PairLJCharmmCoulCharmmGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
+                                            int *ilist, int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
-  double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
-  double philj,switch1,switch2;
+  int i, j, ii, jj, jnum, itype, jtype;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
+  double rsq, r2inv, r6inv, forcecoul, forcelj, factor_coul, factor_lj;
+  double philj, switch1, switch2;
  int *jlist;

  evdwl = ecoul = 0.0;
@ -236,64 +212,66 @@ void PairLJCharmmCoulCharmmGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;

      if (rsq < cut_bothsq) {
-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;

        if (rsq < cut_coulsq) {
-          forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
+          forcecoul = qqrd2e * qtmp * q[j] * sqrt(r2inv);
          if (rsq > cut_coul_innersq) {
-            switch1 = (cut_coulsq-rsq) * (cut_coulsq-rsq) *
-              (cut_coulsq + 2.0*rsq - 3.0*cut_coul_innersq) * denom_coul;
+            switch1 = (cut_coulsq - rsq) * (cut_coulsq - rsq) *
+                (cut_coulsq + 2.0 * rsq - 3.0 * cut_coul_innersq) * denom_coul;
            forcecoul *= switch1;
          }
-        } else forcecoul = 0.0;
+        } else
+          forcecoul = 0.0;

        if (rsq < cut_ljsq) {
-          r6inv = r2inv*r2inv*r2inv;
+          r6inv = r2inv * r2inv * r2inv;
          jtype = type[j];
-          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+          forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
          if (rsq > cut_lj_innersq) {
-            switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
-              (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj;
-            switch2 = 12.0*rsq * (cut_ljsq-rsq) *
-              (rsq-cut_lj_innersq) * denom_lj;
-            philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
-            forcelj = forcelj*switch1 + philj*switch2;
+            switch1 = (cut_ljsq - rsq) * (cut_ljsq - rsq) *
+                (cut_ljsq + 2.0 * rsq - 3.0 * cut_lj_innersq) * denom_lj;
+            switch2 = 12.0 * rsq * (cut_ljsq - rsq) * (rsq - cut_lj_innersq) * denom_lj;
+            philj = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]);
+            forcelj = forcelj * switch1 + philj * switch2;
          }
-        } else forcelj = 0.0;
+        } else
+          forcelj = 0.0;

-        fpair = (factor_coul*forcecoul + factor_lj*forcelj) * r2inv;
+        fpair = (factor_coul * forcecoul + factor_lj * forcelj) * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          if (rsq < cut_coulsq) {
-            ecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
+            ecoul = qqrd2e * qtmp * q[j] * sqrt(r2inv);
            if (rsq > cut_coul_innersq) {
-              switch1 = (cut_coulsq-rsq) * (cut_coulsq-rsq) *
-                (cut_coulsq + 2.0*rsq - 3.0*cut_coul_innersq) *
-                denom_coul;
+              switch1 = (cut_coulsq - rsq) * (cut_coulsq - rsq) *
+                  (cut_coulsq + 2.0 * rsq - 3.0 * cut_coul_innersq) * denom_coul;
              ecoul *= switch1;
            }
            ecoul *= factor_coul;
-          } else ecoul = 0.0;
+          } else
+            ecoul = 0.0;

          if (rsq < cut_ljsq) {
-            evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
+            evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]);
            if (rsq > cut_lj_innersq) {
-              switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
-                (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj;
+              switch1 = (cut_ljsq - rsq) * (cut_ljsq - rsq) *
+                  (cut_ljsq + 2.0 * rsq - 3.0 * cut_lj_innersq) * denom_lj;
              evdwl *= switch1;
            }
            evdwl *= factor_lj;
-          } else evdwl = 0.0;
+          } else
+            evdwl = 0.0;
        }

-        if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_lj_charmm_coul_long_gpu.cpp
+++ b/src/GPU/pair_lj_charmm_coul_long_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -25,55 +24,48 @@
 #include "gpu_extra.h"
 #include "kspace.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

 #include <cmath>

-#define EWALD_F   1.12837917
-#define EWALD_P   0.3275911
-#define A1        0.254829592
-#define A2       -0.284496736
-#define A3        1.421413741
-#define A4       -1.453152027
-#define A5        1.061405429
+#define EWALD_F 1.12837917
+#define EWALD_P 0.3275911
+#define A1 0.254829592
+#define A2 -0.284496736
+#define A3 1.421413741
+#define A4 -1.453152027
+#define A5 1.061405429

 using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int crml_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1,
-                  double **host_lj2, double **host_lj3, double **host_lj4,
-                  double **offset, double *special_lj, const int nlocal,
-                  const int nall, const int max_nbors, const int maxspecial,
-                  const double cell_size, int &gpu_mode, FILE *screen,
-                  double host_cut_ljsq, double host_cut_coulsq,
-                  double *host_special_coul, const double qqrd2e,
-                  const double g_ewald, const double cut_lj_innersq,
-                  const double denom_lj, double **epsilon, double **sigma,
-                  const bool mix_arithmetic);
+int crml_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1, double **host_lj2,
+                  double **host_lj3, double **host_lj4, double **offset, double *special_lj,
+                  const int nlocal, const int nall, const int max_nbors, const int maxspecial,
+                  const double cell_size, int &gpu_mode, FILE *screen, double host_cut_ljsq,
+                  double host_cut_coulsq, double *host_special_coul, const double qqrd2e,
+                  const double g_ewald, const double cut_lj_innersq, const double denom_lj,
+                  double **epsilon, double **sigma, const bool mix_arithmetic);
 void crml_gpu_clear();
-int ** crml_gpu_compute_n(const int ago, const int inum,
-                          const int nall, double **host_x, int *host_type,
-                          double *sublo, double *subhi, tagint *tag,
-                          int **nspecial, tagint **special, const bool eflag,
-                          const bool vflag, const bool eatom, const bool vatom,
-                          int &host_start, int **ilist, int **jnum,
-                          const double cpu_time, bool &success, double *host_q,
-                          double *boxlo, double *prd);
-void crml_gpu_compute(const int ago, const int inum, const int nall,
-                      double **host_x, int *host_type, int *ilist, int *numj,
-                      int **firstneigh, const bool eflag, const bool vflag,
-                      const bool eatom, const bool vatom, int &host_start,
-                      const double cpu_time, bool &success, double *host_q,
-                      const int nlocal, double *boxlo, double *prd);
+int **crml_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                         int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                         const bool vatom, int &host_start, int **ilist, int **jnum,
+                         const double cpu_time, bool &success, double *host_q, double *boxlo,
+                         double *prd);
+void crml_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                      int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                      const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                      const double cpu_time, bool &success, double *host_q, const int nlocal,
+                      double *boxlo, double *prd);
 double crml_gpu_bytes();

 /* ---------------------------------------------------------------------- */

 PairLJCharmmCoulLongGPU::PairLJCharmmCoulLongGPU(LAMMPS *lmp) :
-  PairLJCharmmCoulLong(lmp), gpu_mode(GPU_FORCE)
+    PairLJCharmmCoulLong(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  reinitflag = 0;
@ -95,7 +87,7 @@ PairLJCharmmCoulLongGPU::~PairLJCharmmCoulLongGPU()

 void PairLJCharmmCoulLongGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -103,7 +95,7 @@ void PairLJCharmmCoulLongGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -112,30 +104,25 @@ void PairLJCharmmCoulLongGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = crml_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                    atom->type, sublo, subhi,
-                                    atom->tag, atom->nspecial, atom->special,
-                                    eflag, vflag, eflag_atom, vflag_atom,
-                                    host_start, &ilist, &numneigh, cpu_time,
-                                    success, atom->q, domain->boxlo,
-                                    domain->prd);
+    firstneigh = crml_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                    atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                    eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
+                                    success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    crml_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                     vflag_atom, host_start, cpu_time, success, atom->q,
+    crml_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                     eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
                     atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -151,65 +138,54 @@ void PairLJCharmmCoulLongGPU::init_style()
  cut_respa = nullptr;

  if (!atom->q_flag)
-    error->all(FLERR,"Pair style lj/charmm/coul/long/gpu requires atom attribute q");
+    error->all(FLERR, "Pair style lj/charmm/coul/long/gpu requires atom attribute q");

  // Repeat cutsq calculation because done after call to init_style

  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
-      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0))
-        init_one(i,j);
+      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) init_one(i, j);
    }
  }

  cut_lj_innersq = cut_lj_inner * cut_lj_inner;
  cut_ljsq = cut_lj * cut_lj;
  cut_coulsq = cut_coul * cut_coul;
-  cut_bothsq = MAX(cut_ljsq,cut_coulsq);
+  cut_bothsq = MAX(cut_ljsq, cut_coulsq);

-  denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) *
-    (cut_ljsq-cut_lj_innersq);
+  denom_lj =
+      (cut_ljsq - cut_lj_innersq) * (cut_ljsq - cut_lj_innersq) * (cut_ljsq - cut_lj_innersq);

  double cell_size = sqrt(cut_bothsq) + neighbor->skin;

  // insure use of KSpace long-range solver, set g_ewald

-  if (force->kspace == nullptr)
-    error->all(FLERR,"Pair style requires a KSpace style");
+  if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
  g_ewald = force->kspace->g_ewald;

  // setup force tables

-  if (ncoultablebits) init_tables(cut_coul,cut_respa);
+  if (ncoultablebits) init_tables(cut_coul, cut_respa);

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;

  bool arithmetic = true;
  for (int i = 1; i < atom->ntypes + 1; i++)
    for (int j = i + 1; j < atom->ntypes + 1; j++) {
-      if (epsilon[i][j] != sqrt(epsilon[i][i] * epsilon[j][j]))
-        arithmetic = false;
-      if (sigma[i][j] != 0.5 * (sigma[i][i] + sigma[j][j]))
-        arithmetic = false;
+      if (epsilon[i][j] != sqrt(epsilon[i][i] * epsilon[j][j])) arithmetic = false;
+      if (sigma[i][j] != 0.5 * (sigma[i][i] + sigma[j][j])) arithmetic = false;
    }

  int mnf = 5e-2 * neighbor->oneatom;
-  int success = crml_gpu_init(atom->ntypes+1, cut_bothsq, lj1, lj2, lj3, lj4,
-                              offset, force->special_lj, atom->nlocal,
-                              atom->nlocal+atom->nghost, mnf, maxspecial,
-                              cell_size, gpu_mode, screen, cut_ljsq,
-                              cut_coulsq, force->special_coul, force->qqrd2e,
-                              g_ewald, cut_lj_innersq,denom_lj,epsilon,sigma,
-                              arithmetic);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      crml_gpu_init(atom->ntypes + 1, cut_bothsq, lj1, lj2, lj3, lj4, offset, force->special_lj,
+                    atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
+                    screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e, g_ewald,
+                    cut_lj_innersq, denom_lj, epsilon, sigma, arithmetic);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -222,16 +198,15 @@ double PairLJCharmmCoulLongGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairLJCharmmCoulLongGPU::cpu_compute(int start, int inum, int eflag,
-                                          int /* vflag */, int *ilist,
-                                          int *numneigh, int **firstneigh)
+void PairLJCharmmCoulLongGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
+                                          int *ilist, int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itype,jtype,itable;
-  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
-  double fraction,table;
-  double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
-  double grij,expm2,prefactor,t,erfc;
-  double philj,switch1,switch2;
+  int i, j, ii, jj, jnum, itype, jtype, itable;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
+  double fraction, table;
+  double r, r2inv, r6inv, forcecoul, forcelj, factor_coul, factor_lj;
+  double grij, expm2, prefactor, t, erfc;
+  double philj, switch1, switch2;
  int *jlist;
  double rsq;

@ -266,80 +241,83 @@ void PairLJCharmmCoulLongGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;

      if (rsq < cut_bothsq) {
-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;

        if (rsq < cut_coulsq) {
          if (!ncoultablebits || rsq <= tabinnersq) {
            r = sqrt(rsq);
            grij = g_ewald * r;
-            expm2 = exp(-grij*grij);
-            t = 1.0 / (1.0 + EWALD_P*grij);
-            erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
-            prefactor = qqrd2e * qtmp*q[j]/r;
-            forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
-            if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+            expm2 = exp(-grij * grij);
+            t = 1.0 / (1.0 + EWALD_P * grij);
+            erfc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * expm2;
+            prefactor = qqrd2e * qtmp * q[j] / r;
+            forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
+            if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
          } else {
            union_int_float_t rsq_lookup;
            rsq_lookup.f = rsq;
            itable = rsq_lookup.i & ncoulmask;
            itable >>= ncoulshiftbits;
            fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
-            table = ftable[itable] + fraction*dftable[itable];
-            forcecoul = qtmp*q[j] * table;
+            table = ftable[itable] + fraction * dftable[itable];
+            forcecoul = qtmp * q[j] * table;
            if (factor_coul < 1.0) {
-              table = ctable[itable] + fraction*dctable[itable];
-              prefactor = qtmp*q[j] * table;
-              forcecoul -= (1.0-factor_coul)*prefactor;
+              table = ctable[itable] + fraction * dctable[itable];
+              prefactor = qtmp * q[j] * table;
+              forcecoul -= (1.0 - factor_coul) * prefactor;
            }
          }
-        } else forcecoul = 0.0;
+        } else
+          forcecoul = 0.0;

        if (rsq < cut_ljsq) {
-          r6inv = r2inv*r2inv*r2inv;
+          r6inv = r2inv * r2inv * r2inv;
          jtype = type[j];
-          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+          forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
          if (rsq > cut_lj_innersq) {
-            switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
-              (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
-            switch2 = 12.0*rsq * (cut_ljsq-rsq) *
-              (rsq-cut_lj_innersq) / denom_lj;
-            philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
-            forcelj = forcelj*switch1 + philj*switch2;
+            switch1 = (cut_ljsq - rsq) * (cut_ljsq - rsq) *
+                (cut_ljsq + 2.0 * rsq - 3.0 * cut_lj_innersq) / denom_lj;
+            switch2 = 12.0 * rsq * (cut_ljsq - rsq) * (rsq - cut_lj_innersq) / denom_lj;
+            philj = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]);
+            forcelj = forcelj * switch1 + philj * switch2;
          }
-        } else forcelj = 0.0;
+        } else
+          forcelj = 0.0;

-        fpair = (forcecoul + factor_lj*forcelj) * r2inv;
+        fpair = (forcecoul + factor_lj * forcelj) * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          if (rsq < cut_coulsq) {
            if (!ncoultablebits || rsq <= tabinnersq)
-              ecoul = prefactor*erfc;
+              ecoul = prefactor * erfc;
            else {
-              table = etable[itable] + fraction*detable[itable];
-              ecoul = qtmp*q[j] * table;
+              table = etable[itable] + fraction * detable[itable];
+              ecoul = qtmp * q[j] * table;
            }
-            if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
-          } else ecoul = 0.0;
+            if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
+          } else
+            ecoul = 0.0;

          if (rsq < cut_ljsq) {
-            evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
+            evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]);
            if (rsq > cut_lj_innersq) {
-              switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
-                (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
+              switch1 = (cut_ljsq - rsq) * (cut_ljsq - rsq) *
+                  (cut_ljsq + 2.0 * rsq - 3.0 * cut_lj_innersq) / denom_lj;
              evdwl *= switch1;
            }
            evdwl *= factor_lj;
-          } else evdwl = 0.0;
+          } else
+            evdwl = 0.0;
        }

-        if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_lj_class2_coul_long_gpu.cpp
+++ b/src/GPU/pair_lj_class2_coul_long_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -25,53 +24,47 @@
 #include "gpu_extra.h"
 #include "kspace.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

 #include <cmath>

-#define EWALD_F   1.12837917
-#define EWALD_P   0.3275911
-#define A1        0.254829592
-#define A2       -0.284496736
-#define A3        1.421413741
-#define A4       -1.453152027
-#define A5        1.061405429
+#define EWALD_F 1.12837917
+#define EWALD_P 0.3275911
+#define A1 0.254829592
+#define A2 -0.284496736
+#define A3 1.421413741
+#define A4 -1.453152027
+#define A5 1.061405429

 using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int c2cl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
-                  double **host_lj2, double **host_lj3, double **host_lj4,
-                  double **offset, double *special_lj, const int nlocal,
-                  const int nall, const int max_nbors, const int maxspecial,
-                  const double cell_size, int &gpu_mode, FILE *screen,
-                  double **host_cut_ljsq, double host_cut_coulsq,
-                  double *host_special_coul, const double qqrd2e,
+int c2cl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                  double **host_lj3, double **host_lj4, double **offset, double *special_lj,
+                  const int nlocal, const int nall, const int max_nbors, const int maxspecial,
+                  const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
+                  double host_cut_coulsq, double *host_special_coul, const double qqrd2e,
                  const double g_ewald);
 void c2cl_gpu_clear();
-int ** c2cl_gpu_compute_n(const int ago, const int inum,
-                          const int nall, double **host_x, int *host_type,
-                          double *sublo, double *subhi, tagint *tag,
-                          int **nspecial, tagint **special, const bool eflag,
-                          const bool vflag, const bool eatom, const bool vatom,
-                          int &host_start, int **ilist, int **jnum,
-                          const double cpu_time, bool &success, double *host_q,
-                          double *boxlo, double *prd);
-void c2cl_gpu_compute(const int ago, const int inum, const int nall,
-                      double **host_x, int *host_type, int *ilist, int *numj,
-                      int **firstneigh, const bool eflag, const bool vflag,
-                      const bool eatom, const bool vatom, int &host_start,
-                      const double cpu_time, bool &success, double *host_q,
-                      const int nlocal, double *boxlo, double *prd);
+int **c2cl_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                         int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                         const bool vatom, int &host_start, int **ilist, int **jnum,
+                         const double cpu_time, bool &success, double *host_q, double *boxlo,
+                         double *prd);
+void c2cl_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                      int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                      const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                      const double cpu_time, bool &success, double *host_q, const int nlocal,
+                      double *boxlo, double *prd);
 double c2cl_gpu_bytes();

 /* ---------------------------------------------------------------------- */

 PairLJClass2CoulLongGPU::PairLJClass2CoulLongGPU(LAMMPS *lmp) :
-  PairLJClass2CoulLong(lmp), gpu_mode(GPU_FORCE)
+    PairLJClass2CoulLong(lmp), gpu_mode(GPU_FORCE)
 {
  cpu_time = 0.0;
  reinitflag = 0;
@ -92,7 +85,7 @@ PairLJClass2CoulLongGPU::~PairLJClass2CoulLongGPU()

 void PairLJClass2CoulLongGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -100,7 +93,7 @@ void PairLJClass2CoulLongGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -109,30 +102,25 @@ void PairLJClass2CoulLongGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = c2cl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                    atom->type, sublo, subhi,
-                                    atom->tag, atom->nspecial, atom->special,
-                                    eflag, vflag, eflag_atom, vflag_atom,
-                                    host_start, &ilist, &numneigh, cpu_time,
-                                    success, atom->q, domain->boxlo,
-                                    domain->prd);
+    firstneigh = c2cl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                    atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                    eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
+                                    success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    c2cl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                     vflag_atom, host_start, cpu_time, success, atom->q,
+    c2cl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                     eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
                     atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -146,7 +134,7 @@ void PairLJClass2CoulLongGPU::compute(int eflag, int vflag)
 void PairLJClass2CoulLongGPU::init_style()
 {
  if (!atom->q_flag)
-    error->all(FLERR,"Pair style lj/class2/coul/long/gpu requires atom attribute q");
+    error->all(FLERR, "Pair style lj/class2/coul/long/gpu requires atom attribute q");

  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
@ -154,10 +142,9 @@ void PairLJClass2CoulLongGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -169,30 +156,23 @@ void PairLJClass2CoulLongGPU::init_style()

  // insure use of KSpace long-range solver, set g_ewald

-  if (force->kspace == nullptr)
-    error->all(FLERR,"Pair style requires a KSpace style");
+  if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
  g_ewald = force->kspace->g_ewald;

  // setup force tables

-  if (ncoultablebits) init_tables(cut_coul,cut_respa);
+  if (ncoultablebits) init_tables(cut_coul, cut_respa);

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = c2cl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
-                              offset, force->special_lj, atom->nlocal,
-                              atom->nlocal+atom->nghost, mnf, maxspecial,
-                              cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
-                              force->special_coul, force->qqrd2e, g_ewald);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      c2cl_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj,
+                    atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
+                    screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e, g_ewald);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -205,15 +185,14 @@ double PairLJClass2CoulLongGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairLJClass2CoulLongGPU::cpu_compute(int start, int inum, int eflag,
-                                          int /* vflag */, int *ilist,
-                                          int *numneigh, int **firstneigh)
+void PairLJClass2CoulLongGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
+                                          int *ilist, int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
-  double rsq,r,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj;
-  double grij,expm2,prefactor,t,erfc;
-  double factor_coul,factor_lj;
+  int i, j, ii, jj, jnum, itype, jtype;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
+  double rsq, r, rinv, r2inv, r3inv, r6inv, forcecoul, forcelj;
+  double grij, expm2, prefactor, t, erfc;
+  double factor_coul, factor_lj;
  int *jlist;

  evdwl = ecoul = 0.0;
@ -247,49 +226,52 @@ void PairLJClass2CoulLongGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;

        if (rsq < cut_coulsq) {
          r = sqrt(rsq);
          grij = g_ewald * r;
-          expm2 = exp(-grij*grij);
-          t = 1.0 / (1.0 + EWALD_P*grij);
-          erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
-          prefactor = qqrd2e * qtmp*q[j]/r;
-          forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
-          if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
-        } else forcecoul = 0.0;
+          expm2 = exp(-grij * grij);
+          t = 1.0 / (1.0 + EWALD_P * grij);
+          erfc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * expm2;
+          prefactor = qqrd2e * qtmp * q[j] / r;
+          forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
+          if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
+        } else
+          forcecoul = 0.0;

        if (rsq < cut_ljsq[itype][jtype]) {
          rinv = sqrt(r2inv);
-          r3inv = r2inv*rinv;
-          r6inv = r3inv*r3inv;
-          forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
-        } else forcelj = 0.0;
+          r3inv = r2inv * rinv;
+          r6inv = r3inv * r3inv;
+          forcelj = r6inv * (lj1[itype][jtype] * r3inv - lj2[itype][jtype]);
+        } else
+          forcelj = 0.0;

-        fpair = (forcecoul + factor_lj*forcelj) * r2inv;
+        fpair = (forcecoul + factor_lj * forcelj) * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          if (rsq < cut_coulsq) {
-            ecoul = prefactor*erfc;
-            if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
-          } else ecoul = 0.0;
+            ecoul = prefactor * erfc;
+            if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
+          } else
+            ecoul = 0.0;
          if (rsq < cut_ljsq[itype][jtype]) {
-            evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
-              offset[itype][jtype];
+            evdwl = r6inv * (lj3[itype][jtype] * r3inv - lj4[itype][jtype]) - offset[itype][jtype];
            evdwl *= factor_lj;
-          } else evdwl = 0.0;
+          } else
+            evdwl = 0.0;
        }

-        if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_lj_class2_gpu.cpp
+++ b/src/GPU/pair_lj_class2_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,23 +32,19 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
-                  double **host_lj2, double **host_lj3, double **host_lj4,
-                  double **offset, double *special_lj, const int nlocal,
-                  const int nall, const int max_nbors, const int maxspecial,
+int lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                  double **host_lj3, double **host_lj4, double **offset, double *special_lj,
+                  const int nlocal, const int nall, const int max_nbors, const int maxspecial,
                  const double cell_size, int &gpu_mode, FILE *screen);
 void lj96_gpu_clear();
-int **lj96_gpu_compute_n(const int ago, const int inum, const int nall,
-                         double **host_x, int *host_type, double *sublo,
-                         double *subhi, tagint *tag, int **nspecial,
-                         tagint **special, const bool eflag, const bool vflag,
-                         const bool eatom, const bool vatom, int &host_start,
-                         int **ilist, int **jnum,
+int **lj96_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                         int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                         const bool vatom, int &host_start, int **ilist, int **jnum,
                         const double cpu_time, bool &success);
-void lj96_gpu_compute(const int ago, const int inum, const int nall,
-                      double **host_x, int *host_type, int *ilist, int *numj,
-                      int **firstneigh, const bool eflag, const bool vflag,
-                      const bool eatom, const bool vatom, int &host_start,
+void lj96_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                      int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                      const bool vflag, const bool eatom, const bool vatom, int &host_start,
                      const double cpu_time, bool &success);
 double lj96_gpu_bytes();

@ -78,7 +72,7 @@ PairLJClass2GPU::~PairLJClass2GPU()

 void PairLJClass2GPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -86,7 +80,7 @@ void PairLJClass2GPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -95,28 +89,24 @@ void PairLJClass2GPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = lj96_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                    atom->type, sublo, subhi,
-                                    atom->tag, atom->nspecial, atom->special,
-                                    eflag, vflag, eflag_atom, vflag_atom,
-                                    host_start, &ilist, &numneigh, cpu_time,
-                                    success);
+    firstneigh =
+        lj96_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+                           atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
+                           host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    lj96_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                     vflag_atom, host_start, cpu_time, success);
+    lj96_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                     eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -136,10 +126,9 @@ void PairLJClass2GPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -147,21 +136,15 @@ void PairLJClass2GPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = lj96_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
-                              offset, force->special_lj, atom->nlocal,
-                              atom->nlocal+atom->nghost, mnf, maxspecial,
-                              cell_size, gpu_mode, screen);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success = lj96_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset,
+                              force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
+                              maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -174,13 +157,12 @@ double PairLJClass2GPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairLJClass2GPU::cpu_compute(int start, int inum, int eflag,
-                                  int /* vflag */, int *ilist,
+void PairLJClass2GPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
                                  int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r3inv,r6inv,forcelj,factor_lj;
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
+  double rsq, r2inv, r3inv, r6inv, forcelj, factor_lj;
  int *jlist;

  double **x = atom->x;
@ -207,27 +189,26 @@ void PairLJClass2GPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
-        r6inv = r2inv*r2inv*r2inv;
+        r2inv = 1.0 / rsq;
+        r6inv = r2inv * r2inv * r2inv;
        r3inv = sqrt(r6inv);
-        forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
-        fpair = factor_lj*forcelj*r2inv;
+        forcelj = r6inv * (lj1[itype][jtype] * r3inv - lj2[itype][jtype]);
+        fpair = factor_lj * forcelj * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
-          evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
-            offset[itype][jtype];
+          evdwl = r6inv * (lj3[itype][jtype] * r3inv - lj4[itype][jtype]) - offset[itype][jtype];
          evdwl *= factor_lj;
        }

-        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_lj_cubic_gpu.cpp
+++ b/src/GPU/pair_lj_cubic_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -37,32 +35,27 @@ using namespace PairLJCubicConstants;

 // External functions from cuda library for atom decomposition

-int ljcb_gpu_init(const int ntypes, double **cutsq, double **cut_inner_sq,
-                  double **cut_inner, double **sigma, double **epsilon,
-                  double **host_lj1, double **host_lj2, double **host_lj3,
-                  double **host_lj4, double *special_lj, const int nlocal,
-                  const int nall, const int max_nbors, const int maxspecial,
-                  const double cell_size, int &gpu_mode, FILE *screen);
+int ljcb_gpu_init(const int ntypes, double **cutsq, double **cut_inner_sq, double **cut_inner,
+                  double **sigma, double **epsilon, double **host_lj1, double **host_lj2,
+                  double **host_lj3, double **host_lj4, double *special_lj, const int nlocal,
+                  const int nall, const int max_nbors, const int maxspecial, const double cell_size,
+                  int &gpu_mode, FILE *screen);

 void ljcb_gpu_clear();
-int ** ljcb_gpu_compute_n(const int ago, const int inum, const int nall,
-                          double **host_x, int *host_type, double *sublo,
-                          double *subhi, tagint *tag, int **nspecial,
-                          tagint **special, const bool eflag, const bool vflag,
-                          const bool eatom, const bool vatom, int &host_start,
-                          int **ilist, int **jnum, const double cpu_time,
-                          bool &success);
-void ljcb_gpu_compute(const int ago, const int inum, const int nall,
-                      double **host_x, int *host_type, int *ilist, int *numj,
-                      int **firstneigh, const bool eflag, const bool vflag,
-                      const bool eatom, const bool vatom, int &host_start,
+int **ljcb_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                         int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                         const bool vatom, int &host_start, int **ilist, int **jnum,
+                         const double cpu_time, bool &success);
+void ljcb_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                      int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                      const bool vflag, const bool eatom, const bool vatom, int &host_start,
                      const double cpu_time, bool &success);
 double ljcb_gpu_bytes();

 /* ---------------------------------------------------------------------- */

-PairLJCubicGPU::PairLJCubicGPU(LAMMPS *lmp) : PairLJCubic(lmp),
-  gpu_mode(GPU_FORCE)
+PairLJCubicGPU::PairLJCubicGPU(LAMMPS *lmp) : PairLJCubic(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  cpu_time = 0.0;
@ -84,7 +77,7 @@ PairLJCubicGPU::~PairLJCubicGPU()

 void PairLJCubicGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -92,7 +85,7 @@ void PairLJCubicGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -101,28 +94,24 @@ void PairLJCubicGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = ljcb_gpu_compute_n(neighbor->ago, inum, nall,
-                                    atom->x, atom->type, sublo,
-                                    subhi, atom->tag, atom->nspecial,
-                                    atom->special, eflag, vflag, eflag_atom,
-                                    vflag_atom, host_start,
-                                    &ilist, &numneigh, cpu_time, success);
+    firstneigh =
+        ljcb_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+                           atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
+                           host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    ljcb_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                     vflag_atom, host_start, cpu_time, success);
+    ljcb_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                     eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -142,10 +131,9 @@ void PairLJCubicGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        mcut = init_one(i,j);
+        mcut = init_one(i, j);
        mcut *= mcut;
-        if (mcut > maxcut)
-          maxcut = mcut;
+        if (mcut > maxcut) maxcut = mcut;
        cutsq[i][j] = cutsq[j][i] = mcut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -153,22 +141,16 @@ void PairLJCubicGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = ljcb_gpu_init(atom->ntypes+1, cutsq, cut_inner_sq,
-                              cut_inner, sigma, epsilon, lj1, lj2,
-                              lj3, lj4, force->special_lj, atom->nlocal,
-                              atom->nlocal+atom->nghost, mnf, maxspecial,
-                              cell_size, gpu_mode, screen);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      ljcb_gpu_init(atom->ntypes + 1, cutsq, cut_inner_sq, cut_inner, sigma, epsilon, lj1, lj2, lj3,
+                    lj4, force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
+                    maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -181,13 +163,13 @@ double PairLJCubicGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairLJCubicGPU::cpu_compute(int start, int inum, int eflag,
-                                 int /* vflag */, int *ilist,
-                                 int *numneigh, int **firstneigh) {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
-  double r,t,rmin;
+void PairLJCubicGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                                 int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
+  double rsq, r2inv, r6inv, forcelj, factor_lj;
+  double r, t, rmin;
  int *jlist;

  double **x = atom->x;
@ -214,36 +196,35 @@ void PairLJCubicGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;
        if (rsq <= cut_inner_sq[itype][jtype]) {
-          r6inv = r2inv*r2inv*r2inv;
-          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+          r6inv = r2inv * r2inv * r2inv;
+          forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
        } else {
          r = sqrt(rsq);
-          rmin = sigma[itype][jtype]*RT6TWO;
-          t = (r - cut_inner[itype][jtype])/rmin;
-          forcelj = epsilon[itype][jtype]*(-DPHIDS + A3*t*t/2.0)*r/rmin;
+          rmin = sigma[itype][jtype] * RT6TWO;
+          t = (r - cut_inner[itype][jtype]) / rmin;
+          forcelj = epsilon[itype][jtype] * (-DPHIDS + A3 * t * t / 2.0) * r / rmin;
        }
-        fpair = factor_lj*forcelj*r2inv;
+        fpair = factor_lj * forcelj * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          if (rsq <= cut_inner_sq[itype][jtype])
-            evdwl = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
+            evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]);
          else
-            evdwl = epsilon[itype][jtype]*
-              (PHIS + DPHIDS*t - A3*t*t*t/6.0);
+            evdwl = epsilon[itype][jtype] * (PHIS + DPHIDS * t - A3 * t * t * t / 6.0);
          evdwl *= factor_lj;
        }

-        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_lj_cut_coul_cut_gpu.cpp
+++ b/src/GPU/pair_lj_cut_coul_cut_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,29 +32,22 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int ljc_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
-                 double **host_lj2, double **host_lj3, double **host_lj4,
-                 double **offset, double *special_lj, const int nlocal,
-                 const int nall, const int max_nbors, const int maxspecial,
-                 const double cell_size, int &gpu_mode, FILE *screen,
-                 double **host_cut_ljsq, double **host_cut_coulsq,
-                 double *host_special_coul, const double qqrd2e);
+int ljc_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                 double **host_lj3, double **host_lj4, double **offset, double *special_lj,
+                 const int nlocal, const int nall, const int max_nbors, const int maxspecial,
+                 const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
+                 double **host_cut_coulsq, double *host_special_coul, const double qqrd2e);
 void ljc_gpu_clear();
-int ** ljc_gpu_compute_n(const int ago, const int inum, const int nall,
-                         double **host_x, int *host_type, double *sublo,
-                         double *subhi, tagint *tag, int **nspecial,
-                         tagint **special, const bool eflag, const bool vflag,
-                         const bool eatom, const bool vatom, int &host_start,
-                         int **ilist, int **jnum, const double cpu_time,
-                         bool &success, double *host_q, double *boxlo,
-                         double *prd);
-void ljc_gpu_compute(const int ago, const int inum,
-                     const int nall, double **host_x, int *host_type,
-                     int *ilist, int *numj, int **firstneigh,
-                     const bool eflag, const bool vflag, const bool eatom,
-                     const bool vatom, int &host_start, const double cpu_time,
-                     bool &success, double *host_q, const int nlocal,
-                     double *boxlo, double *prd);
+int **ljc_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                        int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                        tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                        const bool vatom, int &host_start, int **ilist, int **jnum,
+                        const double cpu_time, bool &success, double *host_q, double *boxlo,
+                        double *prd);
+void ljc_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
+                     int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
+                     const bool eatom, const bool vatom, int &host_start, const double cpu_time,
+                     bool &success, double *host_q, const int nlocal, double *boxlo, double *prd);
 double ljc_gpu_bytes();

 /* ---------------------------------------------------------------------- */
@ -83,7 +74,7 @@ PairLJCutCoulCutGPU::~PairLJCutCoulCutGPU()

 void PairLJCutCoulCutGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -91,7 +82,7 @@ void PairLJCutCoulCutGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -100,30 +91,25 @@ void PairLJCutCoulCutGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = ljc_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                   atom->type, sublo, subhi,
-                                   atom->tag, atom->nspecial, atom->special,
-                                   eflag, vflag, eflag_atom, vflag_atom,
-                                   host_start, &ilist, &numneigh, cpu_time,
-                                   success, atom->q, domain->boxlo,
-                                   domain->prd);
+    firstneigh = ljc_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                   atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                   eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
+                                   success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    ljc_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                    vflag_atom, host_start, cpu_time, success, atom->q,
+    ljc_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                    eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
                    atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -136,9 +122,7 @@ void PairLJCutCoulCutGPU::compute(int eflag, int vflag)

 void PairLJCutCoulCutGPU::init_style()
 {
-  if (!atom->q_flag)
-    error->all(FLERR,"Pair style lj/cut/coul/cut/gpu requires atom attribute q");
-
+  if (!atom->q_flag) error->all(FLERR, "Pair style lj/cut/coul/cut/gpu requires atom attribute q");

  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
@ -146,10 +130,9 @@ void PairLJCutCoulCutGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -157,22 +140,16 @@ void PairLJCutCoulCutGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = ljc_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
-                             offset, force->special_lj, atom->nlocal,
-                             atom->nlocal+atom->nghost, mnf, maxspecial,
-                             cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
-                             force->special_coul, force->qqrd2e);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      ljc_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj,
+                   atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
+                   screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -185,13 +162,12 @@ double PairLJCutCoulCutGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairLJCutCoulCutGPU::cpu_compute(int start, int inum, int eflag,
-                                      int /* vflag */, int *ilist,
+void PairLJCutCoulCutGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
                                      int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
-  double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
+  int i, j, ii, jj, jnum, itype, jtype;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
+  double rsq, r2inv, r6inv, forcecoul, forcelj, factor_coul, factor_lj;
  int *jlist;

  evdwl = ecoul = 0.0;
@ -225,39 +201,42 @@ void PairLJCutCoulCutGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;

        if (rsq < cut_coulsq[itype][jtype])
-          forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
-        else forcecoul = 0.0;
+          forcecoul = qqrd2e * qtmp * q[j] * sqrt(r2inv);
+        else
+          forcecoul = 0.0;

        if (rsq < cut_ljsq[itype][jtype]) {
-          r6inv = r2inv*r2inv*r2inv;
-          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-        } else forcelj = 0.0;
+          r6inv = r2inv * r2inv * r2inv;
+          forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
+        } else
+          forcelj = 0.0;

-        fpair = (factor_coul*forcecoul + factor_lj*forcelj) * r2inv;
+        fpair = (factor_coul * forcecoul + factor_lj * forcelj) * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          if (rsq < cut_coulsq[itype][jtype])
-            ecoul = factor_coul * qqrd2e * qtmp*q[j]*sqrt(r2inv);
-          else ecoul = 0.0;
+            ecoul = factor_coul * qqrd2e * qtmp * q[j] * sqrt(r2inv);
+          else
+            ecoul = 0.0;
          if (rsq < cut_ljsq[itype][jtype]) {
-            evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
-              offset[itype][jtype];
+            evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
            evdwl *= factor_lj;
-          } else evdwl = 0.0;
+          } else
+            evdwl = 0.0;
        }

-        if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_lj_cut_coul_debye_gpu.cpp
+++ b/src/GPU/pair_lj_cut_coul_debye_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,36 +32,30 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int ljcd_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
-                  double **host_lj2, double **host_lj3, double **host_lj4,
-                  double **offset, double *special_lj, const int nlocal,
-                  const int nall, const int max_nbors, const int maxspecial,
-                  const double cell_size, int &gpu_mode, FILE *screen,
-                  double **host_cut_ljsq, double **host_cut_coulsq,
-                  double *host_special_coul, const double qqrd2e,
+int ljcd_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                  double **host_lj3, double **host_lj4, double **offset, double *special_lj,
+                  const int nlocal, const int nall, const int max_nbors, const int maxspecial,
+                  const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
+                  double **host_cut_coulsq, double *host_special_coul, const double qqrd2e,
                  const double kappa);
 void ljcd_gpu_clear();
-int ** ljcd_gpu_compute_n(const int ago, const int inum, const int nall,
-                          double **host_x, int *host_type, double *sublo,
-                          double *subhi, tagint *tag, int **nspecial,
-                          tagint **special, const bool eflag, const bool vflag,
-                          const bool eatom, const bool vatom, int &host_start,
-                          int **ilist, int **jnum, const double cpu_time,
-                          bool &success, double *host_q, double *boxlo,
-                          double *prd);
-void ljcd_gpu_compute(const int ago, const int inum, const int nall,
-                      double **host_x, int *host_type,
-                      int *ilist, int *numj, int **firstneigh,
-                      const bool eflag, const bool vflag, const bool eatom,
-                      const bool vatom, int &host_start, const double cpu_time,
-                      bool &success, double *host_q, const int nlocal,
+int **ljcd_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                         int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                         const bool vatom, int &host_start, int **ilist, int **jnum,
+                         const double cpu_time, bool &success, double *host_q, double *boxlo,
+                         double *prd);
+void ljcd_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                      int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                      const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                      const double cpu_time, bool &success, double *host_q, const int nlocal,
                      double *boxlo, double *prd);
 double ljcd_gpu_bytes();

 /* ---------------------------------------------------------------------- */

 PairLJCutCoulDebyeGPU::PairLJCutCoulDebyeGPU(LAMMPS *lmp) :
-  PairLJCutCoulDebye(lmp), gpu_mode(GPU_FORCE)
+    PairLJCutCoulDebye(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  reinitflag = 0;
@ -78,14 +70,14 @@ PairLJCutCoulDebyeGPU::PairLJCutCoulDebyeGPU(LAMMPS *lmp) :

 PairLJCutCoulDebyeGPU::~PairLJCutCoulDebyeGPU()
 {
-ljcd_gpu_clear();
+  ljcd_gpu_clear();
 }

 /* ---------------------------------------------------------------------- */

 void PairLJCutCoulDebyeGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -93,7 +85,7 @@ void PairLJCutCoulDebyeGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -102,30 +94,25 @@ void PairLJCutCoulDebyeGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = ljcd_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                    atom->type, sublo, subhi,
-                                    atom->tag, atom->nspecial, atom->special,
-                                    eflag, vflag, eflag_atom, vflag_atom,
-                                    host_start, &ilist, &numneigh, cpu_time,
-                                    success, atom->q, domain->boxlo,
-                                    domain->prd);
+    firstneigh = ljcd_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                    atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                    eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
+                                    success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    ljcd_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                     vflag_atom, host_start, cpu_time, success, atom->q,
+    ljcd_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                     eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
                     atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -139,8 +126,7 @@ void PairLJCutCoulDebyeGPU::compute(int eflag, int vflag)
 void PairLJCutCoulDebyeGPU::init_style()
 {
  if (!atom->q_flag)
-    error->all(FLERR,"Pair style lj/cut/coul/debye/gpu requires atom attribute q");
-
+    error->all(FLERR, "Pair style lj/cut/coul/debye/gpu requires atom attribute q");

  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
@ -148,10 +134,9 @@ void PairLJCutCoulDebyeGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -159,23 +144,16 @@ void PairLJCutCoulDebyeGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = ljcd_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
-                              offset, force->special_lj, atom->nlocal,
-                              atom->nlocal+atom->nghost, mnf, maxspecial,
-                              cell_size, gpu_mode, screen, cut_ljsq,
-                              cut_coulsq, force->special_coul,
-                              force->qqrd2e, kappa);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      ljcd_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj,
+                    atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
+                    screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e, kappa);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -188,14 +166,13 @@ double PairLJCutCoulDebyeGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairLJCutCoulDebyeGPU::cpu_compute(int start, int inum, int eflag,
-                                        int /* vflag */, int *ilist,
+void PairLJCutCoulDebyeGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
                                        int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
-  double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
-  double r,rinv,screening;
+  int i, j, ii, jj, jnum, itype, jtype;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
+  double rsq, r2inv, r6inv, forcecoul, forcelj, factor_coul, factor_lj;
+  double r, rinv, screening;
  int *jlist;

  evdwl = ecoul = 0.0;
@ -229,42 +206,45 @@ void PairLJCutCoulDebyeGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;

        if (rsq < cut_coulsq[itype][jtype]) {
          r = sqrt(rsq);
-          rinv = 1.0/r;
-          screening = exp(-kappa*r);
-          forcecoul = qqrd2e * qtmp*q[j] * screening * (kappa + rinv);
-        } else forcecoul = 0.0;
+          rinv = 1.0 / r;
+          screening = exp(-kappa * r);
+          forcecoul = qqrd2e * qtmp * q[j] * screening * (kappa + rinv);
+        } else
+          forcecoul = 0.0;

        if (rsq < cut_ljsq[itype][jtype]) {
-          r6inv = r2inv*r2inv*r2inv;
-          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-        } else forcelj = 0.0;
+          r6inv = r2inv * r2inv * r2inv;
+          forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
+        } else
+          forcelj = 0.0;

-        fpair = (factor_coul*forcecoul + factor_lj*forcelj) * r2inv;
+        fpair = (factor_coul * forcecoul + factor_lj * forcelj) * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          if (rsq < cut_coulsq[itype][jtype])
-            ecoul = factor_coul * qqrd2e * qtmp*q[j] * rinv * screening;
-          else ecoul = 0.0;
+            ecoul = factor_coul * qqrd2e * qtmp * q[j] * rinv * screening;
+          else
+            ecoul = 0.0;
          if (rsq < cut_ljsq[itype][jtype]) {
-            evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
-            offset[itype][jtype];
+            evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
            evdwl *= factor_lj;
-          } else evdwl = 0.0;
+          } else
+            evdwl = 0.0;
        }

-        if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_lj_cut_coul_dsf_gpu.cpp
+++ b/src/GPU/pair_lj_cut_coul_dsf_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,50 +23,41 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

 #include <cmath>

 #define MY_PIS 1.77245385090551602729
-#define EWALD_F   1.12837917
-#define EWALD_P   0.3275911
-#define A1        0.254829592
-#define A2       -0.284496736
-#define A3        1.421413741
-#define A4       -1.453152027
-#define A5        1.061405429
+#define EWALD_F 1.12837917
+#define EWALD_P 0.3275911
+#define A1 0.254829592
+#define A2 -0.284496736
+#define A3 1.421413741
+#define A4 -1.453152027
+#define A5 1.061405429

 using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int ljd_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
-                 double **host_lj2, double **host_lj3, double **host_lj4,
-                 double **offset, double *special_lj, const int nlocal,
-                 const int nall, const int max_nbors, const int maxspecial,
-                 const double cell_size, int &gpu_mode, FILE *screen,
-                 double **host_cut_ljsq, const double host_cut_coulsq,
-                 double *host_special_coul, const double qqrd2e,
-                 const double e_shift, const double f_shift,
-                 const double alpha);
+int ljd_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                 double **host_lj3, double **host_lj4, double **offset, double *special_lj,
+                 const int nlocal, const int nall, const int max_nbors, const int maxspecial,
+                 const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
+                 const double host_cut_coulsq, double *host_special_coul, const double qqrd2e,
+                 const double e_shift, const double f_shift, const double alpha);
 void ljd_gpu_clear();
-int ** ljd_gpu_compute_n(const int ago, const int inum, const int nall,
-                         double **host_x, int *host_type, double *sublo,
-                         double *subhi, tagint *tag, int **nspecial,
-                         tagint **special, const bool eflag, const bool vflag,
-                         const bool eatom, const bool vatom, int &host_start,
-                         int **ilist, int **jnum, const double cpu_time,
-                         bool &success, double *host_q, double *boxlo,
-                         double *prd);
-void ljd_gpu_compute(const int ago, const int inum,
-                     const int nall, double **host_x, int *host_type,
-                     int *ilist, int *numj, int **firstneigh,
-                     const bool eflag, const bool vflag, const bool eatom,
-                     const bool vatom, int &host_start, const double cpu_time,
-                     bool &success, double *host_q, const int nlocal,
-                     double *boxlo, double *prd);
+int **ljd_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                        int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                        tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                        const bool vatom, int &host_start, int **ilist, int **jnum,
+                        const double cpu_time, bool &success, double *host_q, double *boxlo,
+                        double *prd);
+void ljd_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
+                     int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
+                     const bool eatom, const bool vatom, int &host_start, const double cpu_time,
+                     bool &success, double *host_q, const int nlocal, double *boxlo, double *prd);
 double ljd_gpu_bytes();

 /* ---------------------------------------------------------------------- */
@ -94,7 +84,7 @@ PairLJCutCoulDSFGPU::~PairLJCutCoulDSFGPU()

 void PairLJCutCoulDSFGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -102,7 +92,7 @@ void PairLJCutCoulDSFGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -111,30 +101,25 @@ void PairLJCutCoulDSFGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = ljd_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                   atom->type, sublo, subhi,
-                                   atom->tag, atom->nspecial, atom->special,
-                                   eflag, vflag, eflag_atom, vflag_atom,
-                                   host_start, &ilist, &numneigh, cpu_time,
-                                   success, atom->q, domain->boxlo,
-                                   domain->prd);
+    firstneigh = ljd_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                   atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                   eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
+                                   success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    ljd_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                    vflag_atom, host_start, cpu_time, success, atom->q,
+    ljd_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                    eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
                    atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -147,9 +132,7 @@ void PairLJCutCoulDSFGPU::compute(int eflag, int vflag)

 void PairLJCutCoulDSFGPU::init_style()
 {
-  if (!atom->q_flag)
-    error->all(FLERR,"Pair style lj/cut/coul/dsf/gpu requires atom attribute q");
-
+  if (!atom->q_flag) error->all(FLERR, "Pair style lj/cut/coul/dsf/gpu requires atom attribute q");

  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
@ -157,10 +140,9 @@ void PairLJCutCoulDSFGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -169,28 +151,21 @@ void PairLJCutCoulDSFGPU::init_style()
  double cell_size = sqrt(maxcut) + neighbor->skin;

  cut_coulsq = cut_coul * cut_coul;
-  double erfcc = erfc(alpha*cut_coul);
-  double erfcd = exp(-alpha*alpha*cut_coul*cut_coul);
-  f_shift = -(erfcc/cut_coulsq + 2.0/MY_PIS*alpha*erfcd/cut_coul);
-  e_shift = erfcc/cut_coul - f_shift*cut_coul;
+  double erfcc = erfc(alpha * cut_coul);
+  double erfcd = exp(-alpha * alpha * cut_coul * cut_coul);
+  f_shift = -(erfcc / cut_coulsq + 2.0 / MY_PIS * alpha * erfcd / cut_coul);
+  e_shift = erfcc / cut_coul - f_shift * cut_coul;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = ljd_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
-                             offset, force->special_lj, atom->nlocal,
-                             atom->nlocal+atom->nghost, mnf, maxspecial,
-                             cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
-                             force->special_coul, force->qqrd2e, e_shift,
-                             f_shift, alpha);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success = ljd_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj,
+                             atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size,
+                             gpu_mode, screen, cut_ljsq, cut_coulsq, force->special_coul,
+                             force->qqrd2e, e_shift, f_shift, alpha);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -203,14 +178,13 @@ double PairLJCutCoulDSFGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairLJCutCoulDSFGPU::cpu_compute(int start, int inum, int eflag,
-                                      int /* vflag */, int *ilist,
+void PairLJCutCoulDSFGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
                                      int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
-  double r,rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
-  double prefactor,erfcc,erfcd,t;
+  int i, j, ii, jj, jnum, itype, jtype;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
+  double r, rsq, r2inv, r6inv, forcecoul, forcelj, factor_coul, factor_lj;
+  double prefactor, erfcc, erfcd, t;
  int *jlist;

  evdwl = ecoul = 0.0;
@ -237,8 +211,8 @@ void PairLJCutCoulDSFGPU::cpu_compute(int start, int inum, int eflag,
    jnum = numneigh[i];

    if (evflag) {
-      double e_self = -(e_shift/2.0 + alpha/MY_PIS) * qtmp*qtmp*qqrd2e;
-      ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
+      double e_self = -(e_shift / 2.0 + alpha / MY_PIS) * qtmp * qtmp * qqrd2e;
+      ev_tally(i, i, nlocal, 0, 0.0, e_self, 0.0, 0.0, 0.0, 0.0);
    }

    for (jj = 0; jj < jnum; jj++) {
@ -250,47 +224,48 @@ void PairLJCutCoulDSFGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;

        if (rsq < cut_ljsq[itype][jtype]) {
-          r6inv = r2inv*r2inv*r2inv;
-          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-        } else forcelj = 0.0;
+          r6inv = r2inv * r2inv * r2inv;
+          forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
+        } else
+          forcelj = 0.0;

        if (rsq < cut_coulsq) {
          r = sqrt(rsq);
-          prefactor = qqrd2e*qtmp*q[j]/r;
-          erfcd = exp(-alpha*alpha*r*r);
-          t = 1.0 / (1.0 + EWALD_P*alpha*r);
-          erfcc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * erfcd;
-          forcecoul = prefactor * (erfcc/r + 2.0*alpha/MY_PIS * erfcd +
-            r*f_shift) * r;
-          if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+          prefactor = qqrd2e * qtmp * q[j] / r;
+          erfcd = exp(-alpha * alpha * r * r);
+          t = 1.0 / (1.0 + EWALD_P * alpha * r);
+          erfcc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * erfcd;
+          forcecoul = prefactor * (erfcc / r + 2.0 * alpha / MY_PIS * erfcd + r * f_shift) * r;
+          if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
        }

-        fpair = (forcecoul + factor_lj*forcelj) * r2inv;
-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        fpair = (forcecoul + factor_lj * forcelj) * r2inv;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          if (rsq < cut_ljsq[itype][jtype]) {
-            evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
-                    offset[itype][jtype];
+            evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
            evdwl *= factor_lj;
-          } else evdwl = 0.0;
+          } else
+            evdwl = 0.0;

          if (rsq < cut_coulsq) {
-            ecoul = prefactor * (erfcc - r*e_shift - rsq*f_shift);
-            if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
-          } else ecoul = 0.0;
+            ecoul = prefactor * (erfcc - r * e_shift - rsq * f_shift);
+            if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
+          } else
+            ecoul = 0.0;
        }

-        if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_lj_cut_coul_long_gpu.cpp
+++ b/src/GPU/pair_lj_cut_coul_long_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -25,56 +24,49 @@
 #include "gpu_extra.h"
 #include "kspace.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

 #include <cmath>

-#define EWALD_F   1.12837917
-#define EWALD_P   0.3275911
-#define A1        0.254829592
-#define A2       -0.284496736
-#define A3        1.421413741
-#define A4       -1.453152027
-#define A5        1.061405429
+#define EWALD_F 1.12837917
+#define EWALD_P 0.3275911
+#define A1 0.254829592
+#define A2 -0.284496736
+#define A3 1.421413741
+#define A4 -1.453152027
+#define A5 1.061405429

 using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int ljcl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
-                  double **host_lj2, double **host_lj3, double **host_lj4,
-                  double **offset, double *special_lj, const int nlocal,
-                  const int nall, const int max_nbors, const int maxspecial,
-                  const double cell_size, int &gpu_mode, FILE *screen,
-                  double **host_cut_ljsq, double host_cut_coulsq,
-                  double *host_special_coul, const double qqrd2e,
+int ljcl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                  double **host_lj3, double **host_lj4, double **offset, double *special_lj,
+                  const int nlocal, const int nall, const int max_nbors, const int maxspecial,
+                  const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
+                  double host_cut_coulsq, double *host_special_coul, const double qqrd2e,
                  const double g_ewald);
-void ljcl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
-                     double **host_lj2, double **host_lj3, double **host_lj4,
-                     double **offset, double **host_lj_cutsq);
+void ljcl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                     double **host_lj3, double **host_lj4, double **offset, double **host_lj_cutsq);
 void ljcl_gpu_clear();
-int ** ljcl_gpu_compute_n(const int ago, const int inum,
-                          const int nall, double **host_x, int *host_type,
-                          double *sublo, double *subhi, tagint *tag,
-                          int **nspecial, tagint **special, const bool eflag,
-                          const bool vflag, const bool eatom, const bool vatom,
-                          int &host_start, int **ilist, int **jnum,
-                          const double cpu_time, bool &success, double *host_q,
-                          double *boxlo, double *prd);
-void ljcl_gpu_compute(const int ago, const int inum, const int nall,
-                      double **host_x, int *host_type, int *ilist, int *numj,
-                      int **firstneigh, const bool eflag, const bool vflag,
-                      const bool eatom, const bool vatom, int &host_start,
-                      const double cpu_time, bool &success, double *host_q,
-                      const int nlocal, double *boxlo, double *prd);
+int **ljcl_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                         int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                         const bool vatom, int &host_start, int **ilist, int **jnum,
+                         const double cpu_time, bool &success, double *host_q, double *boxlo,
+                         double *prd);
+void ljcl_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                      int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                      const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                      const double cpu_time, bool &success, double *host_q, const int nlocal,
+                      double *boxlo, double *prd);
 double ljcl_gpu_bytes();

 /* ---------------------------------------------------------------------- */

 PairLJCutCoulLongGPU::PairLJCutCoulLongGPU(LAMMPS *lmp) :
-  PairLJCutCoulLong(lmp), gpu_mode(GPU_FORCE)
+    PairLJCutCoulLong(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  cpu_time = 0.0;
@ -95,7 +87,7 @@ PairLJCutCoulLongGPU::~PairLJCutCoulLongGPU()

 void PairLJCutCoulLongGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -103,7 +95,7 @@ void PairLJCutCoulLongGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -112,30 +104,25 @@ void PairLJCutCoulLongGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = ljcl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                    atom->type, sublo, subhi,
-                                    atom->tag, atom->nspecial, atom->special,
-                                    eflag, vflag, eflag_atom, vflag_atom,
-                                    host_start, &ilist, &numneigh, cpu_time,
-                                    success, atom->q, domain->boxlo,
-                                    domain->prd);
+    firstneigh = ljcl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                    atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                    eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
+                                    success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    ljcl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                     vflag_atom, host_start, cpu_time, success, atom->q,
+    ljcl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                     eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
                     atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -150,8 +137,7 @@ void PairLJCutCoulLongGPU::init_style()
 {
  cut_respa = nullptr;

-  if (!atom->q_flag)
-    error->all(FLERR,"Pair style lj/cut/coul/long/gpu requires atom attribute q");
+  if (!atom->q_flag) error->all(FLERR, "Pair style lj/cut/coul/long/gpu requires atom attribute q");

  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
@ -159,10 +145,9 @@ void PairLJCutCoulLongGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -174,30 +159,23 @@ void PairLJCutCoulLongGPU::init_style()

  // insure use of KSpace long-range solver, set g_ewald

-  if (force->kspace == nullptr)
-    error->all(FLERR,"Pair style requires a KSpace style");
+  if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
  g_ewald = force->kspace->g_ewald;

  // setup force tables

-  if (ncoultablebits) init_tables(cut_coul,cut_respa);
+  if (ncoultablebits) init_tables(cut_coul, cut_respa);

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = ljcl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
-                              offset, force->special_lj, atom->nlocal,
-                              atom->nlocal+atom->nghost, mnf, maxspecial,
-                              cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
-                              force->special_coul, force->qqrd2e, g_ewald);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      ljcl_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj,
+                    atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
+                    screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e, g_ewald);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -206,7 +184,7 @@ void PairLJCutCoulLongGPU::reinit()
 {
  Pair::reinit();

-  ljcl_gpu_reinit(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset, cut_ljsq);
+  ljcl_gpu_reinit(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, cut_ljsq);
 }

 /* ---------------------------------------------------------------------- */
@ -219,15 +197,14 @@ double PairLJCutCoulLongGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairLJCutCoulLongGPU::cpu_compute(int start, int inum, int eflag,
-                                       int /* vflag */, int *ilist,
+void PairLJCutCoulLongGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
                                       int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itype,jtype,itable;
-  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
-  double fraction,table;
-  double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
-  double grij,expm2,prefactor,t,erfc;
+  int i, j, ii, jj, jnum, itype, jtype, itable;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
+  double fraction, table;
+  double r, r2inv, r6inv, forcecoul, forcelj, factor_coul, factor_lj;
+  double grij, expm2, prefactor, t, erfc;
  int *jlist;
  double rsq;

@ -262,68 +239,71 @@ void PairLJCutCoulLongGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;

        if (rsq < cut_coulsq) {
          if (!ncoultablebits || rsq <= tabinnersq) {
            r = sqrt(rsq);
            grij = g_ewald * r;
-            expm2 = exp(-grij*grij);
-            t = 1.0 / (1.0 + EWALD_P*grij);
-            erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
-            prefactor = qqrd2e * qtmp*q[j]/r;
-            forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
-            if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+            expm2 = exp(-grij * grij);
+            t = 1.0 / (1.0 + EWALD_P * grij);
+            erfc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * expm2;
+            prefactor = qqrd2e * qtmp * q[j] / r;
+            forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
+            if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
          } else {
            union_int_float_t rsq_lookup;
            rsq_lookup.f = rsq;
            itable = rsq_lookup.i & ncoulmask;
            itable >>= ncoulshiftbits;
            fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
-            table = ftable[itable] + fraction*dftable[itable];
-            forcecoul = qtmp*q[j] * table;
+            table = ftable[itable] + fraction * dftable[itable];
+            forcecoul = qtmp * q[j] * table;
            if (factor_coul < 1.0) {
-              table = ctable[itable] + fraction*dctable[itable];
-              prefactor = qtmp*q[j] * table;
-              forcecoul -= (1.0-factor_coul)*prefactor;
+              table = ctable[itable] + fraction * dctable[itable];
+              prefactor = qtmp * q[j] * table;
+              forcecoul -= (1.0 - factor_coul) * prefactor;
            }
          }
-        } else forcecoul = 0.0;
+        } else
+          forcecoul = 0.0;

        if (rsq < cut_ljsq[itype][jtype]) {
-          r6inv = r2inv*r2inv*r2inv;
-          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-        } else forcelj = 0.0;
+          r6inv = r2inv * r2inv * r2inv;
+          forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
+        } else
+          forcelj = 0.0;

-        fpair = (forcecoul + factor_lj*forcelj) * r2inv;
+        fpair = (forcecoul + factor_lj * forcelj) * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          if (rsq < cut_coulsq) {
            if (!ncoultablebits || rsq <= tabinnersq)
-              ecoul = prefactor*erfc;
+              ecoul = prefactor * erfc;
            else {
-              table = etable[itable] + fraction*detable[itable];
-              ecoul = qtmp*q[j] * table;
+              table = etable[itable] + fraction * detable[itable];
+              ecoul = qtmp * q[j] * table;
            }
-            if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
-          } else ecoul = 0.0;
+            if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
+          } else
+            ecoul = 0.0;

          if (rsq < cut_ljsq[itype][jtype]) {
-            evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
-              offset[itype][jtype];
+            evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
            evdwl *= factor_lj;
-          } else evdwl = 0.0;
+          } else
+            evdwl = 0.0;
        }

-        if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_lj_cut_coul_msm_gpu.cpp
+++ b/src/GPU/pair_lj_cut_coul_msm_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -25,7 +24,6 @@
 #include "gpu_extra.h"
 #include "kspace.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -35,36 +33,29 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int ljcm_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
-                  double **host_lj2, double **host_lj3, double **host_lj4,
-                  double **host_gcons, double **host_dgcons,
-                  double **offset, double *special_lj, const int inum,
-                  const int nall, const int max_nbors, const int maxspecial,
-                  const double cell_size, int &gpu_mode, FILE *screen,
-                  double **host_cut_ljsq, double host_cut_coulsq,
-                  double *host_special_coul, const int order,
-                  const double qqrd2e);
+int ljcm_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                  double **host_lj3, double **host_lj4, double **host_gcons, double **host_dgcons,
+                  double **offset, double *special_lj, const int inum, const int nall,
+                  const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode,
+                  FILE *screen, double **host_cut_ljsq, double host_cut_coulsq,
+                  double *host_special_coul, const int order, const double qqrd2e);
 void ljcm_gpu_clear();
-int ** ljcm_gpu_compute_n(const int ago, const int inum, const int nall,
-                          double **host_x, int *host_type, double *sublo,
-                          double *subhi, tagint *tag, int **nspecial,
-                          tagint **special, const bool eflag, const bool vflag,
-                          const bool eatom, const bool vatom, int &host_start,
-                          int **ilist, int **jnum, const double cpu_time,
-                          bool &success, double *host_q, double *boxlo,
-                          double *prd);
-void ljcm_gpu_compute(const int ago, const int inum, const int nall,
-                      double **host_x, int *host_type, int *ilist, int *numj,
-                      int **firstneigh, const bool eflag, const bool vflag,
-                      const bool eatom, const bool vatom, int &host_start,
-                      const double cpu_time, bool &success, double *host_q,
-                      const int nlocal, double *boxlo, double *prd);
+int **ljcm_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                         int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                         const bool vatom, int &host_start, int **ilist, int **jnum,
+                         const double cpu_time, bool &success, double *host_q, double *boxlo,
+                         double *prd);
+void ljcm_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                      int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                      const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                      const double cpu_time, bool &success, double *host_q, const int nlocal,
+                      double *boxlo, double *prd);
 double ljcm_gpu_bytes();

 /* ---------------------------------------------------------------------- */

-PairLJCutCoulMSMGPU::PairLJCutCoulMSMGPU(LAMMPS *lmp) :
-  PairLJCutCoulMSM(lmp), gpu_mode(GPU_FORCE)
+PairLJCutCoulMSMGPU::PairLJCutCoulMSMGPU(LAMMPS *lmp) : PairLJCutCoulMSM(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  reinitflag = 0;
@ -86,7 +77,7 @@ PairLJCutCoulMSMGPU::~PairLJCutCoulMSMGPU()

 void PairLJCutCoulMSMGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -94,7 +85,7 @@ void PairLJCutCoulMSMGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -103,30 +94,25 @@ void PairLJCutCoulMSMGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = ljcm_gpu_compute_n(neighbor->ago, inum, nall,
-                                    atom->x, atom->type, sublo,
-                                    subhi, atom->tag, atom->nspecial,
-                                    atom->special, eflag, vflag, eflag_atom,
-                                    vflag_atom, host_start,
-                                    &ilist, &numneigh, cpu_time, success,
-                                    atom->q, domain->boxlo, domain->prd);
+    firstneigh = ljcm_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                    atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                    eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
+                                    success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    ljcm_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                     vflag_atom, host_start, cpu_time, success, atom->q,
+    ljcm_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                     eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
                     atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -141,12 +127,10 @@ void PairLJCutCoulMSMGPU::init_style()
 {
  cut_respa = nullptr;

-  if (!atom->q_flag)
-    error->all(FLERR,"Pair style lj/cut/coul/cut/gpu requires atom attribute q");
-
+  if (!atom->q_flag) error->all(FLERR, "Pair style lj/cut/coul/cut/gpu requires atom attribute q");

  if (force->kspace->scalar_pressure_flag)
-    error->all(FLERR,"Must use 'kspace_modify pressure/scalar no' with GPU MSM Pair styles");
+    error->all(FLERR, "Must use 'kspace_modify pressure/scalar no' with GPU MSM Pair styles");

  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
@ -154,10 +138,9 @@ void PairLJCutCoulMSMGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -169,27 +152,19 @@ void PairLJCutCoulMSMGPU::init_style()

  // setup force tables

-  if (ncoultablebits) init_tables(cut_coul,cut_respa);
+  if (ncoultablebits) init_tables(cut_coul, cut_respa);

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = ljcm_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
-                              force->kspace->get_gcons(),
-                              force->kspace->get_dgcons(),
-                              offset, force->special_lj,
-                              atom->nlocal, atom->nlocal+atom->nghost,
-                              mnf, maxspecial, cell_size, gpu_mode, screen,
-                              cut_ljsq, cut_coulsq, force->special_coul,
-                              force->kspace->order, force->qqrd2e);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      ljcm_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, force->kspace->get_gcons(),
+                    force->kspace->get_dgcons(), offset, force->special_lj, atom->nlocal,
+                    atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen,
+                    cut_ljsq, cut_coulsq, force->special_coul, force->kspace->order, force->qqrd2e);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -202,14 +177,14 @@ double PairLJCutCoulMSMGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairLJCutCoulMSMGPU::cpu_compute(int start, int inum, int eflag,
-                                      int /* vflag */, int *ilist,
-                                      int *numneigh, int **firstneigh) {
-  int i,j,ii,jj,jnum,itype,jtype,itable;
-  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
-  double fraction,table;
-  double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
-  double egamma,fgamma,prefactor;
+void PairLJCutCoulMSMGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                                      int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum, itype, jtype, itable;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
+  double fraction, table;
+  double r, r2inv, r6inv, forcecoul, forcelj, factor_coul, factor_lj;
+  double egamma, fgamma, prefactor;
  int *jlist;
  double rsq;

@ -242,66 +217,69 @@ void PairLJCutCoulMSMGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;

        if (rsq < cut_coulsq) {
          if (!ncoultablebits || rsq <= tabinnersq) {
            r = sqrt(rsq);
-            prefactor = qqrd2e * qtmp*q[j]/r;
-            egamma = 1.0 - (r/cut_coul)*force->kspace->gamma(r/cut_coul);
-            fgamma = 1.0 + (rsq/cut_coulsq)*force->kspace->dgamma(r/cut_coul);
+            prefactor = qqrd2e * qtmp * q[j] / r;
+            egamma = 1.0 - (r / cut_coul) * force->kspace->gamma(r / cut_coul);
+            fgamma = 1.0 + (rsq / cut_coulsq) * force->kspace->dgamma(r / cut_coul);
            forcecoul = prefactor * fgamma;
-            if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+            if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
          } else {
            union_int_float_t rsq_lookup;
            rsq_lookup.f = rsq;
            itable = rsq_lookup.i & ncoulmask;
            itable >>= ncoulshiftbits;
            fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
-            table = ftable[itable] + fraction*dftable[itable];
-            forcecoul = qtmp*q[j] * table;
+            table = ftable[itable] + fraction * dftable[itable];
+            forcecoul = qtmp * q[j] * table;
            if (factor_coul < 1.0) {
-              table = ctable[itable] + fraction*dctable[itable];
-              prefactor = qtmp*q[j] * table;
-              forcecoul -= (1.0-factor_coul)*prefactor;
+              table = ctable[itable] + fraction * dctable[itable];
+              prefactor = qtmp * q[j] * table;
+              forcecoul -= (1.0 - factor_coul) * prefactor;
            }
          }
-        } else forcecoul = 0.0;
+        } else
+          forcecoul = 0.0;

        if (rsq < cut_ljsq[itype][jtype]) {
-          r6inv = r2inv*r2inv*r2inv;
-          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-        } else forcelj = 0.0;
+          r6inv = r2inv * r2inv * r2inv;
+          forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
+        } else
+          forcelj = 0.0;

        fpair = (forcecoul + forcelj) * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          if (rsq < cut_coulsq) {
            if (!ncoultablebits || rsq <= tabinnersq)
-              ecoul = prefactor*egamma;
+              ecoul = prefactor * egamma;
            else {
-              table = etable[itable] + fraction*detable[itable];
-              ecoul = qtmp*q[j] * table;
+              table = etable[itable] + fraction * detable[itable];
+              ecoul = qtmp * q[j] * table;
            }
-            if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
-          } else ecoul = 0.0;
+            if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
+          } else
+            ecoul = 0.0;

          if (rsq < cut_ljsq[itype][jtype]) {
-            evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
-              offset[itype][jtype];
+            evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
            evdwl *= factor_lj;
-          } else evdwl = 0.0;
+          } else
+            evdwl = 0.0;
        }

-        if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_lj_cut_dipole_cut_gpu.cpp
+++ b/src/GPU/pair_lj_cut_dipole_cut_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"
 #include "update.h"
@ -36,36 +34,29 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int dpl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
-                 double **host_lj2, double **host_lj3, double **host_lj4,
-                 double **offset, double *special_lj, const int nlocal,
-                 const int nall, const int max_nbors, const int maxspecial,
-                 const double cell_size, int &gpu_mode, FILE *screen,
-                 double **host_cut_ljsq, double **host_cut_coulsq,
-                 double *host_special_coul, const double qqrd2e);
+int dpl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                 double **host_lj3, double **host_lj4, double **offset, double *special_lj,
+                 const int nlocal, const int nall, const int max_nbors, const int maxspecial,
+                 const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
+                 double **host_cut_coulsq, double *host_special_coul, const double qqrd2e);
 void dpl_gpu_clear();
-int ** dpl_gpu_compute_n(const int ago, const int inum,
-                         const int nall, double **host_x, int *host_type,
-                         double *sublo, double *subhi, tagint *tag,
-                         int **nspecial, tagint **special, const bool eflag,
-                         const bool vflag, const bool eatom, const bool vatom,
-                         int &host_start, int **ilist, int **jnum,
-                         const double cpu_time, bool &success,
-                         double *host_q, double **host_mu,
-                         double *boxlo, double *prd);
-void dpl_gpu_compute(const int ago, const int inum,
-                     const int nall, double **host_x, int *host_type,
-                     int *ilist, int *numj, int **firstneigh,
-                     const bool eflag, const bool vflag, const bool eatom,
-                     const bool vatom, int &host_start, const double cpu_time,
-                     bool &success, double *host_q, double **host_mu,
-                     const int nlocal, double *boxlo, double *prd);
+int **dpl_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                        int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                        tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                        const bool vatom, int &host_start, int **ilist, int **jnum,
+                        const double cpu_time, bool &success, double *host_q, double **host_mu,
+                        double *boxlo, double *prd);
+void dpl_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
+                     int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
+                     const bool eatom, const bool vatom, int &host_start, const double cpu_time,
+                     bool &success, double *host_q, double **host_mu, const int nlocal,
+                     double *boxlo, double *prd);
 double dpl_gpu_bytes();

 /* ---------------------------------------------------------------------- */

-PairLJCutDipoleCutGPU::PairLJCutDipoleCutGPU(LAMMPS *lmp) : PairLJCutDipoleCut(lmp),
-  gpu_mode(GPU_FORCE)
+PairLJCutDipoleCutGPU::PairLJCutDipoleCutGPU(LAMMPS *lmp) :
+    PairLJCutDipoleCut(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  reinitflag = 0;
@ -87,7 +78,7 @@ PairLJCutDipoleCutGPU::~PairLJCutDipoleCutGPU()

 void PairLJCutDipoleCutGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -95,7 +86,7 @@ void PairLJCutDipoleCutGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -104,30 +95,25 @@ void PairLJCutDipoleCutGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = dpl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                   atom->type, sublo, subhi,
-                                   atom->tag, atom->nspecial, atom->special,
-                                   eflag, vflag, eflag_atom, vflag_atom,
-                                   host_start, &ilist, &numneigh, cpu_time,
-                                   success, atom->q, atom->mu, domain->boxlo,
-                                   domain->prd);
+    firstneigh = dpl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                   atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                   eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
+                                   success, atom->q, atom->mu, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    dpl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                    vflag_atom, host_start, cpu_time, success, atom->q,
+    dpl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                    eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
                    atom->mu, atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -141,11 +127,10 @@ void PairLJCutDipoleCutGPU::compute(int eflag, int vflag)
 void PairLJCutDipoleCutGPU::init_style()
 {
  if (!atom->q_flag || !atom->mu_flag || !atom->torque_flag)
-    error->all(FLERR,"Pair dipole/cut/gpu requires atom attributes q, mu, torque");
+    error->all(FLERR, "Pair dipole/cut/gpu requires atom attributes q, mu, torque");

-
-  if (strcmp(update->unit_style,"electron") == 0)
-    error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles");
+  if (strcmp(update->unit_style, "electron") == 0)
+    error->all(FLERR, "Cannot (yet) use 'electron' units with dipoles");

  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
@ -153,10 +138,9 @@ void PairLJCutDipoleCutGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -164,22 +148,16 @@ void PairLJCutDipoleCutGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = dpl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
-                             offset, force->special_lj, atom->nlocal,
-                             atom->nlocal+atom->nghost, mnf, maxspecial,
-                             cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
-                             force->special_coul, force->qqrd2e);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      dpl_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj,
+                   atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
+                   screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -192,21 +170,20 @@ double PairLJCutDipoleCutGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairLJCutDipoleCutGPU::cpu_compute(int start, int inum, int eflag, int vflag,
-                                   int *ilist, int *numneigh,
-                                   int **firstneigh)
+void PairLJCutDipoleCutGPU::cpu_compute(int start, int inum, int eflag, int vflag, int *ilist,
+                                        int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fx,fy,fz;
-  double rsq,rinv,r2inv,r6inv,r3inv,r5inv,r7inv;
-  double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz;
-  double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul;
-  double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4;
-  double forcelj,factor_coul,factor_lj;
+  int i, j, ii, jj, jnum, itype, jtype;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fx, fy, fz;
+  double rsq, rinv, r2inv, r6inv, r3inv, r5inv, r7inv;
+  double forcecoulx, forcecouly, forcecoulz, crossx, crossy, crossz;
+  double tixcoul, tiycoul, tizcoul, tjxcoul, tjycoul, tjzcoul;
+  double fq, pdotp, pidotr, pjdotr, pre1, pre2, pre3, pre4;
+  double forcelj, factor_coul, factor_lj;
  int *jlist;

  evdwl = ecoul = 0.0;
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  double **x = atom->x;
  double **f = atom->f;
@ -218,7 +195,6 @@ void PairLJCutDipoleCutGPU::cpu_compute(int start, int inum, int eflag, int vfla
  double *special_lj = force->special_lj;
  double qqrd2e = force->qqrd2e;

-
  // loop over neighbors of my atoms

  for (ii = start; ii < inum; ii++) {
@ -240,11 +216,11 @@ void PairLJCutDipoleCutGPU::cpu_compute(int start, int inum, int eflag, int vfla
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;
        rinv = sqrt(r2inv);

        // atom can have both a charge and dipole
@ -257,119 +233,119 @@ void PairLJCutDipoleCutGPU::cpu_compute(int start, int inum, int eflag, int vfla
        if (rsq < cut_coulsq[itype][jtype]) {

          if (qtmp != 0.0 && q[j] != 0.0) {
-            r3inv = r2inv*rinv;
-            pre1 = qtmp*q[j]*r3inv;
+            r3inv = r2inv * rinv;
+            pre1 = qtmp * q[j] * r3inv;

-            forcecoulx += pre1*delx;
-            forcecouly += pre1*dely;
-            forcecoulz += pre1*delz;
+            forcecoulx += pre1 * delx;
+            forcecouly += pre1 * dely;
+            forcecoulz += pre1 * delz;
          }

          if (mu[i][3] > 0.0 && mu[j][3] > 0.0) {
-            r3inv = r2inv*rinv;
-            r5inv = r3inv*r2inv;
-            r7inv = r5inv*r2inv;
+            r3inv = r2inv * rinv;
+            r5inv = r3inv * r2inv;
+            r7inv = r5inv * r2inv;

-            pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2];
-            pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
-            pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
+            pdotp = mu[i][0] * mu[j][0] + mu[i][1] * mu[j][1] + mu[i][2] * mu[j][2];
+            pidotr = mu[i][0] * delx + mu[i][1] * dely + mu[i][2] * delz;
+            pjdotr = mu[j][0] * delx + mu[j][1] * dely + mu[j][2] * delz;

-            pre1 = 3.0*r5inv*pdotp - 15.0*r7inv*pidotr*pjdotr;
-            pre2 = 3.0*r5inv*pjdotr;
-            pre3 = 3.0*r5inv*pidotr;
-            pre4 = -1.0*r3inv;
+            pre1 = 3.0 * r5inv * pdotp - 15.0 * r7inv * pidotr * pjdotr;
+            pre2 = 3.0 * r5inv * pjdotr;
+            pre3 = 3.0 * r5inv * pidotr;
+            pre4 = -1.0 * r3inv;

-            forcecoulx += pre1*delx + pre2*mu[i][0] + pre3*mu[j][0];
-            forcecouly += pre1*dely + pre2*mu[i][1] + pre3*mu[j][1];
-            forcecoulz += pre1*delz + pre2*mu[i][2] + pre3*mu[j][2];
+            forcecoulx += pre1 * delx + pre2 * mu[i][0] + pre3 * mu[j][0];
+            forcecouly += pre1 * dely + pre2 * mu[i][1] + pre3 * mu[j][1];
+            forcecoulz += pre1 * delz + pre2 * mu[i][2] + pre3 * mu[j][2];

-            crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]);
-            crossy = pre4 * (mu[i][2]*mu[j][0] - mu[i][0]*mu[j][2]);
-            crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]);
+            crossx = pre4 * (mu[i][1] * mu[j][2] - mu[i][2] * mu[j][1]);
+            crossy = pre4 * (mu[i][2] * mu[j][0] - mu[i][0] * mu[j][2]);
+            crossz = pre4 * (mu[i][0] * mu[j][1] - mu[i][1] * mu[j][0]);

-            tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely);
-            tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz);
-            tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx);
-            tjxcoul += -crossx + pre3 * (mu[j][1]*delz - mu[j][2]*dely);
-            tjycoul += -crossy + pre3 * (mu[j][2]*delx - mu[j][0]*delz);
-            tjzcoul += -crossz + pre3 * (mu[j][0]*dely - mu[j][1]*delx);
+            tixcoul += crossx + pre2 * (mu[i][1] * delz - mu[i][2] * dely);
+            tiycoul += crossy + pre2 * (mu[i][2] * delx - mu[i][0] * delz);
+            tizcoul += crossz + pre2 * (mu[i][0] * dely - mu[i][1] * delx);
+            tjxcoul += -crossx + pre3 * (mu[j][1] * delz - mu[j][2] * dely);
+            tjycoul += -crossy + pre3 * (mu[j][2] * delx - mu[j][0] * delz);
+            tjzcoul += -crossz + pre3 * (mu[j][0] * dely - mu[j][1] * delx);
          }

          if (mu[i][3] > 0.0 && q[j] != 0.0) {
-            r3inv = r2inv*rinv;
-            r5inv = r3inv*r2inv;
-            pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
-            pre1 = 3.0*q[j]*r5inv * pidotr;
-            pre2 = q[j]*r3inv;
+            r3inv = r2inv * rinv;
+            r5inv = r3inv * r2inv;
+            pidotr = mu[i][0] * delx + mu[i][1] * dely + mu[i][2] * delz;
+            pre1 = 3.0 * q[j] * r5inv * pidotr;
+            pre2 = q[j] * r3inv;

-            forcecoulx += pre2*mu[i][0] - pre1*delx;
-            forcecouly += pre2*mu[i][1] - pre1*dely;
-            forcecoulz += pre2*mu[i][2] - pre1*delz;
-            tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely);
-            tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz);
-            tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx);
+            forcecoulx += pre2 * mu[i][0] - pre1 * delx;
+            forcecouly += pre2 * mu[i][1] - pre1 * dely;
+            forcecoulz += pre2 * mu[i][2] - pre1 * delz;
+            tixcoul += pre2 * (mu[i][1] * delz - mu[i][2] * dely);
+            tiycoul += pre2 * (mu[i][2] * delx - mu[i][0] * delz);
+            tizcoul += pre2 * (mu[i][0] * dely - mu[i][1] * delx);
          }

          if (mu[j][3] > 0.0 && qtmp != 0.0) {
-            r3inv = r2inv*rinv;
-            r5inv = r3inv*r2inv;
-            pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
-            pre1 = 3.0*qtmp*r5inv * pjdotr;
-            pre2 = qtmp*r3inv;
+            r3inv = r2inv * rinv;
+            r5inv = r3inv * r2inv;
+            pjdotr = mu[j][0] * delx + mu[j][1] * dely + mu[j][2] * delz;
+            pre1 = 3.0 * qtmp * r5inv * pjdotr;
+            pre2 = qtmp * r3inv;

-            forcecoulx += pre1*delx - pre2*mu[j][0];
-            forcecouly += pre1*dely - pre2*mu[j][1];
-            forcecoulz += pre1*delz - pre2*mu[j][2];
-            tjxcoul += -pre2 * (mu[j][1]*delz - mu[j][2]*dely);
-            tjycoul += -pre2 * (mu[j][2]*delx - mu[j][0]*delz);
-            tjzcoul += -pre2 * (mu[j][0]*dely - mu[j][1]*delx);
+            forcecoulx += pre1 * delx - pre2 * mu[j][0];
+            forcecouly += pre1 * dely - pre2 * mu[j][1];
+            forcecoulz += pre1 * delz - pre2 * mu[j][2];
+            tjxcoul += -pre2 * (mu[j][1] * delz - mu[j][2] * dely);
+            tjycoul += -pre2 * (mu[j][2] * delx - mu[j][0] * delz);
+            tjzcoul += -pre2 * (mu[j][0] * dely - mu[j][1] * delx);
          }
        }

        // LJ interaction

        if (rsq < cut_ljsq[itype][jtype]) {
-          r6inv = r2inv*r2inv*r2inv;
-          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+          r6inv = r2inv * r2inv * r2inv;
+          forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
          forcelj *= factor_lj * r2inv;
-        } else forcelj = 0.0;
+        } else
+          forcelj = 0.0;

        // total force

-        fq = factor_coul*qqrd2e;
-        fx = fq*forcecoulx + delx*forcelj;
-        fy = fq*forcecouly + dely*forcelj;
-        fz = fq*forcecoulz + delz*forcelj;
+        fq = factor_coul * qqrd2e;
+        fx = fq * forcecoulx + delx * forcelj;
+        fy = fq * forcecouly + dely * forcelj;
+        fz = fq * forcecoulz + delz * forcelj;

        // force & torque accumulation

        f[i][0] += fx;
        f[i][1] += fy;
        f[i][2] += fz;
-        torque[i][0] += fq*tixcoul;
-        torque[i][1] += fq*tiycoul;
-        torque[i][2] += fq*tizcoul;
+        torque[i][0] += fq * tixcoul;
+        torque[i][1] += fq * tiycoul;
+        torque[i][2] += fq * tizcoul;

        if (eflag) {
          if (rsq < cut_coulsq[itype][jtype]) {
-            ecoul = qtmp*q[j]*rinv;
+            ecoul = qtmp * q[j] * rinv;
            if (mu[i][3] > 0.0 && mu[j][3] > 0.0)
-              ecoul += r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr;
-            if (mu[i][3] > 0.0 && q[j] != 0.0)
-              ecoul += -q[j]*r3inv*pidotr;
-            if (mu[j][3] > 0.0 && qtmp != 0.0)
-              ecoul += qtmp*r3inv*pjdotr;
-            ecoul *= factor_coul*qqrd2e;
-          } else ecoul = 0.0;
+              ecoul += r3inv * pdotp - 3.0 * r5inv * pidotr * pjdotr;
+            if (mu[i][3] > 0.0 && q[j] != 0.0) ecoul += -q[j] * r3inv * pidotr;
+            if (mu[j][3] > 0.0 && qtmp != 0.0) ecoul += qtmp * r3inv * pjdotr;
+            ecoul *= factor_coul * qqrd2e;
+          } else
+            ecoul = 0.0;

          if (rsq < cut_ljsq[itype][jtype]) {
-            evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
-              offset[itype][jtype];
+            evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
            evdwl *= factor_lj;
-          } else evdwl = 0.0;
+          } else
+            evdwl = 0.0;
        }

-        if (evflag) ev_tally_xyz_full(i,evdwl,ecoul,fx,fy,fz,delx,dely,delz);
+        if (evflag) ev_tally_xyz_full(i, evdwl, ecoul, fx, fy, fz, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_lj_cut_dipole_long_gpu.cpp
+++ b/src/GPU/pair_lj_cut_dipole_long_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -26,7 +25,6 @@
 #include "kspace.h"
 #include "math_const.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"
 #include "update.h"
@ -34,50 +32,43 @@
 #include <cmath>
 #include <cstring>

-#define EWALD_F   1.12837917
-#define EWALD_P   0.3275911
-#define A1        0.254829592
-#define A2       -0.284496736
-#define A3        1.421413741
-#define A4       -1.453152027
-#define A5        1.061405429
+#define EWALD_F 1.12837917
+#define EWALD_P 0.3275911
+#define A1 0.254829592
+#define A2 -0.284496736
+#define A3 1.421413741
+#define A4 -1.453152027
+#define A5 1.061405429

 using namespace LAMMPS_NS;
 using namespace MathConst;

 // External functions from cuda library for atom decomposition

-int dplj_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
-                  double **host_lj2, double **host_lj3, double **host_lj4,
-                  double **offset, double *special_lj, const int nlocal,
-                  const int nall, const int max_nbors, const int maxspecial,
-                  const double cell_size, int &gpu_mode, FILE *screen,
-                  double **host_cut_ljsq, const double host_cut_coulsq,
-                  double *host_special_coul, const double qqrd2e,
+int dplj_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                  double **host_lj3, double **host_lj4, double **offset, double *special_lj,
+                  const int nlocal, const int nall, const int max_nbors, const int maxspecial,
+                  const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
+                  const double host_cut_coulsq, double *host_special_coul, const double qqrd2e,
                  const double g_ewald);
 void dplj_gpu_clear();
-int ** dplj_gpu_compute_n(const int ago, const int inum,
-                          const int nall, double **host_x, int *host_type,
-                          double *sublo, double *subhi, tagint *tag,
-                          int **nspecial, tagint **special, const bool eflag,
-                          const bool vflag, const bool eatom, const bool vatom,
-                          int &host_start, int **ilist, int **jnum,
-                          const double cpu_time, bool &success,
-                          double *host_q, double **host_mu,
-                          double *boxlo, double *prd);
-void dplj_gpu_compute(const int ago, const int inum,
-                      const int nall, double **host_x, int *host_type,
-                      int *ilist, int *numj, int **firstneigh,
-                      const bool eflag, const bool vflag, const bool eatom,
-                      const bool vatom, int &host_start, const double cpu_time,
-                      bool &success, double *host_q, double **host_mu,
+int **dplj_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                         int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                         const bool vatom, int &host_start, int **ilist, int **jnum,
+                         const double cpu_time, bool &success, double *host_q, double **host_mu,
+                         double *boxlo, double *prd);
+void dplj_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                      int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                      const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                      const double cpu_time, bool &success, double *host_q, double **host_mu,
                      const int nlocal, double *boxlo, double *prd);
 double dplj_gpu_bytes();

 /* ---------------------------------------------------------------------- */

-PairLJCutDipoleLongGPU::PairLJCutDipoleLongGPU(LAMMPS *lmp) : PairLJCutDipoleLong(lmp),
-  gpu_mode(GPU_FORCE)
+PairLJCutDipoleLongGPU::PairLJCutDipoleLongGPU(LAMMPS *lmp) :
+    PairLJCutDipoleLong(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  reinitflag = 0;
@ -99,7 +90,7 @@ PairLJCutDipoleLongGPU::~PairLJCutDipoleLongGPU()

 void PairLJCutDipoleLongGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -107,7 +98,7 @@ void PairLJCutDipoleLongGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -116,30 +107,25 @@ void PairLJCutDipoleLongGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = dplj_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                   atom->type, sublo, subhi,
-                                   atom->tag, atom->nspecial, atom->special,
-                                   eflag, vflag, eflag_atom, vflag_atom,
-                                   host_start, &ilist, &numneigh, cpu_time,
-                                   success, atom->q, atom->mu, domain->boxlo,
-                                   domain->prd);
+    firstneigh = dplj_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                    atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                    eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
+                                    success, atom->q, atom->mu, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    dplj_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                    vflag_atom, host_start, cpu_time, success, atom->q,
-                    atom->mu, atom->nlocal, domain->boxlo, domain->prd);
+    dplj_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                     eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
+                     atom->mu, atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -153,11 +139,10 @@ void PairLJCutDipoleLongGPU::compute(int eflag, int vflag)
 void PairLJCutDipoleLongGPU::init_style()
 {
  if (!atom->q_flag || !atom->mu_flag || !atom->torque_flag)
-    error->all(FLERR,"Pair dipole/cut/gpu requires atom attributes q, mu, torque");
+    error->all(FLERR, "Pair dipole/cut/gpu requires atom attributes q, mu, torque");

-
-  if (strcmp(update->unit_style,"electron") == 0)
-    error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles");
+  if (strcmp(update->unit_style, "electron") == 0)
+    error->all(FLERR, "Cannot (yet) use 'electron' units with dipoles");

  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
@ -165,10 +150,9 @@ void PairLJCutDipoleLongGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -180,30 +164,23 @@ void PairLJCutDipoleLongGPU::init_style()

  // ensure use of KSpace long-range solver, set g_ewald

-  if (force->kspace == nullptr)
-    error->all(FLERR,"Pair style requires a KSpace style");
+  if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
  g_ewald = force->kspace->g_ewald;

  // setup force tables

-  if (ncoultablebits) init_tables(cut_coul,nullptr);
+  if (ncoultablebits) init_tables(cut_coul, nullptr);

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = dplj_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
-                             offset, force->special_lj, atom->nlocal,
-                             atom->nlocal+atom->nghost, mnf, maxspecial,
-                             cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
-                             force->special_coul, force->qqrd2e, g_ewald);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      dplj_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj,
+                    atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
+                    screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e, g_ewald);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -216,27 +193,26 @@ double PairLJCutDipoleLongGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairLJCutDipoleLongGPU::cpu_compute(int start, int inum, int eflag, int vflag,
-                                   int *ilist, int *numneigh,
-                                   int **firstneigh)
+void PairLJCutDipoleLongGPU::cpu_compute(int start, int inum, int eflag, int vflag, int *ilist,
+                                         int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz;
-  double rsq,r,rinv,r2inv,r6inv;
-  double forcecoulx,forcecouly,forcecoulz,fforce;
-  double tixcoul,tiycoul,tizcoul;
-  double fx,fy,fz,fdx,fdy,fdz,fax,fay,faz;
-  double pdotp,pidotr,pjdotr,pre1,pre2,pre3;
-  double grij,expm2,t,erfc;
-  double g0,g1,g2,b0,b1,b2,b3,d0,d1,d2,d3;
-  double zdix,zdiy,zdiz,zdjx,zdjy,zdjz,zaix,zaiy,zaiz,zajx,zajy,zajz;
-  double g0b1_g1b2_g2b3,g0d1_g1d2_g2d3;
-  double forcelj,factor_coul,factor_lj,facm1;
-  double evdwl,ecoul;
+  int i, j, ii, jj, jnum, itype, jtype;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz;
+  double rsq, r, rinv, r2inv, r6inv;
+  double forcecoulx, forcecouly, forcecoulz, fforce;
+  double tixcoul, tiycoul, tizcoul;
+  double fx, fy, fz, fdx, fdy, fdz, fax, fay, faz;
+  double pdotp, pidotr, pjdotr, pre1, pre2, pre3;
+  double grij, expm2, t, erfc;
+  double g0, g1, g2, b0, b1, b2, b3, d0, d1, d2, d3;
+  double zdix, zdiy, zdiz, zdjx, zdjy, zdjz, zaix, zaiy, zaiz, zajx, zajy, zajz;
+  double g0b1_g1b2_g2b3, g0d1_g1d2_g2d3;
+  double forcelj, factor_coul, factor_lj, facm1;
+  double evdwl, ecoul;
  int *jlist;

  evdwl = ecoul = 0.0;
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  double **x = atom->x;
  double **f = atom->f;
@ -249,8 +225,8 @@ void PairLJCutDipoleLongGPU::cpu_compute(int start, int inum, int eflag, int vfl
  double qqrd2e = force->qqrd2e;

  pre1 = 2.0 * g_ewald / MY_PIS;
-  pre2 = 4.0 * pow(g_ewald,3.0) / MY_PIS;
-  pre3 = 8.0 * pow(g_ewald,5.0) / MY_PIS;
+  pre2 = 4.0 * pow(g_ewald, 3.0) / MY_PIS;
+  pre3 = 8.0 * pow(g_ewald, 5.0) / MY_PIS;

  // loop over neighbors of my atoms

@ -273,51 +249,48 @@ void PairLJCutDipoleLongGPU::cpu_compute(int start, int inum, int eflag, int vfl
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;
        rinv = sqrt(r2inv);

        if (rsq < cut_coulsq) {
          r = sqrt(rsq);
          grij = g_ewald * r;
-          expm2 = exp(-grij*grij);
-          t = 1.0 / (1.0 + EWALD_P*grij);
-          erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+          expm2 = exp(-grij * grij);
+          t = 1.0 / (1.0 + EWALD_P * grij);
+          erfc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * expm2;

-          pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2];
-          pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
-          pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
+          pdotp = mu[i][0] * mu[j][0] + mu[i][1] * mu[j][1] + mu[i][2] * mu[j][2];
+          pidotr = mu[i][0] * delx + mu[i][1] * dely + mu[i][2] * delz;
+          pjdotr = mu[j][0] * delx + mu[j][1] * dely + mu[j][2] * delz;

-          g0 = qtmp*q[j];
-          g1 = qtmp*pjdotr - q[j]*pidotr + pdotp;
-          g2 = -pidotr*pjdotr;
+          g0 = qtmp * q[j];
+          g1 = qtmp * pjdotr - q[j] * pidotr + pdotp;
+          g2 = -pidotr * pjdotr;

          if (factor_coul > 0.0) {
            b0 = erfc * rinv;
-            b1 = (b0 + pre1*expm2) * r2inv;
-            b2 = (3.0*b1 + pre2*expm2) * r2inv;
-            b3 = (5.0*b2 + pre3*expm2) * r2inv;
+            b1 = (b0 + pre1 * expm2) * r2inv;
+            b2 = (3.0 * b1 + pre2 * expm2) * r2inv;
+            b3 = (5.0 * b2 + pre3 * expm2) * r2inv;

-            g0b1_g1b2_g2b3 = g0*b1 + g1*b2 + g2*b3;
-            fdx = delx * g0b1_g1b2_g2b3 -
-              b1 * (qtmp*mu[j][0] - q[j]*mu[i][0]) +
-              b2 * (pjdotr*mu[i][0] + pidotr*mu[j][0]);
-            fdy = dely * g0b1_g1b2_g2b3 -
-              b1 * (qtmp*mu[j][1] - q[j]*mu[i][1]) +
-              b2 * (pjdotr*mu[i][1] + pidotr*mu[j][1]);
-            fdz = delz * g0b1_g1b2_g2b3 -
-              b1 * (qtmp*mu[j][2] - q[j]*mu[i][2]) +
-              b2 * (pjdotr*mu[i][2] + pidotr*mu[j][2]);
+            g0b1_g1b2_g2b3 = g0 * b1 + g1 * b2 + g2 * b3;
+            fdx = delx * g0b1_g1b2_g2b3 - b1 * (qtmp * mu[j][0] - q[j] * mu[i][0]) +
+                b2 * (pjdotr * mu[i][0] + pidotr * mu[j][0]);
+            fdy = dely * g0b1_g1b2_g2b3 - b1 * (qtmp * mu[j][1] - q[j] * mu[i][1]) +
+                b2 * (pjdotr * mu[i][1] + pidotr * mu[j][1]);
+            fdz = delz * g0b1_g1b2_g2b3 - b1 * (qtmp * mu[j][2] - q[j] * mu[i][2]) +
+                b2 * (pjdotr * mu[i][2] + pidotr * mu[j][2]);

-            zdix = delx * (q[j]*b1 + b2*pjdotr) - b1*mu[j][0];
-            zdiy = dely * (q[j]*b1 + b2*pjdotr) - b1*mu[j][1];
-            zdiz = delz * (q[j]*b1 + b2*pjdotr) - b1*mu[j][2];
-            zdjx = delx * (-qtmp*b1 + b2*pidotr) - b1*mu[i][0];
-            zdjy = dely * (-qtmp*b1 + b2*pidotr) - b1*mu[i][1];
-            zdjz = delz * (-qtmp*b1 + b2*pidotr) - b1*mu[i][2];
+            zdix = delx * (q[j] * b1 + b2 * pjdotr) - b1 * mu[j][0];
+            zdiy = dely * (q[j] * b1 + b2 * pjdotr) - b1 * mu[j][1];
+            zdiz = delz * (q[j] * b1 + b2 * pjdotr) - b1 * mu[j][2];
+            zdjx = delx * (-qtmp * b1 + b2 * pidotr) - b1 * mu[i][0];
+            zdjy = dely * (-qtmp * b1 + b2 * pidotr) - b1 * mu[i][1];
+            zdjz = delz * (-qtmp * b1 + b2 * pidotr) - b1 * mu[i][2];

            if (factor_coul < 1.0) {
              fdx *= factor_coul;
@ -338,27 +311,24 @@ void PairLJCutDipoleLongGPU::cpu_compute(int start, int inum, int eflag, int vfl

          if (factor_coul < 1.0) {
            d0 = (erfc - 1.0) * rinv;
-            d1 = (d0 + pre1*expm2) * r2inv;
-            d2 = (3.0*d1 + pre2*expm2) * r2inv;
-            d3 = (5.0*d2 + pre3*expm2) * r2inv;
+            d1 = (d0 + pre1 * expm2) * r2inv;
+            d2 = (3.0 * d1 + pre2 * expm2) * r2inv;
+            d3 = (5.0 * d2 + pre3 * expm2) * r2inv;

-            g0d1_g1d2_g2d3 = g0*d1 + g1*d2 + g2*d3;
-            fax = delx * g0d1_g1d2_g2d3 -
-              d1 * (qtmp*mu[j][0] - q[j]*mu[i][0]) +
-              d2 * (pjdotr*mu[i][0] + pidotr*mu[j][0]);
-            fay = dely * g0d1_g1d2_g2d3 -
-              d1 * (qtmp*mu[j][1] - q[j]*mu[i][1]) +
-              d2 * (pjdotr*mu[i][1] + pidotr*mu[j][1]);
-            faz = delz * g0d1_g1d2_g2d3 -
-              d1 * (qtmp*mu[j][2] - q[j]*mu[i][2]) +
-              d2 * (pjdotr*mu[i][2] + pidotr*mu[j][2]);
+            g0d1_g1d2_g2d3 = g0 * d1 + g1 * d2 + g2 * d3;
+            fax = delx * g0d1_g1d2_g2d3 - d1 * (qtmp * mu[j][0] - q[j] * mu[i][0]) +
+                d2 * (pjdotr * mu[i][0] + pidotr * mu[j][0]);
+            fay = dely * g0d1_g1d2_g2d3 - d1 * (qtmp * mu[j][1] - q[j] * mu[i][1]) +
+                d2 * (pjdotr * mu[i][1] + pidotr * mu[j][1]);
+            faz = delz * g0d1_g1d2_g2d3 - d1 * (qtmp * mu[j][2] - q[j] * mu[i][2]) +
+                d2 * (pjdotr * mu[i][2] + pidotr * mu[j][2]);

-            zaix = delx * (q[j]*d1 + d2*pjdotr) - d1*mu[j][0];
-            zaiy = dely * (q[j]*d1 + d2*pjdotr) - d1*mu[j][1];
-            zaiz = delz * (q[j]*d1 + d2*pjdotr) - d1*mu[j][2];
-            zajx = delx * (-qtmp*d1 + d2*pidotr) - d1*mu[i][0];
-            zajy = dely * (-qtmp*d1 + d2*pidotr) - d1*mu[i][1];
-            zajz = delz * (-qtmp*d1 + d2*pidotr) - d1*mu[i][2];
+            zaix = delx * (q[j] * d1 + d2 * pjdotr) - d1 * mu[j][0];
+            zaiy = dely * (q[j] * d1 + d2 * pjdotr) - d1 * mu[j][1];
+            zaiz = delz * (q[j] * d1 + d2 * pjdotr) - d1 * mu[j][2];
+            zajx = delx * (-qtmp * d1 + d2 * pidotr) - d1 * mu[i][0];
+            zajy = dely * (-qtmp * d1 + d2 * pidotr) - d1 * mu[i][1];
+            zajz = delz * (-qtmp * d1 + d2 * pidotr) - d1 * mu[i][2];

            if (factor_coul > 0.0) {
              facm1 = 1.0 - factor_coul;
@ -382,9 +352,9 @@ void PairLJCutDipoleLongGPU::cpu_compute(int start, int inum, int eflag, int vfl
          forcecouly = fdy + fay;
          forcecoulz = fdz + faz;

-          tixcoul = mu[i][1]*(zdiz + zaiz) - mu[i][2]*(zdiy + zaiy);
-          tiycoul = mu[i][2]*(zdix + zaix) - mu[i][0]*(zdiz + zaiz);
-          tizcoul = mu[i][0]*(zdiy + zaiy) - mu[i][1]*(zdix + zaix);
+          tixcoul = mu[i][1] * (zdiz + zaiz) - mu[i][2] * (zdiy + zaiy);
+          tiycoul = mu[i][2] * (zdix + zaix) - mu[i][0] * (zdiz + zaiz);
+          tizcoul = mu[i][0] * (zdiy + zaiy) - mu[i][1] * (zdix + zaix);
        } else {
          forcecoulx = forcecouly = forcecoulz = 0.0;
          tixcoul = tiycoul = tizcoul = 0.0;
@ -393,43 +363,45 @@ void PairLJCutDipoleLongGPU::cpu_compute(int start, int inum, int eflag, int vfl
        // LJ interaction

        if (rsq < cut_ljsq[itype][jtype]) {
-          r6inv = r2inv*r2inv*r2inv;
-          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-          fforce = factor_lj * forcelj*r2inv;
-        } else fforce = 0.0;
+          r6inv = r2inv * r2inv * r2inv;
+          forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
+          fforce = factor_lj * forcelj * r2inv;
+        } else
+          fforce = 0.0;

        // total force

-        fx = qqrd2e*forcecoulx + delx*fforce;
-        fy = qqrd2e*forcecouly + dely*fforce;
-        fz = qqrd2e*forcecoulz + delz*fforce;
+        fx = qqrd2e * forcecoulx + delx * fforce;
+        fy = qqrd2e * forcecouly + dely * fforce;
+        fz = qqrd2e * forcecoulz + delz * fforce;

        // force & torque accumulation

        f[i][0] += fx;
        f[i][1] += fy;
        f[i][2] += fz;
-        torque[i][0] += qqrd2e*tixcoul;
-        torque[i][1] += qqrd2e*tiycoul;
-        torque[i][2] += qqrd2e*tizcoul;
+        torque[i][0] += qqrd2e * tixcoul;
+        torque[i][1] += qqrd2e * tiycoul;
+        torque[i][2] += qqrd2e * tizcoul;

        if (eflag) {
          if (rsq < cut_coulsq && factor_coul > 0.0) {
-            ecoul = qqrd2e*(b0*g0 + b1*g1 + b2*g2);
+            ecoul = qqrd2e * (b0 * g0 + b1 * g1 + b2 * g2);
            if (factor_coul < 1.0) {
              ecoul *= factor_coul;
-              ecoul += (1-factor_coul) * qqrd2e * (d0*g0 + d1*g1 + d2*g2);
+              ecoul += (1 - factor_coul) * qqrd2e * (d0 * g0 + d1 * g1 + d2 * g2);
            }
-          } else ecoul = 0.0;
+          } else
+            ecoul = 0.0;

          if (rsq < cut_ljsq[itype][jtype]) {
-            evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
-              offset[itype][jtype];
+            evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
            evdwl *= factor_lj;
-          } else evdwl = 0.0;
+          } else
+            evdwl = 0.0;
        }

-        if (evflag) ev_tally_xyz_full(i,evdwl,ecoul,fx,fy,fz,delx,dely,delz);
+        if (evflag) ev_tally_xyz_full(i, evdwl, ecoul, fx, fy, fz, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_lj_cut_gpu.cpp
+++ b/src/GPU/pair_lj_cut_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,29 +32,24 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
-                 double **host_lj2, double **host_lj3, double **host_lj4,
-                 double **offset, double *special_lj, const int nlocal,
-                 const int nall, const int max_nbors, const int maxspecial,
+int ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                 double **host_lj3, double **host_lj4, double **offset, double *special_lj,
+                 const int nlocal, const int nall, const int max_nbors, const int maxspecial,
                 const double cell_size, int &gpu_mode, FILE *screen);

-void ljl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
-                    double **host_lj2, double **host_lj3, double **host_lj4,
-                    double **offset);
+void ljl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                    double **host_lj3, double **host_lj4, double **offset);

 void ljl_gpu_clear();
-int ** ljl_gpu_compute_n(const int ago, const int inum, const int nall,
-                         double **host_x, int *host_type, double *sublo,
-                         double *subhi, tagint *tag, int **nspecial,
-                         tagint **special, const bool eflag, const bool vflag,
-                         const bool eatom, const bool vatom, int &host_start,
-                         int **ilist, int **jnum,
-                         const double cpu_time, bool &success);
-void ljl_gpu_compute(const int ago, const int inum, const int nall,
-                     double **host_x, int *host_type, int *ilist, int *numj,
-                     int **firstneigh, const bool eflag, const bool vflag,
-                     const bool eatom, const bool vatom, int &host_start,
-                     const double cpu_time, bool &success);
+int **ljl_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                        int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                        tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                        const bool vatom, int &host_start, int **ilist, int **jnum,
+                        const double cpu_time, bool &success);
+void ljl_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
+                     int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
+                     const bool eatom, const bool vatom, int &host_start, const double cpu_time,
+                     bool &success);
 double ljl_gpu_bytes();

 /* ---------------------------------------------------------------------- */
@ -82,7 +75,7 @@ PairLJCutGPU::~PairLJCutGPU()

 void PairLJCutGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -90,7 +83,7 @@ void PairLJCutGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -99,28 +92,24 @@ void PairLJCutGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = ljl_gpu_compute_n(neighbor->ago, inum, nall,
-                                   atom->x, atom->type, sublo,
-                                   subhi, atom->tag, atom->nspecial,
-                                   atom->special, eflag, vflag, eflag_atom,
-                                   vflag_atom, host_start,
-                                   &ilist, &numneigh, cpu_time, success);
+    firstneigh =
+        ljl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+                          atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
+                          host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    ljl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                    vflag_atom, host_start, cpu_time, success);
+    ljl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                    eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -135,17 +124,15 @@ void PairLJCutGPU::init_style()
 {
  cut_respa = nullptr;

-
  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
  double cut;
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -153,21 +140,15 @@ void PairLJCutGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = ljl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
-                             offset, force->special_lj, atom->nlocal,
-                             atom->nlocal+atom->nghost, mnf, maxspecial,
-                             cell_size, gpu_mode, screen);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success = ljl_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj,
+                             atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size,
+                             gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -176,7 +157,7 @@ void PairLJCutGPU::reinit()
 {
  Pair::reinit();

-  ljl_gpu_reinit(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset);
+  ljl_gpu_reinit(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset);
 }

 /* ---------------------------------------------------------------------- */
@ -189,11 +170,12 @@ double PairLJCutGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairLJCutGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
-                               int *ilist, int *numneigh, int **firstneigh) {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+void PairLJCutGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                               int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
+  double rsq, r2inv, r6inv, forcelj, factor_lj;
  int *jlist;

  double **x = atom->x;
@ -220,26 +202,25 @@ void PairLJCutGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
-        r6inv = r2inv*r2inv*r2inv;
-        forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-        fpair = factor_lj*forcelj*r2inv;
+        r2inv = 1.0 / rsq;
+        r6inv = r2inv * r2inv * r2inv;
+        forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
+        fpair = factor_lj * forcelj * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
-          evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
-            offset[itype][jtype];
+          evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
          evdwl *= factor_lj;
        }

-        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_lj_cut_tip4p_long_gpu.cpp
+++ b/src/GPU/pair_lj_cut_tip4p_long_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -34,55 +33,47 @@

 #include <cmath>

-#define EWALD_F   1.12837917
-#define EWALD_P   0.3275911
-#define A1        0.254829592
-#define A2       -0.284496736
-#define A3        1.421413741
-#define A4       -1.453152027
-#define A5        1.061405429
+#define EWALD_F 1.12837917
+#define EWALD_P 0.3275911
+#define A1 0.254829592
+#define A2 -0.284496736
+#define A3 1.421413741
+#define A4 -1.453152027
+#define A5 1.061405429

 using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int ljtip4p_long_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
-    double **host_lj2, double **host_lj3, double **host_lj4,
-    double **offset, double *special_lj, const int nlocal,
-    const int tH, const int tO, const double alpha, const double qdist,
-    const int nall, const int max_nbors, const int maxspecial,
-    const double cell_size, int &gpu_mode, FILE *screen,
-    double **host_cut_ljsq, const double host_cut_coulsq,
-    const double host_cut_coulsqplus, double *host_special_coul,
-    const double qqrd2e, const double g_ewald,
-    int map_size, int max_same);
+int ljtip4p_long_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                          double **host_lj3, double **host_lj4, double **offset, double *special_lj,
+                          const int nlocal, const int tH, const int tO, const double alpha,
+                          const double qdist, const int nall, const int max_nbors,
+                          const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen,
+                          double **host_cut_ljsq, const double host_cut_coulsq,
+                          const double host_cut_coulsqplus, double *host_special_coul,
+                          const double qqrd2e, const double g_ewald, int map_size, int max_same);
 void ljtip4p_long_gpu_clear();
-int ** ljtip4p_long_gpu_compute_n(const int ago, const int inum,
-    const int nall, double **host_x, int *host_type,
-    double *sublo, double *subhi,
-    tagint *tag, int *map_array, int map_size,
-    int *sametag, int max_same,
-    int **nspecial,
-    tagint **special, const bool eflag, const bool vflag,
-    const bool eatom, const bool vatom, int &host_start,
-    int **ilist, int **jnum,
-    const double cpu_time, bool &success, double *host_q,
-    double *boxlo, double *prd);
-void ljtip4p_long_gpu_compute(const int ago, const int inum, const int nall,
-    double **host_x, int *host_type, int *ilist, int *numj,
-    int **firstneigh, const bool eflag, const bool vflag,
-    const bool eatom, const bool vatom, int &host_start,
-    const double cpu_time,
-    bool &success, double *host_q, const int nlocal,
-    double *boxlo, double *prd);
+int **ljtip4p_long_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                                 int *host_type, double *sublo, double *subhi, tagint *tag,
+                                 int *map_array, int map_size, int *sametag, int max_same,
+                                 int **nspecial, tagint **special, const bool eflag,
+                                 const bool vflag, const bool eatom, const bool vatom,
+                                 int &host_start, int **ilist, int **jnum, const double cpu_time,
+                                 bool &success, double *host_q, double *boxlo, double *prd);
+void ljtip4p_long_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                              int *host_type, int *ilist, int *numj, int **firstneigh,
+                              const bool eflag, const bool vflag, const bool eatom,
+                              const bool vatom, int &host_start, const double cpu_time,
+                              bool &success, double *host_q, const int nlocal, double *boxlo,
+                              double *prd);
 double ljtip4p_long_gpu_bytes();
-void ljtip4p_long_copy_molecule_data(int, tagint *, int *,
-                                     int, int *, int, int);
+void ljtip4p_long_copy_molecule_data(int, tagint *, int *, int, int *, int, int);

 /* ---------------------------------------------------------------------- */

-PairLJCutTIP4PLongGPU::PairLJCutTIP4PLongGPU(LAMMPS *lmp)
-: PairLJCutTIP4PLong(lmp), gpu_mode(GPU_FORCE)
+PairLJCutTIP4PLongGPU::PairLJCutTIP4PLongGPU(LAMMPS *lmp) :
+    PairLJCutTIP4PLong(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  reinitflag = 0;
@ -104,14 +95,14 @@ PairLJCutTIP4PLongGPU::~PairLJCutTIP4PLongGPU()

 void PairLJCutTIP4PLongGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);
  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;

  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -120,40 +111,26 @@ void PairLJCutTIP4PLongGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = ljtip4p_long_gpu_compute_n(neighbor->ago, inum, nall,
-        atom->x, atom->type, sublo,
-        subhi,
-        atom->tag, atom->get_map_array(), atom->get_map_size(),
-        atom->sametag, atom->get_max_same(),
-        atom->nspecial,
-        atom->special, eflag, vflag, eflag_atom,
-        vflag_atom, host_start, &ilist, &numneigh,
-        cpu_time, success, atom->q, domain->boxlo,
-        domain->prd);
+    firstneigh = ljtip4p_long_gpu_compute_n(
+        neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+        atom->get_map_array(), atom->get_map_size(), atom->sametag, atom->get_max_same(),
+        atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist,
+        &numneigh, cpu_time, success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    ljtip4p_long_copy_molecule_data(nall, atom->tag,
-        atom->get_map_array(), atom->get_map_size(),
-        atom->sametag, atom->get_max_same(), neighbor->ago);
-    ljtip4p_long_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-        ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-        vflag_atom, host_start, cpu_time, success, atom->q,
-        atom->nlocal, domain->boxlo, domain->prd);
+    ljtip4p_long_copy_molecule_data(nall, atom->tag, atom->get_map_array(), atom->get_map_size(),
+                                    atom->sametag, atom->get_max_same(), neighbor->ago);
+    ljtip4p_long_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh,
+                             firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
+                             success, atom->q, atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
-
-//  if (host_start<inum) {
-//    cpu_time = platform::walltime();
-//    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
-//    cpu_time = platform::walltime() - cpu_time;
-//  }
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");
 }

 /* ----------------------------------------------------------------------
@ -165,17 +142,16 @@ void PairLJCutTIP4PLongGPU::init_style()

  cut_respa = nullptr;
  if (atom->tag_enable == 0)
-    error->all(FLERR,"Pair style lj/cut/tip4p/long/gpu requires atom IDs");
+    error->all(FLERR, "Pair style lj/cut/tip4p/long/gpu requires atom IDs");
  if (!atom->q_flag)
    error->all(FLERR, "Pair style lj/cut/tip4p/long/gpu requires atom attribute q");
-  if (force->bond == nullptr)
-    error->all(FLERR,"Must use a bond style with TIP4P potential");
-  if (force->angle == nullptr)
-    error->all(FLERR,"Must use an angle style with TIP4P potential");
+  if (force->bond == nullptr) error->all(FLERR, "Must use a bond style with TIP4P potential");
+  if (force->angle == nullptr) error->all(FLERR, "Must use an angle style with TIP4P potential");

  if (atom->map_style == Atom::MAP_HASH)
-    error->all(FLERR,"GPU-accelerated lj/cut/tip4p/long currently"
-        " requires 'array' style atom map (atom_modify map array)");
+    error->all(FLERR,
+               "GPU-accelerated lj/cut/tip4p/long currently"
+               " requires 'array' style atom map (atom_modify map array)");

  //PairLJCutCoulLong::init_style();
  // Repeat cutsq calculation because done after call to init_style
@ -184,10 +160,9 @@ void PairLJCutTIP4PLongGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -196,51 +171,41 @@ void PairLJCutTIP4PLongGPU::init_style()
  double cell_size = sqrt(maxcut) + neighbor->skin;

  // insure use of KSpace long-range solver, set g_ewald
-  if (force->kspace == nullptr)
-    error->all(FLERR,"Pair style requires a KSpace style");
+  if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
  g_ewald = force->kspace->g_ewald;

  // setup force tables
-  if (ncoultablebits) init_tables(cut_coul,cut_respa);
+  if (ncoultablebits) init_tables(cut_coul, cut_respa);

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;

  // set alpha parameter
  double theta = force->angle->equilibrium_angle(typeA);
  double blen = force->bond->equilibrium_distance(typeB);
-  alpha = qdist / (cos(0.5*theta) * blen);
+  alpha = qdist / (cos(0.5 * theta) * blen);

  cut_coulsq = cut_coul * cut_coul;
-  double cut_coulsqplus = (cut_coul+qdist+blen) * (cut_coul+qdist+blen);
-  if (maxcut < cut_coulsqplus) {
-    cell_size = (cut_coul+qdist+blen) + neighbor->skin;
-  }
+  double cut_coulsqplus = (cut_coul + qdist + blen) * (cut_coul + qdist + blen);
+  if (maxcut < cut_coulsqplus) { cell_size = (cut_coul + qdist + blen) + neighbor->skin; }
  if (comm->cutghostuser < cell_size) {
    if (comm->me == 0)
-      error->warning(FLERR,"Increasing communication cutoff from {:.8} "
-                      "to {:.8} for TIP4P GPU style",comm->cutghostuser,cell_size);
+      error->warning(FLERR,
+                     "Increasing communication cutoff from {:.8} to {:.8} for TIP4P GPU style",
+                     comm->cutghostuser, cell_size);
    comm->cutghostuser = cell_size;
  }

  int mnf = 5e-2 * neighbor->oneatom;
-  int success = ljtip4p_long_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
-                             offset, force->special_lj, atom->nlocal,
-                             typeH, typeO, alpha, qdist,
-                             atom->nlocal+atom->nghost, mnf, maxspecial,
-                             cell_size, gpu_mode, screen, cut_ljsq,
-                             cut_coulsq, cut_coulsqplus,
-                             force->special_coul, force->qqrd2e,
-                             g_ewald, atom->get_map_size(),
-                             atom->get_max_same());
-  GPU_EXTRA::check_flag(success,error,world);
+  int success = ljtip4p_long_gpu_init(
+      atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj, atom->nlocal, typeH,
+      typeO, alpha, qdist, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
+      screen, cut_ljsq, cut_coulsq, cut_coulsqplus, force->special_coul, force->qqrd2e, g_ewald,
+      atom->get_map_size(), atom->get_max_same());
+  GPU_EXTRA::check_flag(success, error, world);
  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-    neighbor->requests[irequest]->cut = 1;
-    neighbor->requests[irequest]->cutoff = cut_coul+qdist+blen + neighbor->skin;
+    auto req = neighbor->add_request(this, NeighConst::REQ_FULL);
+    req->set_cutoff(cut_coul + qdist + blen + neighbor->skin);
  }
 }

@ -253,4 +218,3 @@ double PairLJCutTIP4PLongGPU::memory_usage()
 }

 /* ---------------------------------------------------------------------- */
-
--- a/src/GPU/pair_lj_expand_coul_long_gpu.cpp
+++ b/src/GPU/pair_lj_expand_coul_long_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -25,56 +24,50 @@
 #include "gpu_extra.h"
 #include "kspace.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

 #include <cmath>

-#define EWALD_F   1.12837917
-#define EWALD_P   0.3275911
-#define A1        0.254829592
-#define A2       -0.284496736
-#define A3        1.421413741
-#define A4       -1.453152027
-#define A5        1.061405429
+#define EWALD_F 1.12837917
+#define EWALD_P 0.3275911
+#define A1 0.254829592
+#define A2 -0.284496736
+#define A3 1.421413741
+#define A4 -1.453152027
+#define A5 1.061405429

 using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int ljecl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
-                   double **host_lj2, double **host_lj3, double **host_lj4,
-                   double **offset, double **shift, double *special_lj,
-                   const int nlocal, const int nall, const int max_nbors,
-                   const int maxspecial, const double cell_size,
-                   int &gpu_mode, FILE *screen, double **host_cut_ljsq,
-                   double host_cut_coulsq, double *host_special_coul,
+int ljecl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                   double **host_lj3, double **host_lj4, double **offset, double **shift,
+                   double *special_lj, const int nlocal, const int nall, const int max_nbors,
+                   const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen,
+                   double **host_cut_ljsq, double host_cut_coulsq, double *host_special_coul,
                   const double qqrd2e, const double g_ewald);
-void ljecl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
-                      double **host_lj2, double **host_lj3, double **host_lj4,
-                      double **offset, double **shift, double **host_lj_cutsq);
+void ljecl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                      double **host_lj3, double **host_lj4, double **offset, double **shift,
+                      double **host_lj_cutsq);
 void ljecl_gpu_clear();
-int ** ljecl_gpu_compute_n(const int ago, const int inum,
-                           const int nall, double **host_x, int *host_type,
-                           double *sublo, double *subhi, tagint *tag,
-                           int **nspecial, tagint **special, const bool eflag,
-                           const bool vflag, const bool eatom, const bool vatom,
-                           int &host_start, int **ilist, int **jnum,
-                           const double cpu_time, bool &success, double *host_q,
-                           double *boxlo, double *prd);
-void ljecl_gpu_compute(const int ago, const int inum, const int nall,
-                       double **host_x, int *host_type, int *ilist, int *numj,
-                       int **firstneigh, const bool eflag, const bool vflag,
-                       const bool eatom, const bool vatom, int &host_start,
-                       const double cpu_time, bool &success, double *host_q,
-                       const int nlocal, double *boxlo, double *prd);
+int **ljecl_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                          int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                          tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                          const bool vatom, int &host_start, int **ilist, int **jnum,
+                          const double cpu_time, bool &success, double *host_q, double *boxlo,
+                          double *prd);
+void ljecl_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                       int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                       const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                       const double cpu_time, bool &success, double *host_q, const int nlocal,
+                       double *boxlo, double *prd);
 double ljecl_gpu_bytes();

 /* ---------------------------------------------------------------------- */

 PairLJExpandCoulLongGPU::PairLJExpandCoulLongGPU(LAMMPS *lmp) :
-  PairLJExpandCoulLong(lmp), gpu_mode(GPU_FORCE)
+    PairLJExpandCoulLong(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  cpu_time = 0.0;
@ -95,7 +88,7 @@ PairLJExpandCoulLongGPU::~PairLJExpandCoulLongGPU()

 void PairLJExpandCoulLongGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -103,7 +96,7 @@ void PairLJExpandCoulLongGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -112,30 +105,25 @@ void PairLJExpandCoulLongGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = ljecl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                    atom->type, sublo, subhi,
-                                    atom->tag, atom->nspecial, atom->special,
-                                    eflag, vflag, eflag_atom, vflag_atom,
-                                    host_start, &ilist, &numneigh, cpu_time,
-                                    success, atom->q, domain->boxlo,
-                                    domain->prd);
+    firstneigh = ljecl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                     atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                     eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
+                                     cpu_time, success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    ljecl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                     vflag_atom, host_start, cpu_time, success, atom->q,
-                     atom->nlocal, domain->boxlo, domain->prd);
+    ljecl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                      eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
+                      atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -150,8 +138,7 @@ void PairLJExpandCoulLongGPU::init_style()
 {
  cut_respa = nullptr;

-  if (!atom->q_flag)
-    error->all(FLERR,"Pair style lj/cut/coul/long/gpu requires atom attribute q");
+  if (!atom->q_flag) error->all(FLERR, "Pair style lj/expand/coul/long/gpu requires atom attribute q");

  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
@ -159,10 +146,9 @@ void PairLJExpandCoulLongGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -174,30 +160,23 @@ void PairLJExpandCoulLongGPU::init_style()

  // insure use of KSpace long-range solver, set g_ewald

-  if (force->kspace == nullptr)
-    error->all(FLERR,"Pair style requires a KSpace style");
+  if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
  g_ewald = force->kspace->g_ewald;

  // setup force tables

-  if (ncoultablebits) init_tables(cut_coul,cut_respa);
+  if (ncoultablebits) init_tables(cut_coul, cut_respa);

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = ljecl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
-                              offset, shift, force->special_lj, atom->nlocal,
-                              atom->nlocal+atom->nghost, mnf, maxspecial,
-                              cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
-                              force->special_coul, force->qqrd2e, g_ewald);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success = ljecl_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, shift,
+                               force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
+                               maxspecial, cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
+                               force->special_coul, force->qqrd2e, g_ewald);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -206,7 +185,7 @@ void PairLJExpandCoulLongGPU::reinit()
 {
  Pair::reinit();

-  ljecl_gpu_reinit(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset, shift, cut_ljsq);
+  ljecl_gpu_reinit(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, shift, cut_ljsq);
 }

 /* ---------------------------------------------------------------------- */
@ -219,16 +198,15 @@ double PairLJExpandCoulLongGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairLJExpandCoulLongGPU::cpu_compute(int start, int inum, int eflag,
-                                       int /* vflag */, int *ilist,
-                                       int *numneigh, int **firstneigh)
+void PairLJExpandCoulLongGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
+                                          int *ilist, int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itype,jtype,itable;
-  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
-  double fraction,table;
-  double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
-  double grij,expm2,prefactor,t,erfc;
-  double rsq,rshift,rshiftsq,rshift2inv;
+  int i, j, ii, jj, jnum, itype, jtype, itable;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
+  double fraction, table;
+  double r, r2inv, r6inv, forcecoul, forcelj, factor_coul, factor_lj;
+  double grij, expm2, prefactor, t, erfc;
+  double rsq, rshift, rshiftsq, rshift2inv;

  int *jlist;

@ -263,73 +241,76 @@ void PairLJExpandCoulLongGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;

        if (rsq < cut_coulsq) {
          if (!ncoultablebits || rsq <= tabinnersq) {
            r = sqrt(rsq);
            grij = g_ewald * r;
-            expm2 = exp(-grij*grij);
-            t = 1.0 / (1.0 + EWALD_P*grij);
-            erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
-            prefactor = qqrd2e * qtmp*q[j]/r;
-            forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
-            if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+            expm2 = exp(-grij * grij);
+            t = 1.0 / (1.0 + EWALD_P * grij);
+            erfc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * expm2;
+            prefactor = qqrd2e * qtmp * q[j] / r;
+            forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
+            if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
          } else {
            union_int_float_t rsq_lookup;
            rsq_lookup.f = rsq;
            itable = rsq_lookup.i & ncoulmask;
            itable >>= ncoulshiftbits;
            fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
-            table = ftable[itable] + fraction*dftable[itable];
-            forcecoul = qtmp*q[j] * table;
+            table = ftable[itable] + fraction * dftable[itable];
+            forcecoul = qtmp * q[j] * table;
            if (factor_coul < 1.0) {
-              table = ctable[itable] + fraction*dctable[itable];
-              prefactor = qtmp*q[j] * table;
-              forcecoul -= (1.0-factor_coul)*prefactor;
+              table = ctable[itable] + fraction * dctable[itable];
+              prefactor = qtmp * q[j] * table;
+              forcecoul -= (1.0 - factor_coul) * prefactor;
            }
          }
-        } else forcecoul = 0.0;
+        } else
+          forcecoul = 0.0;

        if (rsq < cut_ljsq[itype][jtype]) {
          r = sqrt(rsq);
          rshift = r - shift[itype][jtype];
-          rshiftsq = rshift*rshift;
-          rshift2inv = 1.0/rshiftsq;
-          r6inv = rshift2inv*rshift2inv*rshift2inv;
-          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-          forcelj = factor_lj*forcelj/rshift/r;
-        } else forcelj = 0.0;
+          rshiftsq = rshift * rshift;
+          rshift2inv = 1.0 / rshiftsq;
+          r6inv = rshift2inv * rshift2inv * rshift2inv;
+          forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
+          forcelj = factor_lj * forcelj / rshift / r;
+        } else
+          forcelj = 0.0;

-        fpair = forcecoul*r2inv + forcelj;
+        fpair = forcecoul * r2inv + forcelj;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          if (rsq < cut_coulsq) {
            if (!ncoultablebits || rsq <= tabinnersq)
-              ecoul = prefactor*erfc;
+              ecoul = prefactor * erfc;
            else {
-              table = etable[itable] + fraction*detable[itable];
-              ecoul = qtmp*q[j] * table;
+              table = etable[itable] + fraction * detable[itable];
+              ecoul = qtmp * q[j] * table;
            }
-            if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
-          } else ecoul = 0.0;
+            if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
+          } else
+            ecoul = 0.0;

          if (rsq < cut_ljsq[itype][jtype]) {
-            evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
-              offset[itype][jtype];
+            evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
            evdwl *= factor_lj;
-          } else evdwl = 0.0;
+          } else
+            evdwl = 0.0;
        }

-        if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_lj_expand_gpu.cpp
+++ b/src/GPU/pair_lj_expand_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,28 +32,22 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int lje_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
-                 double **host_lj2, double **host_lj3, double **host_lj4,
-                 double **offset, double **shift, double *special_lj,
-                 const int nlocal, const int nall, const int max_nbors,
-                 const int maxspecial, const double cell_size, int &gpu_mode,
-                 FILE *screen);
-void lje_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
-                    double **host_lj2, double **host_lj3, double **host_lj4,
-                    double **offset, double **shift);
+int lje_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                 double **host_lj3, double **host_lj4, double **offset, double **shift,
+                 double *special_lj, const int nlocal, const int nall, const int max_nbors,
+                 const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen);
+void lje_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                    double **host_lj3, double **host_lj4, double **offset, double **shift);
 void lje_gpu_clear();
-int ** lje_gpu_compute_n(const int ago, const int inum, const int nall,
-                         double **host_x, int *host_type, double *sublo,
-                         double *subhi, tagint *tag, int **nspecial,
-                         tagint **special, const bool eflag, const bool vflag,
-                         const bool eatom, const bool vatom, int &host_start,
-                         int **ilist, int **jnum,
-                         const double cpu_time, bool &success);
-void lje_gpu_compute(const int ago, const int inum, const int nall,
-                     double **host_x, int *host_type, int *ilist, int *numj,
-                     int **firstneigh, const bool eflag, const bool vflag,
-                     const bool eatom, const bool vatom, int &host_start,
-                     const double cpu_time, bool &success);
+int **lje_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                        int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                        tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                        const bool vatom, int &host_start, int **ilist, int **jnum,
+                        const double cpu_time, bool &success);
+void lje_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
+                     int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
+                     const bool eatom, const bool vatom, int &host_start, const double cpu_time,
+                     bool &success);
 double lje_gpu_bytes();

 /* ---------------------------------------------------------------------- */
@ -81,7 +73,7 @@ PairLJExpandGPU::~PairLJExpandGPU()

 void PairLJExpandGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -89,7 +81,7 @@ void PairLJExpandGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -98,28 +90,24 @@ void PairLJExpandGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = lje_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                   atom->type, sublo, subhi,
-                                   atom->tag, atom->nspecial, atom->special,
-                                   eflag, vflag, eflag_atom, vflag_atom,
-                                   host_start, &ilist, &numneigh, cpu_time,
-                                   success);
+    firstneigh =
+        lje_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+                          atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
+                          host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    lje_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                    vflag_atom, host_start, cpu_time, success);
+    lje_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                    eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -139,10 +127,9 @@ void PairLJExpandGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -150,21 +137,15 @@ void PairLJExpandGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = lje_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
-                             offset, shift, force->special_lj, atom->nlocal,
-                             atom->nlocal+atom->nghost, mnf, maxspecial,
-                             cell_size, gpu_mode, screen);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success = lje_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, shift,
+                             force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
+                             maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -173,7 +154,7 @@ void PairLJExpandGPU::reinit()
 {
  Pair::reinit();

-  lje_gpu_reinit(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset, shift);
+  lje_gpu_reinit(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, shift);
 }

 /* ---------------------------------------------------------------------- */
@ -186,14 +167,13 @@ double PairLJExpandGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairLJExpandGPU::cpu_compute(int start, int inum, int eflag,
-                                  int /* vflag */, int *ilist,
+void PairLJExpandGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
                                  int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
-  double r,rshift,rshiftsq;
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
+  double rsq, r2inv, r6inv, forcelj, factor_lj;
+  double r, rshift, rshiftsq;
  int *jlist;

  double **x = atom->x;
@ -220,29 +200,28 @@ void PairLJExpandGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
        r = sqrt(rsq);
        rshift = r - shift[itype][jtype];
-        rshiftsq = rshift*rshift;
-        r2inv = 1.0/rshiftsq;
-        r6inv = r2inv*r2inv*r2inv;
-        forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-        fpair = factor_lj*forcelj/rshift/r;
+        rshiftsq = rshift * rshift;
+        r2inv = 1.0 / rshiftsq;
+        r6inv = r2inv * r2inv * r2inv;
+        forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
+        fpair = factor_lj * forcelj / rshift / r;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
-          evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
-            offset[itype][jtype];
+          evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
          evdwl *= factor_lj;
        }

-        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_lj_gromacs_gpu.cpp
+++ b/src/GPU/pair_lj_gromacs_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,34 +32,27 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int ljgrm_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
-                   double **host_lj2, double **host_lj3, double **host_lj4,
-                   double *special_lj, const int inum, const int nall,
-                   const int max_nbors, const int maxspecial,
-                   const double cell_size, int &gpu_mode, FILE *screen,
-                   double **host_ljsw1, double **host_ljsw2,
-                   double **host_ljsw3, double **host_ljsw4,
-                   double **host_ljsw5, double **cut_inner,
-                   double **cut_innersq);
+int ljgrm_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                   double **host_lj3, double **host_lj4, double *special_lj, const int inum,
+                   const int nall, const int max_nbors, const int maxspecial,
+                   const double cell_size, int &gpu_mode, FILE *screen, double **host_ljsw1,
+                   double **host_ljsw2, double **host_ljsw3, double **host_ljsw4,
+                   double **host_ljsw5, double **cut_inner, double **cut_innersq);
 void ljgrm_gpu_clear();
-int ** ljgrm_gpu_compute_n(const int ago, const int inum_full, const int nall,
-                           double **host_x, int *host_type, double *sublo,
-                           double *subhi, tagint *tag, int **nspecial,
-                           tagint **special, const bool eflag, const bool vflag,
-                           const bool eatom, const bool vatom, int &host_start,
-                           int **ilist, int **jnum, const double cpu_time,
-                           bool &success);
-void ljgrm_gpu_compute(const int ago, const int inum_full, const int nall,
-                       double **host_x, int *host_type, int *ilist, int *numj,
-                       int **firstneigh, const bool eflag, const bool vflag,
-                       const bool eatom, const bool vatom, int &host_start,
+int **ljgrm_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
+                          int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                          tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                          const bool vatom, int &host_start, int **ilist, int **jnum,
+                          const double cpu_time, bool &success);
+void ljgrm_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
+                       int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                       const bool vflag, const bool eatom, const bool vatom, int &host_start,
                       const double cpu_time, bool &success);
 double ljgrm_gpu_bytes();

 /* ---------------------------------------------------------------------- */

-PairLJGromacsGPU::PairLJGromacsGPU(LAMMPS *lmp) :
-  PairLJGromacs(lmp), gpu_mode(GPU_FORCE)
+PairLJGromacsGPU::PairLJGromacsGPU(LAMMPS *lmp) : PairLJGromacs(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  reinitflag = 0;
@ -83,7 +74,7 @@ PairLJGromacsGPU::~PairLJGromacsGPU()

 void PairLJGromacsGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -91,7 +82,7 @@ void PairLJGromacsGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -100,28 +91,24 @@ void PairLJGromacsGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = ljgrm_gpu_compute_n(neighbor->ago, inum, nall,
-                                     atom->x, atom->type, sublo,
-                                     subhi, atom->tag, atom->nspecial,
-                                     atom->special, eflag, vflag, eflag_atom,
-                                     vflag_atom, host_start, &ilist,
-                                     &numneigh, cpu_time, success);
+    firstneigh =
+        ljgrm_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+                            atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
+                            host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    ljgrm_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                      ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                      vflag_atom, host_start, cpu_time, success);
+    ljgrm_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                      eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -141,10 +128,9 @@ void PairLJGromacsGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        mcut = init_one(i,j);
+        mcut = init_one(i, j);
        mcut *= mcut;
-        if (mcut > maxcut)
-          maxcut = mcut;
+        if (mcut > maxcut) maxcut = mcut;
        cutsq[i][j] = cutsq[j][i] = mcut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -152,23 +138,17 @@ void PairLJGromacsGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;

  int mnf = 5e-2 * neighbor->oneatom;
-  int success = ljgrm_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
-                                                 force->special_lj, atom->nlocal,
-                               atom->nlocal+atom->nghost, mnf, maxspecial,
-                               cell_size, gpu_mode, screen, ljsw1, ljsw2,
-                               ljsw3, ljsw4, ljsw5, cut_inner, cut_inner_sq);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      ljgrm_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, force->special_lj, atom->nlocal,
+                     atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen,
+                     ljsw1, ljsw2, ljsw3, ljsw4, ljsw5, cut_inner, cut_inner_sq);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -181,14 +161,13 @@ double PairLJGromacsGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairLJGromacsGPU::cpu_compute(int start, int inum, int eflag,
-                                   int /* vflag */, int *ilist,
+void PairLJGromacsGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
                                   int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
-  double r,t,fswitch,eswitch;
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
+  double rsq, r2inv, r6inv, forcelj, factor_lj;
+  double r, t, fswitch, eswitch;
  int *jlist;

  double **x = atom->x;
@ -215,36 +194,36 @@ void PairLJGromacsGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
-              r6inv = r2inv*r2inv*r2inv;
-        forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-              if (rsq > cut_inner_sq[itype][jtype]) {
+        r2inv = 1.0 / rsq;
+        r6inv = r2inv * r2inv * r2inv;
+        forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
+        if (rsq > cut_inner_sq[itype][jtype]) {
          r = sqrt(rsq);
          t = r - cut_inner[itype][jtype];
-                 fswitch = r*t*t*(ljsw1[itype][jtype] + ljsw2[itype][jtype]*t);
-                forcelj += fswitch;
+          fswitch = r * t * t * (ljsw1[itype][jtype] + ljsw2[itype][jtype] * t);
+          forcelj += fswitch;
        }
-              fpair = factor_lj*forcelj * r2inv;
+        fpair = factor_lj * forcelj * r2inv;

-              f[i][0] += delx*fpair;
-              f[i][1] += dely*fpair;
-              f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

-               if (eflag) {
-                evdwl = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
-                evdwl += ljsw5[itype][jtype];
+        if (eflag) {
+          evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]);
+          evdwl += ljsw5[itype][jtype];
          if (rsq > cut_inner_sq[itype][jtype]) {
-            eswitch = t*t*t*(ljsw3[itype][jtype] + ljsw4[itype][jtype]*t);
+            eswitch = t * t * t * (ljsw3[itype][jtype] + ljsw4[itype][jtype] * t);
            evdwl += eswitch;
          }
-                evdwl *= factor_lj;
-              }
+          evdwl *= factor_lj;
+        }

-              if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_lj_sdk_coul_long_gpu.cpp
+++ b/src/GPU/pair_lj_sdk_coul_long_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -25,58 +24,51 @@
 #include "gpu_extra.h"
 #include "kspace.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

 #include <cmath>

-#define EWALD_F   1.12837917
-#define EWALD_P   0.3275911
-#define A1        0.254829592
-#define A2       -0.284496736
-#define A3        1.421413741
-#define A4       -1.453152027
-#define A5        1.061405429
+#define EWALD_F 1.12837917
+#define EWALD_P 0.3275911
+#define A1 0.254829592
+#define A2 -0.284496736
+#define A3 1.421413741
+#define A4 -1.453152027
+#define A5 1.061405429

 using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int sdkl_gpu_init(const int ntypes, double **cutsq, int **lj_type,
-                  double **host_lj1, double **host_lj2, double **host_lj3,
-                  double **host_lj4, double **offset, double *special_lj,
-                  const int nlocal, const int nall, const int max_nbors,
-                  const int maxspecial, const double cell_size, int &gpu_mode,
-                  FILE *screen, double **host_cut_ljsq, double host_cut_coulsq,
-                  double *host_special_coul, const double qqrd2e,
-                  const double g_ewald);
+int sdkl_gpu_init(const int ntypes, double **cutsq, int **lj_type, double **host_lj1,
+                  double **host_lj2, double **host_lj3, double **host_lj4, double **offset,
+                  double *special_lj, const int nlocal, const int nall, const int max_nbors,
+                  const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen,
+                  double **host_cut_ljsq, double host_cut_coulsq, double *host_special_coul,
+                  const double qqrd2e, const double g_ewald);
 void sdkl_gpu_clear();
-int ** sdkl_gpu_compute_n(const int ago, const int inum, const int nall,
-                          double **host_x, int *host_type, double *sublo,
-                          double *subhi, tagint *tag, int **nspecial,
-                          tagint **special, const bool eflag, const bool vflag,
-                          const bool eatom, const bool vatom, int &host_start,
-                          int **ilist, int **jnum, const double cpu_time,
-                          bool &success, double *host_q, double *boxlo,
-                          double *prd);
-void sdkl_gpu_compute(const int ago, const int inum, const int nall,
-                      double **host_x, int *host_type, int *ilist, int *numj,
-                      int **firstneigh, const bool eflag, const bool vflag,
-                      const bool eatom, const bool vatom, int &host_start,
-                      const double cpu_time, bool &success, double *host_q,
-                      const int nlocal, double *boxlo, double *prd);
+int **sdkl_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                         int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                         const bool vatom, int &host_start, int **ilist, int **jnum,
+                         const double cpu_time, bool &success, double *host_q, double *boxlo,
+                         double *prd);
+void sdkl_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                      int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                      const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                      const double cpu_time, bool &success, double *host_q, const int nlocal,
+                      double *boxlo, double *prd);
 double sdkl_gpu_bytes();

 #include "lj_sdk_common.h"

-
 using namespace LJSDKParms;

 /* ---------------------------------------------------------------------- */

 PairLJSDKCoulLongGPU::PairLJSDKCoulLongGPU(LAMMPS *lmp) :
-  PairLJSDKCoulLong(lmp), gpu_mode(GPU_FORCE)
+    PairLJSDKCoulLong(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  reinitflag = 0;
@ -98,7 +90,7 @@ PairLJSDKCoulLongGPU::~PairLJSDKCoulLongGPU()

 void PairLJSDKCoulLongGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -106,7 +98,7 @@ void PairLJSDKCoulLongGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -115,35 +107,33 @@ void PairLJSDKCoulLongGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = sdkl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                    atom->type, sublo, subhi,
-                                    atom->tag, atom->nspecial, atom->special,
-                                    eflag, vflag, eflag_atom, vflag_atom,
-                                    host_start, &ilist, &numneigh, cpu_time,
-                                    success, atom->q, domain->boxlo,
-                                    domain->prd);
+    firstneigh = sdkl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                    atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                    eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
+                                    success, atom->q, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    sdkl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                     vflag_atom, host_start, cpu_time, success, atom->q,
+    sdkl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                     eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
                     atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    if (evflag) {
-      if (eflag) cpu_compute<1,1>(host_start, inum, ilist, numneigh, firstneigh);
-      else cpu_compute<1,0>(host_start, inum, ilist, numneigh, firstneigh);
-    } else cpu_compute<0,0>(host_start, inum, ilist, numneigh, firstneigh);
+      if (eflag)
+        cpu_compute<1, 1>(host_start, inum, ilist, numneigh, firstneigh);
+      else
+        cpu_compute<1, 0>(host_start, inum, ilist, numneigh, firstneigh);
+    } else
+      cpu_compute<0, 0>(host_start, inum, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
  }
 }
@ -154,8 +144,7 @@ void PairLJSDKCoulLongGPU::compute(int eflag, int vflag)

 void PairLJSDKCoulLongGPU::init_style()
 {
-  if (!atom->q_flag)
-    error->all(FLERR,"Pair style lj/sdk/coul/long/gpu requires atom attribute q");
+  if (!atom->q_flag) error->all(FLERR, "Pair style lj/sdk/coul/long/gpu requires atom attribute q");

  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
@ -163,10 +152,9 @@ void PairLJSDKCoulLongGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -178,31 +166,23 @@ void PairLJSDKCoulLongGPU::init_style()

  // insure use of KSpace long-range solver, set g_ewald

-  if (force->kspace == nullptr)
-    error->all(FLERR,"Pair style is incompatible with KSpace style");
+  if (force->kspace == nullptr) error->all(FLERR, "Pair style is incompatible with KSpace style");
  g_ewald = force->kspace->g_ewald;

  // setup force tables

-  if (ncoultablebits) init_tables(cut_coul,nullptr);
+  if (ncoultablebits) init_tables(cut_coul, nullptr);

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = sdkl_gpu_init(atom->ntypes+1, cutsq, lj_type, lj1, lj2, lj3,
-                              lj4, offset, force->special_lj, atom->nlocal,
-                              atom->nlocal+atom->nghost, mnf, maxspecial,
-                              cell_size, gpu_mode, screen, cut_ljsq,
-                              cut_coulsq, force->special_coul,
-                              force->qqrd2e, g_ewald);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      sdkl_gpu_init(atom->ntypes + 1, cutsq, lj_type, lj1, lj2, lj3, lj4, offset, force->special_lj,
+                    atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
+                    screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e, g_ewald);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -215,21 +195,21 @@ double PairLJSDKCoulLongGPU::memory_usage()

 /* ---------------------------------------------------------------------- */
 template <int EVFLAG, int EFLAG>
-void PairLJSDKCoulLongGPU::cpu_compute(int start, int inum, int *ilist,
-                                       int *numneigh, int **firstneigh)
+void PairLJSDKCoulLongGPU::cpu_compute(int start, int inum, int *ilist, int *numneigh,
+                                       int **firstneigh)
 {
-  int i,j,ii,jj;
-  double qtmp,xtmp,ytmp,ztmp;
-  double r2inv,forcecoul,forcelj,factor_coul,factor_lj;
+  int i, j, ii, jj;
+  double qtmp, xtmp, ytmp, ztmp;
+  double r2inv, forcecoul, forcelj, factor_coul, factor_lj;

-  const double * const * const x = atom->x;
-  double * const * const f = atom->f;
-  const double * const q = atom->q;
-  const int * const type = atom->type;
-  const double * const special_coul = force->special_coul;
-  const double * const special_lj = force->special_lj;
+  const double *const *const x = atom->x;
+  double *const *const f = atom->f;
+  const double *const q = atom->q;
+  const int *const type = atom->type;
+  const double *const special_coul = force->special_coul;
+  const double *const special_lj = force->special_lj;
  const double qqrd2e = force->qqrd2e;
-  double fxtmp,fytmp,fztmp;
+  double fxtmp, fytmp, fztmp;

  // loop over neighbors of my atoms

@ -239,10 +219,10 @@ void PairLJSDKCoulLongGPU::cpu_compute(int start, int inum, int *ilist,
    xtmp = x[i][0];
    ytmp = x[i][1];
    ztmp = x[i][2];
-    fxtmp=fytmp=fztmp=0.0;
+    fxtmp = fytmp = fztmp = 0.0;

    const int itype = type[i];
-    const int * const jlist = firstneigh[i];
+    const int *const jlist = firstneigh[i];
    const int jnum = numneigh[i];

    for (jj = 0; jj < jnum; jj++) {
@ -254,7 +234,7 @@ void PairLJSDKCoulLongGPU::cpu_compute(int start, int inum, int *ilist,
      const double delx = xtmp - x[j][0];
      const double dely = ytmp - x[j][1];
      const double delz = ztmp - x[j][2];
-      const double rsq = delx*delx + dely*dely + delz*delz;
+      const double rsq = delx * delx + dely * dely + delz * delz;
      const int jtype = type[j];

      double evdwl = 0.0;
@ -262,41 +242,40 @@ void PairLJSDKCoulLongGPU::cpu_compute(int start, int inum, int *ilist,
      double fpair = 0.0;

      if (rsq < cutsq[itype][jtype]) {
-          r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;
        const int ljt = lj_type[itype][jtype];

        if (rsq < cut_coulsq) {
          if (!ncoultablebits || rsq <= tabinnersq) {
            const double r = sqrt(rsq);
            const double grij = g_ewald * r;
-            const double expm2 = exp(-grij*grij);
-            const double t = 1.0 / (1.0 + EWALD_P*grij);
-            const double erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
-            const double prefactor = qqrd2e * qtmp*q[j]/r;
-            forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
-            if (EFLAG) ecoul = prefactor*erfc;
+            const double expm2 = exp(-grij * grij);
+            const double t = 1.0 / (1.0 + EWALD_P * grij);
+            const double erfc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * expm2;
+            const double prefactor = qqrd2e * qtmp * q[j] / r;
+            forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
+            if (EFLAG) ecoul = prefactor * erfc;
            if (factor_coul < 1.0) {
-              forcecoul -= (1.0-factor_coul)*prefactor;
-              if (EFLAG) ecoul -= (1.0-factor_coul)*prefactor;
+              forcecoul -= (1.0 - factor_coul) * prefactor;
+              if (EFLAG) ecoul -= (1.0 - factor_coul) * prefactor;
            }
          } else {
            union_int_float_t rsq_lookup;
            rsq_lookup.f = rsq;
            int itable = rsq_lookup.i & ncoulmask;
            itable >>= ncoulshiftbits;
-            const double fraction = (rsq_lookup.f - rtable[itable]) *
-                                     drtable[itable];
-            const double table = ftable[itable] + fraction*dftable[itable];
-            forcecoul = qtmp*q[j] * table;
+            const double fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
+            const double table = ftable[itable] + fraction * dftable[itable];
+            forcecoul = qtmp * q[j] * table;
            if (EFLAG) {
-              const double table2 = etable[itable] + fraction*detable[itable];
-              ecoul = qtmp*q[j] * table2;
+              const double table2 = etable[itable] + fraction * detable[itable];
+              ecoul = qtmp * q[j] * table2;
            }
            if (factor_coul < 1.0) {
-              const double table2 = ctable[itable] + fraction*dctable[itable];
-              const double prefactor = qtmp*q[j] * table2;
-              forcecoul -= (1.0-factor_coul)*prefactor;
-              if (EFLAG) ecoul -= (1.0-factor_coul)*prefactor;
+              const double table2 = ctable[itable] + fraction * dctable[itable];
+              const double prefactor = qtmp * q[j] * table2;
+              forcecoul -= (1.0 - factor_coul) * prefactor;
+              if (EFLAG) ecoul -= (1.0 - factor_coul) * prefactor;
            }
          }
        } else {
@ -304,50 +283,46 @@ void PairLJSDKCoulLongGPU::cpu_compute(int start, int inum, int *ilist,
          ecoul = 0.0;
        }

-
        if (rsq < cut_ljsq[itype][jtype]) {

          if (ljt == LJ12_4) {
-            const double r4inv=r2inv*r2inv;
-            forcelj = r4inv*(lj1[itype][jtype]*r4inv*r4inv
-                             - lj2[itype][jtype]);
+            const double r4inv = r2inv * r2inv;
+            forcelj = r4inv * (lj1[itype][jtype] * r4inv * r4inv - lj2[itype][jtype]);

            if (EFLAG)
-              evdwl = r4inv*(lj3[itype][jtype]*r4inv*r4inv
-                             - lj4[itype][jtype]) - offset[itype][jtype];
+              evdwl = r4inv * (lj3[itype][jtype] * r4inv * r4inv - lj4[itype][jtype]) -
+                  offset[itype][jtype];

          } else if (ljt == LJ9_6) {
-            const double r3inv = r2inv*sqrt(r2inv);
-            const double r6inv = r3inv*r3inv;
-            forcelj = r6inv*(lj1[itype][jtype]*r3inv
-                             - lj2[itype][jtype]);
+            const double r3inv = r2inv * sqrt(r2inv);
+            const double r6inv = r3inv * r3inv;
+            forcelj = r6inv * (lj1[itype][jtype] * r3inv - lj2[itype][jtype]);
            if (EFLAG)
-              evdwl = r6inv*(lj3[itype][jtype]*r3inv
-                             - lj4[itype][jtype]) - offset[itype][jtype];
+              evdwl =
+                  r6inv * (lj3[itype][jtype] * r3inv - lj4[itype][jtype]) - offset[itype][jtype];

          } else if (ljt == LJ12_6) {
-            const double r6inv = r2inv*r2inv*r2inv;
-            forcelj = r6inv*(lj1[itype][jtype]*r6inv
-                             - lj2[itype][jtype]);
+            const double r6inv = r2inv * r2inv * r2inv;
+            forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
            if (EFLAG)
-              evdwl = r6inv*(lj3[itype][jtype]*r6inv
-                             - lj4[itype][jtype]) - offset[itype][jtype];
+              evdwl =
+                  r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
          }

          if (EFLAG) evdwl *= factor_lj;

        } else {
-          forcelj=0.0;
+          forcelj = 0.0;
          evdwl = 0.0;
        }

-        fpair = (forcecoul + factor_lj*forcelj) * r2inv;
+        fpair = (forcecoul + factor_lj * forcelj) * r2inv;

-        fxtmp += delx*fpair;
-        fytmp += dely*fpair;
-        fztmp += delz*fpair;
+        fxtmp += delx * fpair;
+        fytmp += dely * fpair;
+        fztmp += delz * fpair;

-        if (EVFLAG) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
+        if (EVFLAG) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
      }
    }
    f[i][0] += fxtmp;
--- a/src/GPU/pair_lj_sdk_gpu.cpp
+++ b/src/GPU/pair_lj_sdk_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,25 +32,20 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int sdk_gpu_init(const int ntypes, double **cutsq, int **cg_types,
-                 double **host_lj1, double **host_lj2, double **host_lj3,
-                 double **host_lj4, double **offset, double *special_lj,
-                 const int nlocal, const int nall, const int max_nbors,
-                 const int maxspecial, const double cell_size, int &gpu_mode,
-                 FILE *screen);
+int sdk_gpu_init(const int ntypes, double **cutsq, int **cg_types, double **host_lj1,
+                 double **host_lj2, double **host_lj3, double **host_lj4, double **offset,
+                 double *special_lj, const int nlocal, const int nall, const int max_nbors,
+                 const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen);
 void sdk_gpu_clear();
-int ** sdk_gpu_compute_n(const int ago, const int inum, const int nall,
-                         double **host_x, int *host_type, double *sublo,
-                         double *subhi, tagint *tag, int **nspecial,
-                         tagint **special, const bool eflag, const bool vflag,
-                         const bool eatom, const bool vatom, int &host_start,
-                         int **ilist, int **jnum,
-                         const double cpu_time, bool &success);
-void sdk_gpu_compute(const int ago, const int inum, const int nall,
-                     double **host_x, int *host_type, int *ilist, int *numj,
-                     int **firstneigh, const bool eflag, const bool vflag,
-                     const bool eatom, const bool vatom, int &host_start,
-                     const double cpu_time, bool &success);
+int **sdk_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                        int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                        tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                        const bool vatom, int &host_start, int **ilist, int **jnum,
+                        const double cpu_time, bool &success);
+void sdk_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
+                     int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
+                     const bool eatom, const bool vatom, int &host_start, const double cpu_time,
+                     bool &success);
 double sdk_gpu_bytes();

 #include "lj_sdk_common.h"
@ -83,7 +76,7 @@ PairLJSDKGPU::~PairLJSDKGPU()

 void PairLJSDKGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -91,7 +84,7 @@ void PairLJSDKGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -100,33 +93,32 @@ void PairLJSDKGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = sdk_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                   atom->type, sublo, subhi,
-                                   atom->tag, atom->nspecial, atom->special,
-                                   eflag, vflag, eflag_atom, vflag_atom,
-                                   host_start, &ilist, &numneigh, cpu_time,
-                                   success);
+    firstneigh =
+        sdk_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+                          atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
+                          host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    sdk_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                    vflag_atom, host_start, cpu_time, success);
+    sdk_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                    eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    if (evflag) {
-      if (eflag) cpu_compute<1,1>(host_start, inum, ilist, numneigh, firstneigh);
-      else cpu_compute<1,0>(host_start, inum, ilist, numneigh, firstneigh);
-    } else cpu_compute<0,0>(host_start, inum, ilist, numneigh, firstneigh);
+      if (eflag)
+        cpu_compute<1, 1>(host_start, inum, ilist, numneigh, firstneigh);
+      else
+        cpu_compute<1, 0>(host_start, inum, ilist, numneigh, firstneigh);
+    } else
+      cpu_compute<0, 0>(host_start, inum, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
  }
 }
@ -144,10 +136,9 @@ void PairLJSDKGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -155,21 +146,15 @@ void PairLJSDKGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = sdk_gpu_init(atom->ntypes+1,cutsq,lj_type,lj1,lj2,lj3,lj4,
-                             offset, force->special_lj, atom->nlocal,
-                             atom->nlocal+atom->nghost, mnf, maxspecial,
-                             cell_size, gpu_mode, screen);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success = sdk_gpu_init(atom->ntypes + 1, cutsq, lj_type, lj1, lj2, lj3, lj4, offset,
+                             force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
+                             maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -182,19 +167,18 @@ double PairLJSDKGPU::memory_usage()

 /* ---------------------------------------------------------------------- */
 template <int EVFLAG, int EFLAG>
-void PairLJSDKGPU::cpu_compute(int start, int inum, int *ilist,
-                               int *numneigh, int **firstneigh)
+void PairLJSDKGPU::cpu_compute(int start, int inum, int *ilist, int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,forcelj,factor_lj;
+  int i, j, ii, jj, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
+  double rsq, r2inv, forcelj, factor_lj;

-  const double * const * const x = atom->x;
-  double * const * const f = atom->f;
-  const int * const type = atom->type;
-  const double * const special_lj = force->special_lj;
-  double fxtmp,fytmp,fztmp;
-  evdwl=0.0;
+  const double *const *const x = atom->x;
+  double *const *const f = atom->f;
+  const int *const type = atom->type;
+  const double *const special_lj = force->special_lj;
+  double fxtmp, fytmp, fztmp;
+  evdwl = 0.0;

  // loop over neighbors of my atoms

@ -203,10 +187,10 @@ void PairLJSDKGPU::cpu_compute(int start, int inum, int *ilist,
    xtmp = x[i][0];
    ytmp = x[i][1];
    ztmp = x[i][2];
-    fxtmp=fytmp=fztmp=0.0;
+    fxtmp = fytmp = fztmp = 0.0;

    const int itype = type[i];
-    const int * const jlist = firstneigh[i];
+    const int *const jlist = firstneigh[i];
    const int jnum = numneigh[i];

    for (jj = 0; jj < jnum; jj++) {
@ -217,47 +201,43 @@ void PairLJSDKGPU::cpu_compute(int start, int inum, int *ilist,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;
        const int ljt = lj_type[itype][jtype];

        if (ljt == LJ12_4) {
-          const double r4inv=r2inv*r2inv;
-          forcelj = r4inv*(lj1[itype][jtype]*r4inv*r4inv
-                           - lj2[itype][jtype]);
+          const double r4inv = r2inv * r2inv;
+          forcelj = r4inv * (lj1[itype][jtype] * r4inv * r4inv - lj2[itype][jtype]);

          if (EFLAG)
-            evdwl = r4inv*(lj3[itype][jtype]*r4inv*r4inv
-                           - lj4[itype][jtype]) - offset[itype][jtype];
+            evdwl = r4inv * (lj3[itype][jtype] * r4inv * r4inv - lj4[itype][jtype]) -
+                offset[itype][jtype];

        } else if (ljt == LJ9_6) {
-          const double r3inv = r2inv*sqrt(r2inv);
-          const double r6inv = r3inv*r3inv;
-          forcelj = r6inv*(lj1[itype][jtype]*r3inv
-                           - lj2[itype][jtype]);
+          const double r3inv = r2inv * sqrt(r2inv);
+          const double r6inv = r3inv * r3inv;
+          forcelj = r6inv * (lj1[itype][jtype] * r3inv - lj2[itype][jtype]);
          if (EFLAG)
-            evdwl = r6inv*(lj3[itype][jtype]*r3inv
-                           - lj4[itype][jtype]) - offset[itype][jtype];
+            evdwl = r6inv * (lj3[itype][jtype] * r3inv - lj4[itype][jtype]) - offset[itype][jtype];

        } else if (ljt == LJ12_6) {
-          const double r6inv = r2inv*r2inv*r2inv;
-          forcelj = r6inv*(lj1[itype][jtype]*r6inv
-                          - lj2[itype][jtype]);
+          const double r6inv = r2inv * r2inv * r2inv;
+          forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
          if (EFLAG)
-            evdwl = r6inv*(lj3[itype][jtype]*r6inv
-                           - lj4[itype][jtype]) - offset[itype][jtype];
-        } else continue;
+            evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
+        } else
+          continue;

-        fpair = factor_lj*forcelj*r2inv;
+        fpair = factor_lj * forcelj * r2inv;

-        fxtmp += delx*fpair;
-        fytmp += dely*fpair;
-        fztmp += delz*fpair;
+        fxtmp += delx * fpair;
+        fytmp += dely * fpair;
+        fztmp += delz * fpair;

-        if (EVFLAG) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+        if (EVFLAG) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
      }
    }
    f[i][0] += fxtmp;
--- a/src/GPU/pair_lj_sf_dipole_sf_gpu.cpp
+++ b/src/GPU/pair_lj_sf_dipole_sf_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"
 #include "update.h"
@ -36,35 +34,28 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int dplsf_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
-                   double **host_lj2, double **host_lj3, double **host_lj4,
-                   double *special_lj, const int nlocal,
+int dplsf_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                   double **host_lj3, double **host_lj4, double *special_lj, const int nlocal,
                   const int nall, const int max_nbors, const int maxspecial,
-                   const double cell_size, int &gpu_mode, FILE *screen,
-                   double **host_cut_ljsq, double **host_cut_coulsq,
-                   double *host_special_coul, const double qqrd2e);
+                   const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
+                   double **host_cut_coulsq, double *host_special_coul, const double qqrd2e);
 void dplsf_gpu_clear();
-int ** dplsf_gpu_compute_n(const int ago, const int inum, const int nall,
-                           double **host_x, int *host_type, double *sublo,
-                           double *subhi, tagint *tag, int **nspecial,
-                           tagint **special, const bool eflag, const bool vflag,
-                           const bool eatom, const bool vatom, int &host_start,
-                           int **ilist, int **jnum, const double cpu_time,
-                           bool &success, double *host_q, double **host_mu,
-                           double *boxlo, double *prd);
-void dplsf_gpu_compute(const int ago, const int inum, const int nall,
-                       double **host_x, int *host_type, int *ilist, int *numj,
-                       int **firstneigh, const bool eflag, const bool vflag,
-                       const bool eatom, const bool vatom, int &host_start,
-                       const double cpu_time, bool &success, double *host_q,
-                       double **host_mu, const int nlocal, double *boxlo,
-                       double *prd);
+int **dplsf_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                          int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                          tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                          const bool vatom, int &host_start, int **ilist, int **jnum,
+                          const double cpu_time, bool &success, double *host_q, double **host_mu,
+                          double *boxlo, double *prd);
+void dplsf_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                       int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                       const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                       const double cpu_time, bool &success, double *host_q, double **host_mu,
+                       const int nlocal, double *boxlo, double *prd);
 double dplsf_gpu_bytes();

 /* ---------------------------------------------------------------------- */

-PairLJSFDipoleSFGPU::PairLJSFDipoleSFGPU(LAMMPS *lmp) : PairLJSFDipoleSF(lmp),
-  gpu_mode(GPU_FORCE)
+PairLJSFDipoleSFGPU::PairLJSFDipoleSFGPU(LAMMPS *lmp) : PairLJSFDipoleSF(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  reinitflag = 0;
@ -86,7 +77,7 @@ PairLJSFDipoleSFGPU::~PairLJSFDipoleSFGPU()

 void PairLJSFDipoleSFGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -94,7 +85,7 @@ void PairLJSFDipoleSFGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -103,30 +94,25 @@ void PairLJSFDipoleSFGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = dplsf_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                     atom->type, sublo, subhi,
-                                     atom->tag, atom->nspecial, atom->special,
-                                     eflag, vflag, eflag_atom, vflag_atom,
-                                     host_start, &ilist, &numneigh, cpu_time,
-                                     success, atom->q, atom->mu, domain->boxlo,
-                                     domain->prd);
+    firstneigh = dplsf_gpu_compute_n(
+        neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial,
+        atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
+        cpu_time, success, atom->q, atom->mu, domain->boxlo, domain->prd);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    dplsf_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                      ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                      vflag_atom, host_start, cpu_time, success, atom->q,
+    dplsf_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                      eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
                      atom->mu, atom->nlocal, domain->boxlo, domain->prd);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -140,11 +126,10 @@ void PairLJSFDipoleSFGPU::compute(int eflag, int vflag)
 void PairLJSFDipoleSFGPU::init_style()
 {
  if (!atom->q_flag || !atom->mu_flag || !atom->torque_flag)
-    error->all(FLERR,"Pair dipole/sf/gpu requires atom attributes q, mu, torque");
+    error->all(FLERR, "Pair dipole/sf/gpu requires atom attributes q, mu, torque");

-
-  if (strcmp(update->unit_style,"electron") == 0)
-    error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles");
+  if (strcmp(update->unit_style, "electron") == 0)
+    error->all(FLERR, "Cannot (yet) use 'electron' units with dipoles");

  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
@ -152,10 +137,9 @@ void PairLJSFDipoleSFGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -163,22 +147,16 @@ void PairLJSFDipoleSFGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = dplsf_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
-                               force->special_lj, atom->nlocal,
-                               atom->nlocal+atom->nghost, mnf, maxspecial,
-                               cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
-                               force->special_coul, force->qqrd2e);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      dplsf_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, force->special_lj, atom->nlocal,
+                     atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen,
+                     cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -191,25 +169,24 @@ double PairLJSFDipoleSFGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairLJSFDipoleSFGPU::cpu_compute(int start, int inum, int eflag, int vflag,
-                                  int *ilist, int *numneigh,
-                                  int **firstneigh)
+void PairLJSFDipoleSFGPU::cpu_compute(int start, int inum, int eflag, int vflag, int *ilist,
+                                      int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fx,fy,fz;
-  double rsq,rinv,r2inv,r6inv,r3inv,r5inv;
-  double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz;
-  double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul;
-  double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4;
-  double forcelj,factor_coul,factor_lj;
-  double presf,afac,bfac,pqfac,qpfac,forceljcut,forceljsf;
-  double aforcecoulx,aforcecouly,aforcecoulz;
-  double bforcecoulx,bforcecouly,bforcecoulz;
-  double rcutlj2inv, rcutcoul2inv,rcutlj6inv;
+  int i, j, ii, jj, jnum, itype, jtype;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fx, fy, fz;
+  double rsq, rinv, r2inv, r6inv, r3inv, r5inv;
+  double forcecoulx, forcecouly, forcecoulz, crossx, crossy, crossz;
+  double tixcoul, tiycoul, tizcoul, tjxcoul, tjycoul, tjzcoul;
+  double fq, pdotp, pidotr, pjdotr, pre1, pre2, pre3, pre4;
+  double forcelj, factor_coul, factor_lj;
+  double presf, afac, bfac, pqfac, qpfac, forceljcut, forceljsf;
+  double aforcecoulx, aforcecouly, aforcecoulz;
+  double bforcecoulx, bforcecouly, bforcecoulz;
+  double rcutlj2inv, rcutcoul2inv, rcutlj6inv;
  int *jlist;

  evdwl = ecoul = 0.0;
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  double **x = atom->x;
  double **f = atom->f;
@ -221,7 +198,6 @@ void PairLJSFDipoleSFGPU::cpu_compute(int start, int inum, int eflag, int vflag,
  double *special_lj = force->special_lj;
  double qqrd2e = force->qqrd2e;

-
  // loop over neighbors of my atoms

  for (ii = start; ii < inum; ii++) {
@ -243,11 +219,11 @@ void PairLJSFDipoleSFGPU::cpu_compute(int start, int inum, int eflag, int vflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;
        rinv = sqrt(r2inv);

        // atom can have both a charge and dipole
@ -260,146 +236,145 @@ void PairLJSFDipoleSFGPU::cpu_compute(int start, int inum, int eflag, int vflag,
        if (rsq < cut_coulsq[itype][jtype]) {

          if (qtmp != 0.0 && q[j] != 0.0) {
-            pre1 = qtmp*q[j]*rinv*(r2inv-1.0/cut_coulsq[itype][jtype]);
+            pre1 = qtmp * q[j] * rinv * (r2inv - 1.0 / cut_coulsq[itype][jtype]);

-            forcecoulx += pre1*delx;
-            forcecouly += pre1*dely;
-            forcecoulz += pre1*delz;
+            forcecoulx += pre1 * delx;
+            forcecouly += pre1 * dely;
+            forcecoulz += pre1 * delz;
          }

          if (mu[i][3] > 0.0 && mu[j][3] > 0.0) {
-            r3inv = r2inv*rinv;
-            r5inv = r3inv*r2inv;
-            rcutcoul2inv=1.0/cut_coulsq[itype][jtype];
+            r3inv = r2inv * rinv;
+            r5inv = r3inv * r2inv;
+            rcutcoul2inv = 1.0 / cut_coulsq[itype][jtype];

-            pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2];
-            pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
-            pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
+            pdotp = mu[i][0] * mu[j][0] + mu[i][1] * mu[j][1] + mu[i][2] * mu[j][2];
+            pidotr = mu[i][0] * delx + mu[i][1] * dely + mu[i][2] * delz;
+            pjdotr = mu[j][0] * delx + mu[j][1] * dely + mu[j][2] * delz;

-            afac = 1.0 - rsq*rsq * rcutcoul2inv*rcutcoul2inv;
-            pre1 = afac * ( pdotp - 3.0 * r2inv * pidotr * pjdotr );
-            aforcecoulx = pre1*delx;
-            aforcecouly = pre1*dely;
-            aforcecoulz = pre1*delz;
+            afac = 1.0 - rsq * rsq * rcutcoul2inv * rcutcoul2inv;
+            pre1 = afac * (pdotp - 3.0 * r2inv * pidotr * pjdotr);
+            aforcecoulx = pre1 * delx;
+            aforcecouly = pre1 * dely;
+            aforcecoulz = pre1 * delz;

-            bfac = 1.0 - 4.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv) +
-              3.0*rsq*rsq*rcutcoul2inv*rcutcoul2inv;
+            bfac = 1.0 - 4.0 * rsq * sqrt(rsq) * rcutcoul2inv * sqrt(rcutcoul2inv) +
+                3.0 * rsq * rsq * rcutcoul2inv * rcutcoul2inv;
            presf = 2.0 * r2inv * pidotr * pjdotr;
-            bforcecoulx = bfac * (pjdotr*mu[i][0]+pidotr*mu[j][0]-presf*delx);
-            bforcecouly = bfac * (pjdotr*mu[i][1]+pidotr*mu[j][1]-presf*dely);
-            bforcecoulz = bfac * (pjdotr*mu[i][2]+pidotr*mu[j][2]-presf*delz);
+            bforcecoulx = bfac * (pjdotr * mu[i][0] + pidotr * mu[j][0] - presf * delx);
+            bforcecouly = bfac * (pjdotr * mu[i][1] + pidotr * mu[j][1] - presf * dely);
+            bforcecoulz = bfac * (pjdotr * mu[i][2] + pidotr * mu[j][2] - presf * delz);

-            forcecoulx += 3.0 * r5inv * ( aforcecoulx + bforcecoulx );
-            forcecouly += 3.0 * r5inv * ( aforcecouly + bforcecouly );
-            forcecoulz += 3.0 * r5inv * ( aforcecoulz + bforcecoulz );
+            forcecoulx += 3.0 * r5inv * (aforcecoulx + bforcecoulx);
+            forcecouly += 3.0 * r5inv * (aforcecouly + bforcecouly);
+            forcecoulz += 3.0 * r5inv * (aforcecoulz + bforcecoulz);

            pre2 = 3.0 * bfac * r5inv * pjdotr;
            pre3 = 3.0 * bfac * r5inv * pidotr;
            pre4 = -bfac * r3inv;

-            crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]);
-            crossy = pre4 * (mu[i][2]*mu[j][0] - mu[i][0]*mu[j][2]);
-            crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]);
+            crossx = pre4 * (mu[i][1] * mu[j][2] - mu[i][2] * mu[j][1]);
+            crossy = pre4 * (mu[i][2] * mu[j][0] - mu[i][0] * mu[j][2]);
+            crossz = pre4 * (mu[i][0] * mu[j][1] - mu[i][1] * mu[j][0]);

-            tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely);
-            tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz);
-            tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx);
-            tjxcoul += -crossx + pre3 * (mu[j][1]*delz - mu[j][2]*dely);
-            tjycoul += -crossy + pre3 * (mu[j][2]*delx - mu[j][0]*delz);
-            tjzcoul += -crossz + pre3 * (mu[j][0]*dely - mu[j][1]*delx);
+            tixcoul += crossx + pre2 * (mu[i][1] * delz - mu[i][2] * dely);
+            tiycoul += crossy + pre2 * (mu[i][2] * delx - mu[i][0] * delz);
+            tizcoul += crossz + pre2 * (mu[i][0] * dely - mu[i][1] * delx);
+            tjxcoul += -crossx + pre3 * (mu[j][1] * delz - mu[j][2] * dely);
+            tjycoul += -crossy + pre3 * (mu[j][2] * delx - mu[j][0] * delz);
+            tjzcoul += -crossz + pre3 * (mu[j][0] * dely - mu[j][1] * delx);
          }

          if (mu[i][3] > 0.0 && q[j] != 0.0) {
-            r3inv = r2inv*rinv;
-            r5inv = r3inv*r2inv;
-            pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
-            rcutcoul2inv=1.0/cut_coulsq[itype][jtype];
-            pre1 = 3.0 * q[j] * r5inv * pidotr * (1-rsq*rcutcoul2inv);
-            pqfac = 1.0 - 3.0*rsq*rcutcoul2inv +
-              2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv);
+            r3inv = r2inv * rinv;
+            r5inv = r3inv * r2inv;
+            pidotr = mu[i][0] * delx + mu[i][1] * dely + mu[i][2] * delz;
+            rcutcoul2inv = 1.0 / cut_coulsq[itype][jtype];
+            pre1 = 3.0 * q[j] * r5inv * pidotr * (1 - rsq * rcutcoul2inv);
+            pqfac = 1.0 - 3.0 * rsq * rcutcoul2inv +
+                2.0 * rsq * sqrt(rsq) * rcutcoul2inv * sqrt(rcutcoul2inv);
            pre2 = q[j] * r3inv * pqfac;

-            forcecoulx += pre2*mu[i][0] - pre1*delx;
-            forcecouly += pre2*mu[i][1] - pre1*dely;
-            forcecoulz += pre2*mu[i][2] - pre1*delz;
-            tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely);
-            tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz);
-            tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx);
+            forcecoulx += pre2 * mu[i][0] - pre1 * delx;
+            forcecouly += pre2 * mu[i][1] - pre1 * dely;
+            forcecoulz += pre2 * mu[i][2] - pre1 * delz;
+            tixcoul += pre2 * (mu[i][1] * delz - mu[i][2] * dely);
+            tiycoul += pre2 * (mu[i][2] * delx - mu[i][0] * delz);
+            tizcoul += pre2 * (mu[i][0] * dely - mu[i][1] * delx);
          }

          if (mu[j][3] > 0.0 && qtmp != 0.0) {
-            r3inv = r2inv*rinv;
-            r5inv = r3inv*r2inv;
-            pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
-            rcutcoul2inv=1.0/cut_coulsq[itype][jtype];
-            pre1 = 3.0 * qtmp * r5inv * pjdotr * (1-rsq*rcutcoul2inv);
-            qpfac = 1.0 - 3.0*rsq*rcutcoul2inv +
-              2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv);
+            r3inv = r2inv * rinv;
+            r5inv = r3inv * r2inv;
+            pjdotr = mu[j][0] * delx + mu[j][1] * dely + mu[j][2] * delz;
+            rcutcoul2inv = 1.0 / cut_coulsq[itype][jtype];
+            pre1 = 3.0 * qtmp * r5inv * pjdotr * (1 - rsq * rcutcoul2inv);
+            qpfac = 1.0 - 3.0 * rsq * rcutcoul2inv +
+                2.0 * rsq * sqrt(rsq) * rcutcoul2inv * sqrt(rcutcoul2inv);
            pre2 = qtmp * r3inv * qpfac;

-            forcecoulx += pre1*delx - pre2*mu[j][0];
-            forcecouly += pre1*dely - pre2*mu[j][1];
-            forcecoulz += pre1*delz - pre2*mu[j][2];
-            tjxcoul += -pre2 * (mu[j][1]*delz - mu[j][2]*dely);
-            tjycoul += -pre2 * (mu[j][2]*delx - mu[j][0]*delz);
-            tjzcoul += -pre2 * (mu[j][0]*dely - mu[j][1]*delx);
+            forcecoulx += pre1 * delx - pre2 * mu[j][0];
+            forcecouly += pre1 * dely - pre2 * mu[j][1];
+            forcecoulz += pre1 * delz - pre2 * mu[j][2];
+            tjxcoul += -pre2 * (mu[j][1] * delz - mu[j][2] * dely);
+            tjycoul += -pre2 * (mu[j][2] * delx - mu[j][0] * delz);
+            tjzcoul += -pre2 * (mu[j][0] * dely - mu[j][1] * delx);
          }
        }

        // LJ interaction

        if (rsq < cut_ljsq[itype][jtype]) {
-          r6inv = r2inv*r2inv*r2inv;
-          forceljcut = r6inv*(lj1[itype][jtype]*r6inv-lj2[itype][jtype])*r2inv;
+          r6inv = r2inv * r2inv * r2inv;
+          forceljcut = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]) * r2inv;

          rcutlj2inv = 1.0 / cut_ljsq[itype][jtype];
          rcutlj6inv = rcutlj2inv * rcutlj2inv * rcutlj2inv;
-          forceljsf = (lj1[itype][jtype]*rcutlj6inv - lj2[itype][jtype]) *
-          rcutlj6inv * rcutlj2inv;
+          forceljsf =
+              (lj1[itype][jtype] * rcutlj6inv - lj2[itype][jtype]) * rcutlj6inv * rcutlj2inv;

          forcelj = factor_lj * (forceljcut - forceljsf);
-        } else forcelj = 0.0;
+        } else
+          forcelj = 0.0;

        // total force

-        fq = factor_coul*qqrd2e;
-        fx = fq*forcecoulx + delx*forcelj;
-        fy = fq*forcecouly + dely*forcelj;
-        fz = fq*forcecoulz + delz*forcelj;
+        fq = factor_coul * qqrd2e;
+        fx = fq * forcecoulx + delx * forcelj;
+        fy = fq * forcecouly + dely * forcelj;
+        fz = fq * forcecoulz + delz * forcelj;

        // force & torque accumulation

        f[i][0] += fx;
        f[i][1] += fy;
        f[i][2] += fz;
-        torque[i][0] += fq*tixcoul;
-        torque[i][1] += fq*tiycoul;
-        torque[i][2] += fq*tizcoul;
+        torque[i][0] += fq * tixcoul;
+        torque[i][1] += fq * tiycoul;
+        torque[i][2] += fq * tizcoul;

        if (eflag) {
          if (rsq < cut_coulsq[itype][jtype]) {
-            ecoul = qtmp*q[j]*rinv*
-              pow((1.0-sqrt(rsq)/sqrt(cut_coulsq[itype][jtype])),2);
+            ecoul = qtmp * q[j] * rinv * pow((1.0 - sqrt(rsq) / sqrt(cut_coulsq[itype][jtype])), 2);
            if (mu[i][3] > 0.0 && mu[j][3] > 0.0)
-              ecoul += bfac * (r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr);
-            if (mu[i][3] > 0.0 && q[j] != 0.0)
-              ecoul += -q[j]*r3inv * pqfac * pidotr;
-            if (mu[j][3] > 0.0 && qtmp != 0.0)
-              ecoul += qtmp*r3inv * qpfac * pjdotr;
-            ecoul *= factor_coul*qqrd2e;
-          } else ecoul = 0.0;
+              ecoul += bfac * (r3inv * pdotp - 3.0 * r5inv * pidotr * pjdotr);
+            if (mu[i][3] > 0.0 && q[j] != 0.0) ecoul += -q[j] * r3inv * pqfac * pidotr;
+            if (mu[j][3] > 0.0 && qtmp != 0.0) ecoul += qtmp * r3inv * qpfac * pjdotr;
+            ecoul *= factor_coul * qqrd2e;
+          } else
+            ecoul = 0.0;

          if (rsq < cut_ljsq[itype][jtype]) {
-            evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) +
-              rcutlj6inv*(6*lj3[itype][jtype]*rcutlj6inv-3*lj4[itype][jtype])*
-              rsq*rcutlj2inv +
-              rcutlj6inv*(-7*lj3[itype][jtype]*rcutlj6inv+4*lj4[itype][jtype]);
+            evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) +
+                rcutlj6inv * (6 * lj3[itype][jtype] * rcutlj6inv - 3 * lj4[itype][jtype]) * rsq *
+                    rcutlj2inv +
+                rcutlj6inv * (-7 * lj3[itype][jtype] * rcutlj6inv + 4 * lj4[itype][jtype]);
            evdwl *= factor_lj;
-          } else evdwl = 0.0;
+          } else
+            evdwl = 0.0;
        }

-        if (evflag) ev_tally_xyz_full(i,evdwl,ecoul,
-                                      fx,fy,fz,delx,dely,delz);
+        if (evflag) ev_tally_xyz_full(i, evdwl, ecoul, fx, fy, fz, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_lj_smooth_gpu.cpp
+++ b/src/GPU/pair_lj_smooth_gpu.cpp
@ -23,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -155,11 +154,7 @@ void PairLJSmoothGPU::init_style()
                     gpu_mode, screen, ljsw0, ljsw1, ljsw2, ljsw3, ljsw4, cut_inner, cut_inner_sq);
  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this, instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
--- a/src/GPU/pair_mie_cut_gpu.cpp
+++ b/src/GPU/pair_mie_cut_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,25 +32,21 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int mie_gpu_init(const int ntypes, double **cutsq, double **host_mie1,
-                 double **host_mie2, double **host_mie3, double **host_mie4,
-                 double **host_gamA, double **host_gamR, double **offset,
-                 double *special_lj, const int nlocal,
-                 const int nall, const int max_nbors, const int maxspecial,
-                 const double cell_size, int &gpu_mode, FILE *screen);
+int mie_gpu_init(const int ntypes, double **cutsq, double **host_mie1, double **host_mie2,
+                 double **host_mie3, double **host_mie4, double **host_gamA, double **host_gamR,
+                 double **offset, double *special_lj, const int nlocal, const int nall,
+                 const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode,
+                 FILE *screen);
 void mie_gpu_clear();
-int ** mie_gpu_compute_n(const int ago, const int inum, const int nall,
-                         double **host_x, int *host_type, double *sublo,
-                         double *subhi, tagint *tag, int **nspecial,
-                         tagint **special, const bool eflag, const bool vflag,
-                         const bool eatom, const bool vatom, int &host_start,
-                         int **ilist, int **jnum,
-                         const double cpu_time, bool &success);
-void mie_gpu_compute(const int ago, const int inum, const int nall,
-                     double **host_x, int *host_type, int *ilist, int *numj,
-                     int **firstneigh, const bool eflag, const bool vflag,
-                     const bool eatom, const bool vatom, int &host_start,
-                     const double cpu_time, bool &success);
+int **mie_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                        int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                        tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                        const bool vatom, int &host_start, int **ilist, int **jnum,
+                        const double cpu_time, bool &success);
+void mie_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
+                     int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
+                     const bool eatom, const bool vatom, int &host_start, const double cpu_time,
+                     bool &success);
 double mie_gpu_bytes();

 /* ---------------------------------------------------------------------- */
@ -79,7 +73,7 @@ PairMIECutGPU::~PairMIECutGPU()

 void PairMIECutGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -87,7 +81,7 @@ void PairMIECutGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -96,28 +90,24 @@ void PairMIECutGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = mie_gpu_compute_n(neighbor->ago, inum, nall,
-                                   atom->x, atom->type, sublo,
-                                   subhi, atom->tag, atom->nspecial,
-                                   atom->special, eflag, vflag, eflag_atom,
-                                   vflag_atom, host_start,
-                                   &ilist, &numneigh, cpu_time, success);
+    firstneigh =
+        mie_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+                          atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
+                          host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    mie_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                      ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                      vflag_atom, host_start, cpu_time, success);
+    mie_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                    eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -132,17 +122,15 @@ void PairMIECutGPU::init_style()
 {
  cut_respa = nullptr;

-
  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
  double cut;
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -150,21 +138,15 @@ void PairMIECutGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = mie_gpu_init(atom->ntypes+1, cutsq, mie1, mie2, mie3, mie4,
-                             gamA, gamR, offset, force->special_lj, atom->nlocal,
-                             atom->nlocal+atom->nghost, mnf, maxspecial,
-                             cell_size, gpu_mode, screen);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success = mie_gpu_init(atom->ntypes + 1, cutsq, mie1, mie2, mie3, mie4, gamA, gamR, offset,
+                             force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
+                             maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -177,11 +159,12 @@ double PairMIECutGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairMIECutGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
-                                int *ilist, int *numneigh, int **firstneigh) {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,rgamR,rgamA,forcemie,factor_mie;
+void PairMIECutGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                                int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
+  double rsq, r2inv, rgamR, rgamA, forcemie, factor_mie;
  int *jlist;

  double **x = atom->x;
@ -208,27 +191,26 @@ void PairMIECutGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
-        rgamA = pow(r2inv,(gamA[itype][jtype]/2.0));
-        rgamR = pow(r2inv,(gamR[itype][jtype]/2.0));
-        forcemie =  (mie1[itype][jtype]*rgamR - mie2[itype][jtype]*rgamA);
-        fpair = factor_mie*forcemie*r2inv;
+        r2inv = 1.0 / rsq;
+        rgamA = pow(r2inv, (gamA[itype][jtype] / 2.0));
+        rgamR = pow(r2inv, (gamR[itype][jtype] / 2.0));
+        forcemie = (mie1[itype][jtype] * rgamR - mie2[itype][jtype] * rgamA);
+        fpair = factor_mie * forcemie * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
-          evdwl = (mie3[itype][jtype]*rgamR - mie4[itype][jtype]*rgamA) -
-            offset[itype][jtype];
+          evdwl = (mie3[itype][jtype] * rgamR - mie4[itype][jtype] * rgamA) - offset[itype][jtype];
          evdwl *= factor_mie;
        }

-        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_morse_gpu.cpp
+++ b/src/GPU/pair_morse_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,24 +32,20 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int mor_gpu_init(const int ntypes, double **cutsq, double **host_morse1,
-                 double **host_r0, double **host_alpha, double **host_d0,
-                 double **offset, double *special_lj, const int nlocal,
-                 const int nall, const int max_nbors, const int maxspecial,
+int mor_gpu_init(const int ntypes, double **cutsq, double **host_morse1, double **host_r0,
+                 double **host_alpha, double **host_d0, double **offset, double *special_lj,
+                 const int nlocal, const int nall, const int max_nbors, const int maxspecial,
                 const double cell_size, int &gpu_mode, FILE *screen);
 void mor_gpu_clear();
-int ** mor_gpu_compute_n(const int ago, const int inum, const int nall,
-                         double **host_x, int *host_type, double *sublo,
-                         double *subhi, tagint *tag, int **nspecial,
-                         tagint **special, const bool eflag, const bool vflag,
-                         const bool eatom, const bool vatom, int &host_start,
-                         int **ilist, int **jnum,
-                         const double cpu_time, bool &success);
-void mor_gpu_compute(const int ago, const int inum, const int nall,
-                     double **host_x, int *host_type, int *ilist, int *numj,
-                     int **firstneigh, const bool eflag, const bool vflag,
-                     const bool eatom, const bool vatom, int &host_start,
-                     const double cpu_time, bool &success);
+int **mor_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                        int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                        tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                        const bool vatom, int &host_start, int **ilist, int **jnum,
+                        const double cpu_time, bool &success);
+void mor_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
+                     int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
+                     const bool eatom, const bool vatom, int &host_start, const double cpu_time,
+                     bool &success);
 double mor_gpu_bytes();

 /* ---------------------------------------------------------------------- */
@ -77,7 +71,7 @@ PairMorseGPU::~PairMorseGPU()

 void PairMorseGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -85,7 +79,7 @@ void PairMorseGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -94,28 +88,24 @@ void PairMorseGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = mor_gpu_compute_n(neighbor->ago, inum, nall,
-                                   atom->x, atom->type, sublo,
-                                   subhi, atom->tag, atom->nspecial,
-                                   atom->special, eflag, vflag, eflag_atom,
-                                   vflag_atom, host_start, &ilist, &numneigh,
-                                   cpu_time, success);
+    firstneigh =
+        mor_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+                          atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
+                          host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    mor_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                    vflag_atom, host_start, cpu_time, success);
+    mor_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                    eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -135,10 +125,9 @@ void PairMorseGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -146,21 +135,15 @@ void PairMorseGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = mor_gpu_init(atom->ntypes+1, cutsq, morse1, r0, alpha, d0,
-                             offset, force->special_lj, atom->nlocal,
-                             atom->nlocal+atom->nghost, mnf, maxspecial,
-                             cell_size, gpu_mode, screen);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success = mor_gpu_init(atom->ntypes + 1, cutsq, morse1, r0, alpha, d0, offset,
+                             force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
+                             maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -173,12 +156,12 @@ double PairMorseGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairMorseGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
-                               int *ilist, int *numneigh, int **firstneigh)
+void PairMorseGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                               int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r,dr,dexp,factor_lj;
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
+  double rsq, r, dr, dexp, factor_lj;
  int *jlist;

  double **x = atom->x;
@ -205,26 +188,25 @@ void PairMorseGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
        r = sqrt(rsq);
        dr = r - r0[itype][jtype];
        dexp = exp(-alpha[itype][jtype] * dr);
-        fpair = factor_lj * morse1[itype][jtype] * (dexp*dexp - dexp) / r;
+        fpair = factor_lj * morse1[itype][jtype] * (dexp * dexp - dexp) / r;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
-          evdwl = d0[itype][jtype] * (dexp*dexp - 2.0*dexp) -
-            offset[itype][jtype];
+          evdwl = d0[itype][jtype] * (dexp * dexp - 2.0 * dexp) - offset[itype][jtype];
          evdwl *= factor_lj;
        }

-        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_resquared_gpu.cpp
+++ b/src/GPU/pair_resquared_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -27,7 +26,6 @@
 #include "math_extra.h"
 #include "memory.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -37,39 +35,32 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int re_gpu_init(const int ntypes, double **shape, double **well,
-                double **cutsq, double **sigma, double **epsilon,
-                int **form, double **host_lj1, double **host_lj2,
-                double **host_lj3, double **host_lj4, double **offset,
-                double *special_lj, const int nlocal, const int nall,
-                const int max_nbors, const int maxspecial,
+int re_gpu_init(const int ntypes, double **shape, double **well, double **cutsq, double **sigma,
+                double **epsilon, int **form, double **host_lj1, double **host_lj2,
+                double **host_lj3, double **host_lj4, double **offset, double *special_lj,
+                const int nlocal, const int nall, const int max_nbors, const int maxspecial,
                const double cell_size, int &gpu_mode, FILE *screen);
 void re_gpu_clear();
-int ** re_gpu_compute_n(const int ago, const int inum, const int nall,
-                        double **host_x, int *host_type, double *sublo,
-                        double *subhi, tagint *tag, int **nspecial,
-                        tagint **special, const bool eflag, const bool vflag,
-                        const bool eatom, const bool vatom, int &host_start,
-                        int **ilist, int **jnum, const double cpu_time,
-                        bool &success, double **host_quat);
-int * re_gpu_compute(const int ago, const int inum, const int nall,
-                     double **host_x, int *host_type, int *ilist, int *numj,
-                     int **firstneigh, const bool eflag, const bool vflag,
-                     const bool eatom, const bool vatom, int &host_start,
-                     const double cpu_time, bool &success, double **host_quat);
+int **re_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                       int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                       tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                       const bool vatom, int &host_start, int **ilist, int **jnum,
+                       const double cpu_time, bool &success, double **host_quat);
+int *re_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
+                    int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
+                    const bool eatom, const bool vatom, int &host_start, const double cpu_time,
+                    bool &success, double **host_quat);
 double re_gpu_bytes();

-enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE};
+enum { SPHERE_SPHERE, SPHERE_ELLIPSE, ELLIPSE_SPHERE, ELLIPSE_ELLIPSE };

 /* ---------------------------------------------------------------------- */

-PairRESquaredGPU::PairRESquaredGPU(LAMMPS *lmp) : PairRESquared(lmp),
-                                                gpu_mode(GPU_FORCE)
+PairRESquaredGPU::PairRESquaredGPU(LAMMPS *lmp) : PairRESquared(lmp), gpu_mode(GPU_FORCE)
 {
  reinitflag = 0;
  avec = (AtomVecEllipsoid *) atom->style_match("ellipsoid");
-  if (!avec)
-    error->all(FLERR,"Pair resquared/gpu requires atom style ellipsoid");
+  if (!avec) error->all(FLERR, "Pair resquared/gpu requires atom style ellipsoid");
  quat_nmax = 0;
  quat = nullptr;
  suffix_flag |= Suffix::GPU;
@ -91,7 +82,7 @@ PairRESquaredGPU::~PairRESquaredGPU()

 void PairRESquaredGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -105,7 +96,7 @@ void PairRESquaredGPU::compute(int eflag, int vflag)
  }
  AtomVecEllipsoid::Bonus *bonus = avec->bonus;
  int *ellipsoid = atom->ellipsoid;
-  for (int i=0; i<nall; i++) {
+  for (int i = 0; i < nall; i++) {
    int qi = ellipsoid[i];
    if (qi > -1) {
      quat[i][0] = bonus[qi].quat[0];
@ -116,7 +107,7 @@ void PairRESquaredGPU::compute(int eflag, int vflag)
  }

  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -125,26 +116,22 @@ void PairRESquaredGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = re_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                  atom->type, sublo, subhi,
-                                  atom->tag, atom->nspecial, atom->special,
-                                  eflag, vflag, eflag_atom, vflag_atom,
-                                  host_start, &ilist, &numneigh, cpu_time,
-                                  success, quat);
+    firstneigh =
+        re_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+                         atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
+                         host_start, &ilist, &numneigh, cpu_time, success, quat);
  } else {
    inum = list->inum;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    ilist = re_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                           list->ilist, numneigh, firstneigh, eflag, vflag,
-                           eflag_atom, vflag_atom, host_start,
-                           cpu_time, success, quat);
+    ilist = re_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, list->ilist, numneigh,
+                           firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
+                           success, quat);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

  if (host_start < inum) {
    cpu_time = platform::walltime();
@ -159,21 +146,20 @@ void PairRESquaredGPU::compute(int eflag, int vflag)

 void PairRESquaredGPU::init_style()
 {
-  if (!atom->ellipsoid_flag)
-    error->all(FLERR,"Pair resquared/gpu requires atom style ellipsoid");
+  if (!atom->ellipsoid_flag) error->all(FLERR, "Pair resquared/gpu requires atom style ellipsoid");

  // per-type shape precalculations
  // require that atom shapes are identical within each type
  // if shape = 0 for point particle, set shape = 1 as required by Gay-Berne

  for (int i = 1; i <= atom->ntypes; i++) {
-    if (!atom->shape_consistency(i,shape1[i][0],shape1[i][1],shape1[i][2]))
-      error->all(FLERR,"Pair resquared/gpu requires atoms with same type have same shape");
+    if (!atom->shape_consistency(i, shape1[i][0], shape1[i][1], shape1[i][2]))
+      error->all(FLERR, "Pair resquared/gpu requires atoms with same type have same shape");
    if (setwell[i]) {
-      shape2[i][0] = shape1[i][0]*shape1[i][0];
-      shape2[i][1] = shape1[i][1]*shape1[i][1];
-      shape2[i][2] = shape1[i][2]*shape1[i][2];
-      lshape[i] = shape1[i][0]*shape1[i][1]*shape1[i][2];
+      shape2[i][0] = shape1[i][0] * shape1[i][0];
+      shape2[i][1] = shape1[i][1] * shape1[i][1];
+      shape2[i][2] = shape1[i][2] * shape1[i][2];
+      lshape[i] = shape1[i][0] * shape1[i][1] * shape1[i][2];
    }
  }

@ -183,10 +169,9 @@ void PairRESquaredGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -195,22 +180,16 @@ void PairRESquaredGPU::init_style()

  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = re_gpu_init(atom->ntypes+1, shape1, well, cutsq, sigma,
-                            epsilon, form, lj1, lj2, lj3, lj4, offset,
-                            force->special_lj, atom->nlocal,
-                            atom->nlocal+atom->nghost, mnf, maxspecial,
-                            cell_size, gpu_mode, screen);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      re_gpu_init(atom->ntypes + 1, shape1, well, cutsq, sigma, epsilon, form, lj1, lj2, lj3, lj4,
+                  offset, force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
+                  maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
  quat_nmax = static_cast<int>(1.1 * (atom->nlocal + atom->nghost));
  memory->grow(quat, quat_nmax, 4, "pair:quat");
 }
@ -220,20 +199,19 @@ void PairRESquaredGPU::init_style()
 double PairRESquaredGPU::memory_usage()
 {
  double bytes = Pair::memory_usage();
-  return bytes + memory->usage(quat,quat_nmax)+re_gpu_bytes();
+  return bytes + memory->usage(quat, quat_nmax) + re_gpu_bytes();
 }

 /* ---------------------------------------------------------------------- */

-void PairRESquaredGPU::cpu_compute(int start, int inum, int eflag,
-                                   int /* vflag */, int *ilist,
+void PairRESquaredGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
                                   int *numneigh, int **firstneigh)
 {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double evdwl,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj;
-  double fforce[3],ttor[3],rtor[3],r12[3];
+  int i, j, ii, jj, jnum, itype, jtype;
+  double evdwl, one_eng, rsq, r2inv, r6inv, forcelj, factor_lj;
+  double fforce[3], ttor[3], rtor[3], r12[3];
  int *jlist;
-  RE2Vars wi,wj;
+  RE2Vars wi, wj;

  double **x = atom->x;
  double **f = atom->f;
@ -249,7 +227,7 @@ void PairRESquaredGPU::cpu_compute(int start, int inum, int eflag,

    // not a LJ sphere

-    if (lshape[itype] != 0.0) precompute_i(i,wi);
+    if (lshape[itype] != 0.0) precompute_i(i, wi);

    jlist = firstneigh[i];
    jnum = numneigh[i];
@ -261,10 +239,10 @@ void PairRESquaredGPU::cpu_compute(int start, int inum, int eflag,

      // r12 = center to center vector

-      r12[0] = x[j][0]-x[i][0];
-      r12[1] = x[j][1]-x[i][1];
-      r12[2] = x[j][2]-x[i][2];
-      rsq = MathExtra::dot3(r12,r12);
+      r12[0] = x[j][0] - x[i][0];
+      r12[1] = x[j][1] - x[i][1];
+      r12[2] = x[j][2] - x[i][2];
+      rsq = MathExtra::dot3(r12, r12);
      jtype = type[j];

      // compute if less than cutoff
@ -272,39 +250,39 @@ void PairRESquaredGPU::cpu_compute(int start, int inum, int eflag,
      if (rsq < cutsq[itype][jtype]) {
        switch (form[itype][jtype]) {

-         case SPHERE_SPHERE:
-          r2inv = 1.0/rsq;
-          r6inv = r2inv*r2inv*r2inv;
-          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-          forcelj *= -r2inv;
-          if (eflag) one_eng =
-              r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) -
-              offset[itype][jtype];
-          fforce[0] = r12[0]*forcelj;
-          fforce[1] = r12[1]*forcelj;
-          fforce[2] = r12[2]*forcelj;
-          break;
+          case SPHERE_SPHERE:
+            r2inv = 1.0 / rsq;
+            r6inv = r2inv * r2inv * r2inv;
+            forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
+            forcelj *= -r2inv;
+            if (eflag)
+              one_eng =
+                  r6inv * (r6inv * lj3[itype][jtype] - lj4[itype][jtype]) - offset[itype][jtype];
+            fforce[0] = r12[0] * forcelj;
+            fforce[1] = r12[1] * forcelj;
+            fforce[2] = r12[2] * forcelj;
+            break;

-         case SPHERE_ELLIPSE:
-          precompute_i(j,wj);
-          one_eng = resquared_lj(j,i,wj,r12,rsq,fforce,rtor,false);
-          break;
+          case SPHERE_ELLIPSE:
+            precompute_i(j, wj);
+            one_eng = resquared_lj(j, i, wj, r12, rsq, fforce, rtor, false);
+            break;

-         case ELLIPSE_SPHERE:
-          one_eng = resquared_lj(i,j,wi,r12,rsq,fforce,ttor,true);
-          tor[i][0] += ttor[0]*factor_lj;
-          tor[i][1] += ttor[1]*factor_lj;
-          tor[i][2] += ttor[2]*factor_lj;
-          break;
+          case ELLIPSE_SPHERE:
+            one_eng = resquared_lj(i, j, wi, r12, rsq, fforce, ttor, true);
+            tor[i][0] += ttor[0] * factor_lj;
+            tor[i][1] += ttor[1] * factor_lj;
+            tor[i][2] += ttor[2] * factor_lj;
+            break;

-         default:
-          precompute_i(j,wj);
-          one_eng = resquared_analytic(i,j,wi,wj,r12,rsq,fforce,ttor,rtor);
-          tor[i][0] += ttor[0]*factor_lj;
-          tor[i][1] += ttor[1]*factor_lj;
-          tor[i][2] += ttor[2]*factor_lj;
+          default:
+            precompute_i(j, wj);
+            one_eng = resquared_analytic(i, j, wi, wj, r12, rsq, fforce, ttor, rtor);
+            tor[i][0] += ttor[0] * factor_lj;
+            tor[i][1] += ttor[1] * factor_lj;
+            tor[i][2] += ttor[2] * factor_lj;

-         break;
+            break;
        }

        fforce[0] *= factor_lj;
@ -314,10 +292,11 @@ void PairRESquaredGPU::cpu_compute(int start, int inum, int eflag,
        f[i][1] += fforce[1];
        f[i][2] += fforce[2];

-        if (eflag) evdwl = factor_lj*one_eng;
+        if (eflag) evdwl = factor_lj * one_eng;

-        if (evflag) ev_tally_xyz_full(i,evdwl,0.0,fforce[0],fforce[1],
-                                      fforce[2],-r12[0],-r12[1],-r12[2]);
+        if (evflag)
+          ev_tally_xyz_full(i, evdwl, 0.0, fforce[0], fforce[1], fforce[2], -r12[0], -r12[1],
+                            -r12[2]);
      }
    }
  }
--- a/src/GPU/pair_soft_gpu.cpp
+++ b/src/GPU/pair_soft_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -25,7 +24,6 @@
 #include "gpu_extra.h"
 #include "math_const.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -35,28 +33,22 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int soft_gpu_init(const int ntypes, double **cutsq, double **prefactor,
-                   double **cut, double *special_lj, const int nlocal,
-                   const int nall, const int max_nbors, const int maxspecial,
-                   const double cell_size, int &gpu_mode, FILE *screen);
-void soft_gpu_reinit(const int ntypes, double **cutsq, double **host_prefactor,
-                     double **host_cut);
+int soft_gpu_init(const int ntypes, double **cutsq, double **prefactor, double **cut,
+                  double *special_lj, const int nlocal, const int nall, const int max_nbors,
+                  const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen);
+void soft_gpu_reinit(const int ntypes, double **cutsq, double **host_prefactor, double **host_cut);
 void soft_gpu_clear();
-int ** soft_gpu_compute_n(const int ago, const int inum, const int nall,
-                          double **host_x, int *host_type, double *sublo,
-                          double *subhi, tagint *tag, int **nspecial,
-                          tagint **special, const bool eflag, const bool vflag,
-                          const bool eatom, const bool vatom, int &host_start,
-                          int **ilist, int **jnum,
-                          const double cpu_time, bool &success);
-void soft_gpu_compute(const int ago, const int inum, const int nall,
-                       double **host_x, int *host_type, int *ilist, int *numj,
-                       int **firstneigh, const bool eflag, const bool vflag,
-                       const bool eatom, const bool vatom, int &host_start,
-                       const double cpu_time, bool &success);
+int **soft_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                         int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                         const bool vatom, int &host_start, int **ilist, int **jnum,
+                         const double cpu_time, bool &success);
+void soft_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                      int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                      const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                      const double cpu_time, bool &success);
 double soft_gpu_bytes();

-
 using namespace MathConst;

 /* ---------------------------------------------------------------------- */
@ -82,7 +74,7 @@ PairSoftGPU::~PairSoftGPU()

 void PairSoftGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -90,7 +82,7 @@ void PairSoftGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -99,28 +91,24 @@ void PairSoftGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = soft_gpu_compute_n(neighbor->ago, inum, nall,
-                                     atom->x, atom->type, sublo,
-                                     subhi, atom->tag, atom->nspecial,
-                                     atom->special, eflag, vflag, eflag_atom,
-                                     vflag_atom, host_start,
-                                     &ilist, &numneigh, cpu_time, success);
+    firstneigh =
+        soft_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+                           atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
+                           host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    soft_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                      ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                      vflag_atom, host_start, cpu_time, success);
+    soft_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                     eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -140,10 +128,9 @@ void PairSoftGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        mcut = init_one(i,j);
+        mcut = init_one(i, j);
        mcut *= mcut;
-        if (mcut > maxcut)
-          maxcut = mcut;
+        if (mcut > maxcut) maxcut = mcut;
        cutsq[i][j] = cutsq[j][i] = mcut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -151,21 +138,15 @@ void PairSoftGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = soft_gpu_init(atom->ntypes+1, cutsq, prefactor, cut,
-                              force->special_lj, atom->nlocal,
-                              atom->nlocal+atom->nghost, mnf, maxspecial,
-                              cell_size, gpu_mode, screen);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      soft_gpu_init(atom->ntypes + 1, cutsq, prefactor, cut, force->special_lj, atom->nlocal,
+                    atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -174,7 +155,7 @@ void PairSoftGPU::reinit()
 {
  Pair::reinit();

-  soft_gpu_reinit(atom->ntypes+1, cutsq, prefactor, cut);
+  soft_gpu_reinit(atom->ntypes + 1, cutsq, prefactor, cut);
 }

 /* ---------------------------------------------------------------------- */
@ -187,11 +168,12 @@ double PairSoftGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairSoftGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
-                              int *ilist, int *numneigh, int **firstneigh) {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double r,rsq,arg,factor_lj;
+void PairSoftGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                              int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
+  double r, rsq, arg, factor_lj;
  int *jlist;

  double **x = atom->x;
@ -218,24 +200,24 @@ void PairSoftGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
        r = sqrt(rsq);
-        arg = MY_PI*r/cut[itype][jtype];
-        if (r > 0.0) fpair = factor_lj * prefactor[itype][jtype] *
-                       sin(arg) * MY_PI/cut[itype][jtype]/r;
-        else fpair = 0.0;
+        arg = MY_PI * r / cut[itype][jtype];
+        if (r > 0.0)
+          fpair = factor_lj * prefactor[itype][jtype] * sin(arg) * MY_PI / cut[itype][jtype] / r;
+        else
+          fpair = 0.0;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

-        if (eflag)
-          evdwl = factor_lj * prefactor[itype][jtype] * (1.0+cos(arg));
+        if (eflag) evdwl = factor_lj * prefactor[itype][jtype] * (1.0 + cos(arg));

-        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_sw_gpu.cpp
+++ b/src/GPU/pair_sw_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -26,7 +25,6 @@
 #include "gpu_extra.h"
 #include "memory.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,26 +32,21 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int sw_gpu_init(const int ntypes, const int inum, const int nall,
-                const int max_nbors, const double cell_size, int &gpu_mode,
-                FILE *screen, double **ncutsq, double **ncut, double **sigma,
-                double **powerp, double **powerq, double **sigma_gamma,
-                double **c1, double **c2, double **c3,double **c4,
-                double **c5, double **c6, double ***lambda_epsilon,
-                double ***costheta, const int *map, int ***e2param);
+int sw_gpu_init(const int ntypes, const int inum, const int nall, const int max_nbors,
+                const double cell_size, int &gpu_mode, FILE *screen, double **ncutsq, double **ncut,
+                double **sigma, double **powerp, double **powerq, double **sigma_gamma, double **c1,
+                double **c2, double **c3, double **c4, double **c5, double **c6,
+                double ***lambda_epsilon, double ***costheta, const int *map, int ***e2param);
 void sw_gpu_clear();
-int ** sw_gpu_compute_n(const int ago, const int inum, const int nall,
-                        double **host_x, int *host_type, double *sublo,
-                        double *subhi, tagint *tag, int **nspecial,
-                        tagint **special, const bool eflag, const bool vflag,
-                        const bool eatom, const bool vatom, int &host_start,
-                        int **ilist, int **jnum,
-                        const double cpu_time, bool &success);
-void sw_gpu_compute(const int ago, const int nloc, const int nall,
-                    const int ln, double **host_x, int *host_type, int *ilist,
-                    int *numj, int **firstneigh, const bool eflag,
-                    const bool vflag, const bool eatom, const bool vatom,
-                    int &host_start, const double cpu_time, bool &success);
+int **sw_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                       int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                       tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                       const bool vatom, int &host_start, int **ilist, int **jnum,
+                       const double cpu_time, bool &success);
+void sw_gpu_compute(const int ago, const int nloc, const int nall, const int ln, double **host_x,
+                    int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                    const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                    const double cpu_time, bool &success);
 double sw_gpu_bytes();

 #define MAXLINE 1024
@ -79,15 +72,14 @@ PairSWGPU::PairSWGPU(LAMMPS *lmp) : PairSW(lmp), gpu_mode(GPU_FORCE)
 PairSWGPU::~PairSWGPU()
 {
  sw_gpu_clear();
-  if (allocated)
-    memory->destroy(cutghost);
+  if (allocated) memory->destroy(cutghost);
 }

 /* ---------------------------------------------------------------------- */

 void PairSWGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -95,7 +87,7 @@ void PairSWGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -104,28 +96,24 @@ void PairSWGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = sw_gpu_compute_n(neighbor->ago, inum, nall,
-                                   atom->x, atom->type, sublo,
-                                   subhi, atom->tag, atom->nspecial,
-                                   atom->special, eflag, vflag, eflag_atom,
-                                   vflag_atom, host_start,
-                                   &ilist, &numneigh, cpu_time, success);
+    firstneigh =
+        sw_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+                         atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
+                         host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;

-    sw_gpu_compute(neighbor->ago, inum, nall, inum+list->gnum,
-                   atom->x, atom->type, ilist, numneigh, firstneigh, eflag,
-                   vflag, eflag_atom, vflag_atom, host_start, cpu_time,
+    sw_gpu_compute(neighbor->ago, inum, nall, inum + list->gnum, atom->x, atom->type, ilist,
+                   numneigh, firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
                   success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");
 }

 /* ---------------------------------------------------------------------- */
@ -135,7 +123,7 @@ void PairSWGPU::allocate()
  PairSW::allocate();
  int n = atom->ntypes;

-  memory->create(cutghost,n+1,n+1,"pair:cutghost");
+  memory->create(cutghost, n + 1, n + 1, "pair:cutghost");
 }

 /* ----------------------------------------------------------------------
@ -146,8 +134,7 @@ void PairSWGPU::init_style()
 {
  double cell_size = cutmax + neighbor->skin;

-  if (atom->tag_enable == 0)
-    error->all(FLERR,"Pair style sw/gpu requires atom IDs");
+  if (atom->tag_enable == 0) error->all(FLERR, "Pair style sw/gpu requires atom IDs");

  double **c1, **c2, **c3, **c4, **c5, **c6;
  double **ncutsq, **ncut, **sigma, **powerp, **powerq, **sigma_gamma;
@ -209,10 +196,9 @@ void PairSWGPU::init_style()
  }

  int mnf = 5e-2 * neighbor->oneatom;
-  int success = sw_gpu_init(tp1, atom->nlocal, atom->nlocal+atom->nghost, mnf,
-                            cell_size, gpu_mode, screen, ncutsq, ncut, sigma,
-                            powerp, powerq, sigma_gamma,  c1, c2, c3, c4, c5,
-                            c6, lambda_epsilon, costheta, map, elem3param);
+  int success = sw_gpu_init(tp1, atom->nlocal, atom->nlocal + atom->nghost, mnf, cell_size,
+                            gpu_mode, screen, ncutsq, ncut, sigma, powerp, powerq, sigma_gamma, c1,
+                            c2, c3, c4, c5, c6, lambda_epsilon, costheta, map, elem3param);

  memory->destroy(ncutsq);
  memory->destroy(ncut);
@ -229,18 +215,13 @@ void PairSWGPU::init_style()
  memory->destroy(lambda_epsilon);
  memory->destroy(costheta);

-  GPU_EXTRA::check_flag(success,error,world);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-    neighbor->requests[irequest]->ghost = 1;
-  }
-  if (comm->cutghostuser < (2.0*cutmax + neighbor->skin)) {
-    comm->cutghostuser=2.0*cutmax + neighbor->skin;
-    if (comm->me == 0)
-       error->warning(FLERR,"Increasing communication cutoff for GPU style");
+  if (gpu_mode == GPU_FORCE)
+    neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST);
+  if (comm->cutghostuser < (2.0 * cutmax + neighbor->skin)) {
+    comm->cutghostuser = 2.0 * cutmax + neighbor->skin;
+    if (comm->me == 0) error->warning(FLERR, "Increasing communication cutoff for GPU style");
  }
 }

@ -250,10 +231,9 @@ void PairSWGPU::init_style()

 double PairSWGPU::init_one(int i, int j)
 {
-  if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
+  if (setflag[i][j] == 0) error->all(FLERR, "All pair coeffs are not set");
  cutghost[i][j] = cutmax;
  cutghost[j][i] = cutmax;

  return cutmax;
 }
-
--- a/src/GPU/pair_table_gpu.cpp
+++ b/src/GPU/pair_table_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -25,7 +24,6 @@
 #include "gpu_extra.h"
 #include "memory.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -35,31 +33,25 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int table_gpu_init(const int ntypes, double **cutsq,
-                   double ***host_table_coeffs, double **host_table_data,
-                   double *special_lj, const int nlocal, const int nall,
-                   const int max_nbors, const int maxspecial,
-                   const double cell_size, int &gpu_mode, FILE *screen,
-                   int tabstyle, int ntables, int tablength);
+int table_gpu_init(const int ntypes, double **cutsq, double ***host_table_coeffs,
+                   double **host_table_data, double *special_lj, const int nlocal, const int nall,
+                   const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode,
+                   FILE *screen, int tabstyle, int ntables, int tablength);
 void table_gpu_clear();
-int ** table_gpu_compute_n(const int ago, const int inum, const int nall,
-                           double **host_x, int *host_type, double *sublo,
-                           double *subhi, tagint *tag, int **nspecial,
-                           tagint **special, const bool eflag, const bool vflag,
-                           const bool eatom, const bool vatom, int &host_start,
-                           int **ilist, int **jnum, const double cpu_time,
-                           bool &success);
-void table_gpu_compute(const int ago, const int inum, const int nall,
-                       double **host_x, int *host_type, int *ilist, int *numj,
-                       int **firstneigh, const bool eflag, const bool vflag,
-                       const bool eatom, const bool vatom, int &host_start,
+int **table_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                          int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                          tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                          const bool vatom, int &host_start, int **ilist, int **jnum,
+                          const double cpu_time, bool &success);
+void table_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                       int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                       const bool vflag, const bool eatom, const bool vatom, int &host_start,
                       const double cpu_time, bool &success);
 double table_gpu_bytes();

 /* ---------------------------------------------------------------------- */

-PairTableGPU::PairTableGPU(LAMMPS *lmp) : PairTable(lmp),
-                                          gpu_mode(GPU_FORCE)
+PairTableGPU::PairTableGPU(LAMMPS *lmp) : PairTable(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  reinitflag = 0;
@ -81,7 +73,7 @@ PairTableGPU::~PairTableGPU()

 void PairTableGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -89,7 +81,7 @@ void PairTableGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -98,28 +90,24 @@ void PairTableGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = table_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
-                                     atom->type, sublo, subhi,
-                                     atom->tag, atom->nspecial, atom->special,
-                                     eflag, vflag, eflag_atom, vflag_atom,
-                                     host_start, &ilist, &numneigh, cpu_time,
-                                     success);
+    firstneigh =
+        table_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+                            atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
+                            host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    table_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                      ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                      vflag_atom, host_start, cpu_time, success);
+    table_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                      eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -141,10 +129,9 @@ void PairTableGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -155,7 +142,7 @@ void PairTableGPU::init_style()
  // pack tables and send them to device
  double ***table_coeffs = nullptr;
  double **table_data = nullptr;
-  memory->create(table_coeffs, ntypes+1, ntypes+1, 6, "table:coeffs");
+  memory->create(table_coeffs, ntypes + 1, ntypes + 1, 6, "table:coeffs");

  Table *tb;
  for (int i = 1; i <= atom->ntypes; i++)
@ -171,67 +158,60 @@ void PairTableGPU::init_style()
    }

  if (tabstyle != BITMAP) {
-    memory->create(table_data, ntables, 6*tablength, "table:data");
+    memory->create(table_data, ntables, 6 * tablength, "table:data");
    for (int n = 0; n < ntables; n++) {
      tb = &tables[n];
      if (tabstyle == LOOKUP) {
-        for (int k = 0; k<tablength-1; k++) {
-          table_data[n][6*k+1] = tb->e[k];
-          table_data[n][6*k+2] = tb->f[k];
+        for (int k = 0; k < tablength - 1; k++) {
+          table_data[n][6 * k + 1] = tb->e[k];
+          table_data[n][6 * k + 2] = tb->f[k];
        }
      } else if (tabstyle == LINEAR) {
-        for (int k = 0; k<tablength; k++) {
-          table_data[n][6*k+0] = tb->rsq[k];
-          table_data[n][6*k+1] = tb->e[k];
-          table_data[n][6*k+2] = tb->f[k];
-          if (k<tablength-1) {
-            table_data[n][6*k+3] = tb->de[k];
-            table_data[n][6*k+4] = tb->df[k];
+        for (int k = 0; k < tablength; k++) {
+          table_data[n][6 * k + 0] = tb->rsq[k];
+          table_data[n][6 * k + 1] = tb->e[k];
+          table_data[n][6 * k + 2] = tb->f[k];
+          if (k < tablength - 1) {
+            table_data[n][6 * k + 3] = tb->de[k];
+            table_data[n][6 * k + 4] = tb->df[k];
          }
-       }
+        }
      } else if (tabstyle == SPLINE) {
-        for (int k = 0; k<tablength; k++) {
-          table_data[n][6*k+0] = tb->rsq[k];
-          table_data[n][6*k+1] = tb->e[k];
-          table_data[n][6*k+2] = tb->f[k];
-          table_data[n][6*k+3] = tb->e2[k];
-          table_data[n][6*k+4] = tb->f2[k];
+        for (int k = 0; k < tablength; k++) {
+          table_data[n][6 * k + 0] = tb->rsq[k];
+          table_data[n][6 * k + 1] = tb->e[k];
+          table_data[n][6 * k + 2] = tb->f[k];
+          table_data[n][6 * k + 3] = tb->e2[k];
+          table_data[n][6 * k + 4] = tb->f2[k];
        }
      }
    }
  } else {
    int ntable = 1 << tablength;
-    memory->create(table_data, ntables, 6*ntable, "table:data");
+    memory->create(table_data, ntables, 6 * ntable, "table:data");

    for (int n = 0; n < ntables; n++) {
      tb = &tables[n];
-      for (int k = 0; k<ntable; k++) {
-        table_data[n][6*k+0] = tb->rsq[k];
-        table_data[n][6*k+1] = tb->e[k];
-        table_data[n][6*k+2] = tb->f[k];
-        table_data[n][6*k+3] = tb->de[k];
-        table_data[n][6*k+4] = tb->df[k];
-        table_data[n][6*k+5] = tb->drsq[k];
+      for (int k = 0; k < ntable; k++) {
+        table_data[n][6 * k + 0] = tb->rsq[k];
+        table_data[n][6 * k + 1] = tb->e[k];
+        table_data[n][6 * k + 2] = tb->f[k];
+        table_data[n][6 * k + 3] = tb->de[k];
+        table_data[n][6 * k + 4] = tb->df[k];
+        table_data[n][6 * k + 5] = tb->drsq[k];
      }
    }
  }

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = table_gpu_init(atom->ntypes+1, cutsq, table_coeffs, table_data,
-                               force->special_lj, atom->nlocal,
-                               atom->nlocal+atom->nghost, mnf, maxspecial,
-                               cell_size, gpu_mode, screen, tabstyle, ntables,
-                               tablength);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success = table_gpu_init(atom->ntypes + 1, cutsq, table_coeffs, table_data, force->special_lj,
+                               atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial,
+                               cell_size, gpu_mode, screen, tabstyle, ntables, tablength);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
  memory->destroy(table_coeffs);
  memory->destroy(table_data);
 }
@ -246,11 +226,12 @@ double PairTableGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairTableGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
-                               int *ilist, int *numneigh, int **firstneigh) {
-  int i,j,ii,jj,jnum,itype,jtype,itable;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,factor_lj,fraction,value,a,b;
+void PairTableGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                               int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum, itype, jtype, itable;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
+  double rsq, factor_lj, fraction, value, a, b;
  int *jlist;
  Table *tb;

@ -281,62 +262,58 @@ void PairTableGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
        tb = &tables[tabindex[itype][jtype]];
-        if (rsq < tb->innersq)
-          error->one(FLERR,"Pair distance < table inner cutoff");
+        if (rsq < tb->innersq) error->one(FLERR, "Pair distance < table inner cutoff");

        if (tabstyle == LOOKUP) {
-          itable = static_cast<int> ((rsq - tb->innersq) * tb->invdelta);
-          if (itable >= tlm1)
-            error->one(FLERR,"Pair distance > table outer cutoff");
+          itable = static_cast<int>((rsq - tb->innersq) * tb->invdelta);
+          if (itable >= tlm1) error->one(FLERR, "Pair distance > table outer cutoff");
          fpair = factor_lj * tb->f[itable];
        } else if (tabstyle == LINEAR) {
-          itable = static_cast<int> ((rsq - tb->innersq) * tb->invdelta);
-          if (itable >= tlm1)
-            error->one(FLERR,"Pair distance > table outer cutoff");
+          itable = static_cast<int>((rsq - tb->innersq) * tb->invdelta);
+          if (itable >= tlm1) error->one(FLERR, "Pair distance > table outer cutoff");
          fraction = (rsq - tb->rsq[itable]) * tb->invdelta;
-          value = tb->f[itable] + fraction*tb->df[itable];
+          value = tb->f[itable] + fraction * tb->df[itable];
          fpair = factor_lj * value;
        } else if (tabstyle == SPLINE) {
-          itable = static_cast<int> ((rsq - tb->innersq) * tb->invdelta);
-          if (itable >= tlm1)
-            error->one(FLERR,"Pair distance > table outer cutoff");
+          itable = static_cast<int>((rsq - tb->innersq) * tb->invdelta);
+          if (itable >= tlm1) error->one(FLERR, "Pair distance > table outer cutoff");
          b = (rsq - tb->rsq[itable]) * tb->invdelta;
          a = 1.0 - b;
-          value = a * tb->f[itable] + b * tb->f[itable+1] +
-            ((a*a*a-a)*tb->f2[itable] + (b*b*b-b)*tb->f2[itable+1]) *
-            tb->deltasq6;
+          value = a * tb->f[itable] + b * tb->f[itable + 1] +
+              ((a * a * a - a) * tb->f2[itable] + (b * b * b - b) * tb->f2[itable + 1]) *
+                  tb->deltasq6;
          fpair = factor_lj * value;
        } else {
          rsq_lookup.f = rsq;
          itable = rsq_lookup.i & tb->nmask;
          itable >>= tb->nshiftbits;
          fraction = (rsq_lookup.f - tb->rsq[itable]) * tb->drsq[itable];
-          value = tb->f[itable] + fraction*tb->df[itable];
+          value = tb->f[itable] + fraction * tb->df[itable];
          fpair = factor_lj * value;
        }

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          if (tabstyle == LOOKUP)
            evdwl = tb->e[itable];
          else if (tabstyle == LINEAR || tabstyle == BITMAP)
-            evdwl = tb->e[itable] + fraction*tb->de[itable];
+            evdwl = tb->e[itable] + fraction * tb->de[itable];
          else
-            evdwl = a * tb->e[itable] + b * tb->e[itable+1] +
-              ((a*a*a-a)*tb->e2[itable] + (b*b*b-b)*tb->e2[itable+1]) *
-              tb->deltasq6;
+            evdwl = a * tb->e[itable] + b * tb->e[itable + 1] +
+                ((a * a * a - a) * tb->e2[itable] + (b * b * b - b) * tb->e2[itable + 1]) *
+                    tb->deltasq6;
          evdwl *= factor_lj;
        }

-        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_tersoff_gpu.cpp
+++ b/src/GPU/pair_tersoff_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -25,42 +24,34 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "memory.h"
-#include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
+#include "neighbor.h"
 #include "suffix.h"

 using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int tersoff_gpu_init(const int ntypes, const int inum, const int nall,
-                     const int max_nbors, const double cell_size, int &gpu_mode,
-                     FILE *screen, int* host_map, const int nelements,
-                     int*** host_elem3param, const int nparams,
-                     const double* ts_lam1, const double* ts_lam2,
-                     const double* ts_lam3, const double* ts_powermint,
-                     const double* ts_biga, const double* ts_bigb,
-                     const double* ts_bigr, const double* ts_bigd,
-                     const double* ts_c1, const double* ts_c2,
-                     const double* ts_c3, const double* ts_c4,
-                     const double* ts_c, const double* ts_d,
-                     const double* ts_h, const double* ts_gamma,
-                     const double* ts_beta, const double* ts_powern,
-                     const double* ts_cutsq);
+int tersoff_gpu_init(const int ntypes, const int inum, const int nall, const int max_nbors,
+                     const double cell_size, int &gpu_mode, FILE *screen, int *host_map,
+                     const int nelements, int ***host_elem3param, const int nparams,
+                     const double *ts_lam1, const double *ts_lam2, const double *ts_lam3,
+                     const double *ts_powermint, const double *ts_biga, const double *ts_bigb,
+                     const double *ts_bigr, const double *ts_bigd, const double *ts_c1,
+                     const double *ts_c2, const double *ts_c3, const double *ts_c4,
+                     const double *ts_c, const double *ts_d, const double *ts_h,
+                     const double *ts_gamma, const double *ts_beta, const double *ts_powern,
+                     const double *ts_cutsq);
 void tersoff_gpu_clear();
-int ** tersoff_gpu_compute_n(const int ago, const int inum_full,
-                    const int nall, double **host_x, int *host_type,
-                    double *sublo, double *subhi, tagint *tag, int **nspecial,
-                    tagint **special, const bool eflag, const bool vflag,
-                    const bool eatom, const bool vatom, int &host_start,
-                    int **ilist, int **jnum, const double cpu_time,
-                    bool &success);
-void tersoff_gpu_compute(const int ago, const int nlocal, const int nall,
-                    const int nlist, double **host_x, int *host_type,
-                    int *ilist, int *numj, int **firstneigh, const bool eflag,
-                    const bool vflag, const bool eatom, const bool vatom,
-                    int &host_start, const double cpu_time, bool &success);
+int **tersoff_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
+                            int *host_type, double *sublo, double *subhi, tagint *tag,
+                            int **nspecial, tagint **special, const bool eflag, const bool vflag,
+                            const bool eatom, const bool vatom, int &host_start, int **ilist,
+                            int **jnum, const double cpu_time, bool &success);
+void tersoff_gpu_compute(const int ago, const int nlocal, const int nall, const int nlist,
+                         double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh,
+                         const bool eflag, const bool vflag, const bool eatom, const bool vatom,
+                         int &host_start, const double cpu_time, bool &success);
 double tersoff_gpu_bytes();

 #define MAXLINE 1024
@ -85,15 +76,14 @@ PairTersoffGPU::PairTersoffGPU(LAMMPS *lmp) : PairTersoff(lmp), gpu_mode(GPU_FOR
 PairTersoffGPU::~PairTersoffGPU()
 {
  tersoff_gpu_clear();
-  if (allocated)
-    memory->destroy(cutghost);
+  if (allocated) memory->destroy(cutghost);
 }

 /* ---------------------------------------------------------------------- */

 void PairTersoffGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -101,7 +91,7 @@ void PairTersoffGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -110,28 +100,24 @@ void PairTersoffGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = tersoff_gpu_compute_n(neighbor->ago, inum, nall,
-                                  atom->x, atom->type, sublo,
-                                  subhi, atom->tag, atom->nspecial,
-                                  atom->special, eflag, vflag, eflag_atom,
-                                  vflag_atom, host_start,
-                                  &ilist, &numneigh, cpu_time, success);
+    firstneigh =
+        tersoff_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                              atom->tag, atom->nspecial, atom->special, eflag, vflag, eflag_atom,
+                              vflag_atom, host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;

-    tersoff_gpu_compute(neighbor->ago, inum, nall, inum+list->gnum,
-                   atom->x, atom->type, ilist, numneigh, firstneigh, eflag,
-                   vflag, eflag_atom, vflag_atom, host_start, cpu_time,
-                   success);
+    tersoff_gpu_compute(neighbor->ago, inum, nall, inum + list->gnum, atom->x, atom->type, ilist,
+                        numneigh, firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start,
+                        cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");
 }

 /* ---------------------------------------------------------------------- */
@ -141,7 +127,7 @@ void PairTersoffGPU::allocate()
  PairTersoff::allocate();
  int n = atom->ntypes;

-  memory->create(cutghost,n+1,n+1,"pair:cutghost");
+  memory->create(cutghost, n + 1, n + 1, "pair:cutghost");
 }

 /* ----------------------------------------------------------------------
@ -152,8 +138,7 @@ void PairTersoffGPU::init_style()
 {
  double cell_size = cutmax + neighbor->skin;

-  if (atom->tag_enable == 0)
-    error->all(FLERR,"Pair style tersoff/gpu requires atom IDs");
+  if (atom->tag_enable == 0) error->all(FLERR, "Pair style tersoff/gpu requires atom IDs");

  double *lam1, *lam2, *lam3, *powermint;
  double *biga, *bigb, *bigr, *bigd;
@ -166,25 +151,25 @@ void PairTersoffGPU::init_style()
  c = d = h = gamma = nullptr;
  beta = powern = _cutsq = nullptr;

-  memory->create(lam1,nparams,"pair:lam1");
-  memory->create(lam2,nparams,"pair:lam2");
-  memory->create(lam3,nparams,"pair:lam3");
-  memory->create(powermint,nparams,"pair:powermint");
-  memory->create(biga,nparams,"pair:biga");
-  memory->create(bigb,nparams,"pair:bigb");
-  memory->create(bigr,nparams,"pair:bigr");
-  memory->create(bigd,nparams,"pair:bigd");
-  memory->create(c1,nparams,"pair:c1");
-  memory->create(c2,nparams,"pair:c2");
-  memory->create(c3,nparams,"pair:c3");
-  memory->create(c4,nparams,"pair:c4");
-  memory->create(c,nparams,"pair:c");
-  memory->create(d,nparams,"pair:d");
-  memory->create(h,nparams,"pair:h");
-  memory->create(gamma,nparams,"pair:gamma");
-  memory->create(beta,nparams,"pair:beta");
-  memory->create(powern,nparams,"pair:powern");
-  memory->create(_cutsq,nparams,"pair:_cutsq");
+  memory->create(lam1, nparams, "pair:lam1");
+  memory->create(lam2, nparams, "pair:lam2");
+  memory->create(lam3, nparams, "pair:lam3");
+  memory->create(powermint, nparams, "pair:powermint");
+  memory->create(biga, nparams, "pair:biga");
+  memory->create(bigb, nparams, "pair:bigb");
+  memory->create(bigr, nparams, "pair:bigr");
+  memory->create(bigd, nparams, "pair:bigd");
+  memory->create(c1, nparams, "pair:c1");
+  memory->create(c2, nparams, "pair:c2");
+  memory->create(c3, nparams, "pair:c3");
+  memory->create(c4, nparams, "pair:c4");
+  memory->create(c, nparams, "pair:c");
+  memory->create(d, nparams, "pair:d");
+  memory->create(h, nparams, "pair:h");
+  memory->create(gamma, nparams, "pair:gamma");
+  memory->create(beta, nparams, "pair:beta");
+  memory->create(powern, nparams, "pair:powern");
+  memory->create(_cutsq, nparams, "pair:_cutsq");

  for (int i = 0; i < nparams; i++) {
    lam1[i] = params[i].lam1;
@ -209,13 +194,10 @@ void PairTersoffGPU::init_style()
  }

  int mnf = 5e-2 * neighbor->oneatom;
-  int success = tersoff_gpu_init(atom->ntypes+1, atom->nlocal,
-                                 atom->nlocal+atom->nghost, mnf,
-                                 cell_size, gpu_mode, screen, map, nelements,
-                                 elem3param, nparams, lam1, lam2, lam3,
-                                 powermint, biga, bigb, bigr, bigd,
-                                 c1, c2, c3, c4, c, d, h, gamma,
-                                 beta, powern, _cutsq);
+  int success = tersoff_gpu_init(atom->ntypes + 1, atom->nlocal, atom->nlocal + atom->nghost, mnf,
+                                 cell_size, gpu_mode, screen, map, nelements, elem3param, nparams,
+                                 lam1, lam2, lam3, powermint, biga, bigb, bigr, bigd, c1, c2, c3,
+                                 c4, c, d, h, gamma, beta, powern, _cutsq);

  memory->destroy(lam1);
  memory->destroy(lam2);
@ -237,18 +219,13 @@ void PairTersoffGPU::init_style()
  memory->destroy(powern);
  memory->destroy(_cutsq);

-  GPU_EXTRA::check_flag(success,error,world);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-    neighbor->requests[irequest]->ghost = 1;
-  }
-  if (comm->cutghostuser < (2.0*cutmax + neighbor->skin)) {
-    comm->cutghostuser = 2.0*cutmax + neighbor->skin;
-    if (comm->me == 0)
-       error->warning(FLERR,"Increasing communication cutoff for GPU style");
+  if (gpu_mode == GPU_FORCE)
+    neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST);
+  if (comm->cutghostuser < (2.0 * cutmax + neighbor->skin)) {
+    comm->cutghostuser = 2.0 * cutmax + neighbor->skin;
+    if (comm->me == 0) error->warning(FLERR, "Increasing communication cutoff for GPU style");
  }
 }

@ -258,10 +235,9 @@ void PairTersoffGPU::init_style()

 double PairTersoffGPU::init_one(int i, int j)
 {
-  if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
+  if (setflag[i][j] == 0) error->all(FLERR, "All pair coeffs are not set");
  cutghost[i][j] = cutmax;
  cutghost[j][i] = cutmax;

  return cutmax;
 }
-
--- a/src/GPU/pair_tersoff_mod_gpu.cpp
+++ b/src/GPU/pair_tersoff_mod_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -26,7 +25,6 @@
 #include "gpu_extra.h"
 #include "memory.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,35 +32,33 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int tersoff_mod_gpu_init(const int ntypes, const int inum, const int nall,
-  const int max_nbors, const double cell_size, int &gpu_mode, FILE *screen,
-  int* host_map, const int nelements, int*** host_elem3param, const int nparams,
-  const double* ts_lam1, const double* ts_lam2, const double* ts_lam3,
-  const double* ts_powermint, const double* ts_biga, const double* ts_bigb,
-  const double* ts_bigr, const double* ts_bigd, const double* ts_c1,
-  const double* ts_c2, const double* ts_c3, const double* ts_c4,
-  const double* ts_c5, const double* ts_h, const double* ts_beta,
-  const double* ts_powern, const double* ts_powern_del,
-  const double* ts_ca1, const double* ts_cutsq);
+int tersoff_mod_gpu_init(const int ntypes, const int inum, const int nall, const int max_nbors,
+                         const double cell_size, int &gpu_mode, FILE *screen, int *host_map,
+                         const int nelements, int ***host_elem3param, const int nparams,
+                         const double *ts_lam1, const double *ts_lam2, const double *ts_lam3,
+                         const double *ts_powermint, const double *ts_biga, const double *ts_bigb,
+                         const double *ts_bigr, const double *ts_bigd, const double *ts_c1,
+                         const double *ts_c2, const double *ts_c3, const double *ts_c4,
+                         const double *ts_c5, const double *ts_h, const double *ts_beta,
+                         const double *ts_powern, const double *ts_powern_del, const double *ts_ca1,
+                         const double *ts_cutsq);
 void tersoff_mod_gpu_clear();
-int ** tersoff_mod_gpu_compute_n(const int ago, const int inum_full,
-                    const int nall, double **host_x, int *host_type,
-                    double *sublo, double *subhi, tagint *tag, int **nspecial,
-                    tagint **special, const bool eflag, const bool vflag,
-                    const bool eatom, const bool vatom, int &host_start,
-                    int **ilist, int **jnum, const double cpu_time,
-                    bool &success);
-void tersoff_mod_gpu_compute(const int ago, const int nlocal, const int nall,
-                    const int nlist, double **host_x, int *host_type,
-                    int *ilist, int *numj, int **firstneigh, const bool eflag,
-                    const bool vflag, const bool eatom, const bool vatom,
-                    int &host_start, const double cpu_time, bool &success);
+int **tersoff_mod_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
+                                int *host_type, double *sublo, double *subhi, tagint *tag,
+                                int **nspecial, tagint **special, const bool eflag,
+                                const bool vflag, const bool eatom, const bool vatom,
+                                int &host_start, int **ilist, int **jnum, const double cpu_time,
+                                bool &success);
+void tersoff_mod_gpu_compute(const int ago, const int nlocal, const int nall, const int nlist,
+                             double **host_x, int *host_type, int *ilist, int *numj,
+                             int **firstneigh, const bool eflag, const bool vflag, const bool eatom,
+                             const bool vatom, int &host_start, const double cpu_time,
+                             bool &success);
 double tersoff_mod_gpu_bytes();

 /* ---------------------------------------------------------------------- */

-PairTersoffMODGPU::PairTersoffMODGPU(LAMMPS *lmp) : PairTersoffMOD(lmp),
-  gpu_mode(GPU_FORCE)
+PairTersoffMODGPU::PairTersoffMODGPU(LAMMPS *lmp) : PairTersoffMOD(lmp), gpu_mode(GPU_FORCE)
 {
  cpu_time = 0.0;
  suffix_flag |= Suffix::GPU;
@ -79,15 +75,14 @@ PairTersoffMODGPU::PairTersoffMODGPU(LAMMPS *lmp) : PairTersoffMOD(lmp),
 PairTersoffMODGPU::~PairTersoffMODGPU()
 {
  tersoff_mod_gpu_clear();
-  if (allocated)
-    memory->destroy(cutghost);
+  if (allocated) memory->destroy(cutghost);
 }

 /* ---------------------------------------------------------------------- */

 void PairTersoffMODGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -95,7 +90,7 @@ void PairTersoffMODGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -104,28 +99,24 @@ void PairTersoffMODGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = tersoff_mod_gpu_compute_n(neighbor->ago, inum, nall,
-                                  atom->x, atom->type, sublo,
-                                  subhi, atom->tag, atom->nspecial,
-                                  atom->special, eflag, vflag, eflag_atom,
-                                  vflag_atom, host_start,
-                                  &ilist, &numneigh, cpu_time, success);
+    firstneigh = tersoff_mod_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo,
+                                           subhi, atom->tag, atom->nspecial, atom->special, eflag,
+                                           vflag, eflag_atom, vflag_atom, host_start, &ilist,
+                                           &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;

-    tersoff_mod_gpu_compute(neighbor->ago, inum, nall, inum+list->gnum,
-                   atom->x, atom->type, ilist, numneigh, firstneigh, eflag,
-                   vflag, eflag_atom, vflag_atom, host_start, cpu_time,
-                   success);
+    tersoff_mod_gpu_compute(neighbor->ago, inum, nall, inum + list->gnum, atom->x, atom->type,
+                            ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, vflag_atom,
+                            host_start, cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");
 }

 /* ---------------------------------------------------------------------- */
@ -135,7 +126,7 @@ void PairTersoffMODGPU::allocate()
  PairTersoffMOD::allocate();
  int n = atom->ntypes;

-  memory->create(cutghost,n+1,n+1,"pair:cutghost");
+  memory->create(cutghost, n + 1, n + 1, "pair:cutghost");
 }

 /* ----------------------------------------------------------------------
@ -146,8 +137,7 @@ void PairTersoffMODGPU::init_style()
 {
  double cell_size = cutmax + neighbor->skin;

-  if (atom->tag_enable == 0)
-    error->all(FLERR,"Pair style tersoff/mod/gpu requires atom IDs");
+  if (atom->tag_enable == 0) error->all(FLERR, "Pair style tersoff/mod/gpu requires atom IDs");

  double *lam1, *lam2, *lam3, *powermint;
  double *biga, *bigb, *bigr, *bigd;
@ -159,25 +149,25 @@ void PairTersoffMODGPU::init_style()
  c1 = c2 = c3 = c4 = c5 = h = nullptr;
  beta = powern = _cutsq = nullptr;

-  memory->create(lam1,nparams,"pair:lam1");
-  memory->create(lam2,nparams,"pair:lam2");
-  memory->create(lam3,nparams,"pair:lam3");
-  memory->create(powermint,nparams,"pair:powermint");
-  memory->create(biga,nparams,"pair:biga");
-  memory->create(bigb,nparams,"pair:bigb");
-  memory->create(bigr,nparams,"pair:bigr");
-  memory->create(bigd,nparams,"pair:bigd");
-  memory->create(c1,nparams,"pair:c1");
-  memory->create(c2,nparams,"pair:c2");
-  memory->create(c3,nparams,"pair:c3");
-  memory->create(c4,nparams,"pair:c4");
-  memory->create(c5,nparams,"pair:c5");
-  memory->create(h,nparams,"pair:h");
-  memory->create(beta,nparams,"pair:beta");
-  memory->create(powern,nparams,"pair:powern");
-  memory->create(powern_del,nparams,"pair:powern_del");
-  memory->create(ca1,nparams,"pair:ca1");
-  memory->create(_cutsq,nparams,"pair:_cutsq");
+  memory->create(lam1, nparams, "pair:lam1");
+  memory->create(lam2, nparams, "pair:lam2");
+  memory->create(lam3, nparams, "pair:lam3");
+  memory->create(powermint, nparams, "pair:powermint");
+  memory->create(biga, nparams, "pair:biga");
+  memory->create(bigb, nparams, "pair:bigb");
+  memory->create(bigr, nparams, "pair:bigr");
+  memory->create(bigd, nparams, "pair:bigd");
+  memory->create(c1, nparams, "pair:c1");
+  memory->create(c2, nparams, "pair:c2");
+  memory->create(c3, nparams, "pair:c3");
+  memory->create(c4, nparams, "pair:c4");
+  memory->create(c5, nparams, "pair:c5");
+  memory->create(h, nparams, "pair:h");
+  memory->create(beta, nparams, "pair:beta");
+  memory->create(powern, nparams, "pair:powern");
+  memory->create(powern_del, nparams, "pair:powern_del");
+  memory->create(ca1, nparams, "pair:ca1");
+  memory->create(_cutsq, nparams, "pair:_cutsq");

  for (int i = 0; i < nparams; i++) {
    lam1[i] = params[i].lam1;
@ -202,13 +192,10 @@ void PairTersoffMODGPU::init_style()
  }

  int mnf = 5e-2 * neighbor->oneatom;
-  int success = tersoff_mod_gpu_init(atom->ntypes+1, atom->nlocal,
-                                 atom->nlocal+atom->nghost, mnf,
-                                 cell_size, gpu_mode, screen, map, nelements,
-                                 elem3param, nparams, lam1, lam2, lam3,
-                                 powermint, biga, bigb, bigr, bigd,
-                                 c1, c2, c3, c4, c5, h, beta, powern,
-                                 powern_del, ca1, _cutsq);
+  int success = tersoff_mod_gpu_init(atom->ntypes + 1, atom->nlocal, atom->nlocal + atom->nghost,
+                                     mnf, cell_size, gpu_mode, screen, map, nelements, elem3param,
+                                     nparams, lam1, lam2, lam3, powermint, biga, bigb, bigr, bigd,
+                                     c1, c2, c3, c4, c5, h, beta, powern, powern_del, ca1, _cutsq);

  memory->destroy(lam1);
  memory->destroy(lam2);
@ -230,18 +217,13 @@ void PairTersoffMODGPU::init_style()
  memory->destroy(powern_del);
  memory->destroy(_cutsq);

-  GPU_EXTRA::check_flag(success,error,world);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-    neighbor->requests[irequest]->ghost = 1;
-  }
-  if (comm->cutghostuser < (2.0*cutmax + neighbor->skin)) {
-    comm->cutghostuser = 2.0*cutmax + neighbor->skin;
-    if (comm->me == 0)
-       error->warning(FLERR,"Increasing communication cutoff for GPU style");
+  if (gpu_mode == GPU_FORCE)
+    neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST);
+  if (comm->cutghostuser < (2.0 * cutmax + neighbor->skin)) {
+    comm->cutghostuser = 2.0 * cutmax + neighbor->skin;
+    if (comm->me == 0) error->warning(FLERR, "Increasing communication cutoff for GPU style");
  }
 }

@ -251,10 +233,9 @@ void PairTersoffMODGPU::init_style()

 double PairTersoffMODGPU::init_one(int i, int j)
 {
-  if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
+  if (setflag[i][j] == 0) error->all(FLERR, "All pair coeffs are not set");
  cutghost[i][j] = cutmax;
  cutghost[j][i] = cutmax;

  return cutmax;
 }
-
--- a/src/GPU/pair_tersoff_zbl_gpu.cpp
+++ b/src/GPU/pair_tersoff_zbl_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -34,42 +33,36 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int tersoff_zbl_gpu_init(const int ntypes, const int inum, const int nall,
-                     const int max_nbors, const double cell_size, int &gpu_mode,
-                     FILE *screen, int* host_map, const int nelements,
-                     int*** host_elem3param, const int nparams,
-                     const double* ts_lam1, const double* ts_lam2,
-                     const double* ts_lam3, const double* ts_powermint,
-                     const double* ts_biga, const double* ts_bigb,
-                     const double* ts_bigr, const double* ts_bigd,
-                     const double* ts_c1, const double* ts_c2,
-                     const double* ts_c3, const double* ts_c4,
-                     const double* ts_c, const double* ts_d,
-                     const double* ts_h, const double* ts_gamma,
-                     const double* ts_beta, const double* ts_powern,
-                     const double* ts_Z_i, const double* ts_Z_j,
-                     const double* ts_ZBLcut, const double* ts_ZBLexpscale,
-                     const double global_e, const double global_a_0,
-                     const double global_epsilon_0, const double* ts_cutsq);
+int tersoff_zbl_gpu_init(const int ntypes, const int inum, const int nall, const int max_nbors,
+                         const double cell_size, int &gpu_mode, FILE *screen, int *host_map,
+                         const int nelements, int ***host_elem3param, const int nparams,
+                         const double *ts_lam1, const double *ts_lam2, const double *ts_lam3,
+                         const double *ts_powermint, const double *ts_biga, const double *ts_bigb,
+                         const double *ts_bigr, const double *ts_bigd, const double *ts_c1,
+                         const double *ts_c2, const double *ts_c3, const double *ts_c4,
+                         const double *ts_c, const double *ts_d, const double *ts_h,
+                         const double *ts_gamma, const double *ts_beta, const double *ts_powern,
+                         const double *ts_Z_i, const double *ts_Z_j, const double *ts_ZBLcut,
+                         const double *ts_ZBLexpscale, const double global_e,
+                         const double global_a_0, const double global_epsilon_0,
+                         const double *ts_cutsq);
 void tersoff_zbl_gpu_clear();
-int ** tersoff_zbl_gpu_compute_n(const int ago, const int inum_full,
-                    const int nall, double **host_x, int *host_type,
-                    double *sublo, double *subhi, tagint *tag, int **nspecial,
-                    tagint **special, const bool eflag, const bool vflag,
-                    const bool eatom, const bool vatom, int &host_start,
-                    int **ilist, int **jnum, const double cpu_time,
-                    bool &success);
-void tersoff_zbl_gpu_compute(const int ago, const int nlocal, const int nall,
-                    const int nlist, double **host_x, int *host_type,
-                    int *ilist, int *numj, int **firstneigh, const bool eflag,
-                    const bool vflag, const bool eatom, const bool vatom,
-                    int &host_start, const double cpu_time, bool &success);
+int **tersoff_zbl_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
+                                int *host_type, double *sublo, double *subhi, tagint *tag,
+                                int **nspecial, tagint **special, const bool eflag,
+                                const bool vflag, const bool eatom, const bool vatom,
+                                int &host_start, int **ilist, int **jnum, const double cpu_time,
+                                bool &success);
+void tersoff_zbl_gpu_compute(const int ago, const int nlocal, const int nall, const int nlist,
+                             double **host_x, int *host_type, int *ilist, int *numj,
+                             int **firstneigh, const bool eflag, const bool vflag, const bool eatom,
+                             const bool vatom, int &host_start, const double cpu_time,
+                             bool &success);
 double tersoff_zbl_gpu_bytes();

 /* ---------------------------------------------------------------------- */

-PairTersoffZBLGPU::PairTersoffZBLGPU(LAMMPS *lmp) : PairTersoffZBL(lmp),
-  gpu_mode(GPU_FORCE)
+PairTersoffZBLGPU::PairTersoffZBLGPU(LAMMPS *lmp) : PairTersoffZBL(lmp), gpu_mode(GPU_FORCE)
 {
  cpu_time = 0.0;
  suffix_flag |= Suffix::GPU;
@ -86,15 +79,14 @@ PairTersoffZBLGPU::PairTersoffZBLGPU(LAMMPS *lmp) : PairTersoffZBL(lmp),
 PairTersoffZBLGPU::~PairTersoffZBLGPU()
 {
  tersoff_zbl_gpu_clear();
-  if (allocated)
-    memory->destroy(cutghost);
+  if (allocated) memory->destroy(cutghost);
 }

 /* ---------------------------------------------------------------------- */

 void PairTersoffZBLGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -102,7 +94,7 @@ void PairTersoffZBLGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -111,28 +103,24 @@ void PairTersoffZBLGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = tersoff_zbl_gpu_compute_n(neighbor->ago, inum, nall,
-                                  atom->x, atom->type, sublo,
-                                  subhi, atom->tag, atom->nspecial,
-                                  atom->special, eflag, vflag, eflag_atom,
-                                  vflag_atom, host_start,
-                                  &ilist, &numneigh, cpu_time, success);
+    firstneigh = tersoff_zbl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo,
+                                           subhi, atom->tag, atom->nspecial, atom->special, eflag,
+                                           vflag, eflag_atom, vflag_atom, host_start, &ilist,
+                                           &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;

-    tersoff_zbl_gpu_compute(neighbor->ago, inum, nall, inum+list->gnum,
-                   atom->x, atom->type, ilist, numneigh, firstneigh, eflag,
-                   vflag, eflag_atom, vflag_atom, host_start, cpu_time,
-                   success);
+    tersoff_zbl_gpu_compute(neighbor->ago, inum, nall, inum + list->gnum, atom->x, atom->type,
+                            ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, vflag_atom,
+                            host_start, cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");
 }

 /* ---------------------------------------------------------------------- */
@ -140,9 +128,9 @@ void PairTersoffZBLGPU::compute(int eflag, int vflag)
 void PairTersoffZBLGPU::allocate()
 {
  PairTersoffZBL::allocate();
-  int n = atom->ntypes;
+  int np1 = atom->ntypes + 1;

-  memory->create(cutghost,n+1,n+1,"pair:cutghost");
+  memory->create(cutghost, np1, np1, "pair:cutghost");
 }

 /* ----------------------------------------------------------------------
@ -153,8 +141,7 @@ void PairTersoffZBLGPU::init_style()
 {
  double cell_size = cutmax + neighbor->skin;

-  if (atom->tag_enable == 0)
-    error->all(FLERR,"Pair style tersoff/zbl/gpu requires atom IDs");
+  if (atom->tag_enable == 0) error->all(FLERR, "Pair style tersoff/zbl/gpu requires atom IDs");

  double *lam1, *lam2, *lam3, *powermint;
  double *biga, *bigb, *bigr, *bigd;
@ -167,29 +154,29 @@ void PairTersoffZBLGPU::init_style()
  c = d = h = gamma = nullptr;
  beta = powern = Z_i = Z_j = ZBLcut = ZBLexpscale = _cutsq = nullptr;

-  memory->create(lam1,nparams,"pair:lam1");
-  memory->create(lam2,nparams,"pair:lam2");
-  memory->create(lam3,nparams,"pair:lam3");
-  memory->create(powermint,nparams,"pair:powermint");
-  memory->create(biga,nparams,"pair:biga");
-  memory->create(bigb,nparams,"pair:bigb");
-  memory->create(bigr,nparams,"pair:bigr");
-  memory->create(bigd,nparams,"pair:bigd");
-  memory->create(c1,nparams,"pair:c1");
-  memory->create(c2,nparams,"pair:c2");
-  memory->create(c3,nparams,"pair:c3");
-  memory->create(c4,nparams,"pair:c4");
-  memory->create(c,nparams,"pair:c");
-  memory->create(d,nparams,"pair:d");
-  memory->create(h,nparams,"pair:h");
-  memory->create(gamma,nparams,"pair:gamma");
-  memory->create(beta,nparams,"pair:beta");
-  memory->create(powern,nparams,"pair:powern");
-  memory->create(Z_i,nparams,"pair:Z_i");
-  memory->create(Z_j,nparams,"pair:Z_j");
-  memory->create(ZBLcut,nparams,"pair:ZBLcut");
-  memory->create(ZBLexpscale,nparams,"pair:ZBLexpscale");
-  memory->create(_cutsq,nparams,"pair:_cutsq");
+  memory->create(lam1, nparams, "pair:lam1");
+  memory->create(lam2, nparams, "pair:lam2");
+  memory->create(lam3, nparams, "pair:lam3");
+  memory->create(powermint, nparams, "pair:powermint");
+  memory->create(biga, nparams, "pair:biga");
+  memory->create(bigb, nparams, "pair:bigb");
+  memory->create(bigr, nparams, "pair:bigr");
+  memory->create(bigd, nparams, "pair:bigd");
+  memory->create(c1, nparams, "pair:c1");
+  memory->create(c2, nparams, "pair:c2");
+  memory->create(c3, nparams, "pair:c3");
+  memory->create(c4, nparams, "pair:c4");
+  memory->create(c, nparams, "pair:c");
+  memory->create(d, nparams, "pair:d");
+  memory->create(h, nparams, "pair:h");
+  memory->create(gamma, nparams, "pair:gamma");
+  memory->create(beta, nparams, "pair:beta");
+  memory->create(powern, nparams, "pair:powern");
+  memory->create(Z_i, nparams, "pair:Z_i");
+  memory->create(Z_j, nparams, "pair:Z_j");
+  memory->create(ZBLcut, nparams, "pair:ZBLcut");
+  memory->create(ZBLexpscale, nparams, "pair:ZBLexpscale");
+  memory->create(_cutsq, nparams, "pair:_cutsq");

  for (int i = 0; i < nparams; i++) {
    lam1[i] = params[i].lam1;
@ -218,14 +205,11 @@ void PairTersoffZBLGPU::init_style()
  }

  int mnf = 5e-2 * neighbor->oneatom;
-  int success = tersoff_zbl_gpu_init(atom->ntypes+1, atom->nlocal,
-                                 atom->nlocal+atom->nghost, mnf,
-                                 cell_size, gpu_mode, screen, map, nelements,
-                                 elem3param, nparams, lam1, lam2, lam3,
-                                 powermint, biga, bigb, bigr, bigd,
-                                 c1, c2, c3, c4, c, d, h, gamma,
-                                 beta, powern, Z_i, Z_j, ZBLcut, ZBLexpscale,
-                                 global_e, global_a_0, global_epsilon_0, _cutsq);
+  int success = tersoff_zbl_gpu_init(atom->ntypes + 1, atom->nlocal, atom->nlocal + atom->nghost,
+                                     mnf, cell_size, gpu_mode, screen, map, nelements, elem3param,
+                                     nparams, lam1, lam2, lam3, powermint, biga, bigb, bigr, bigd,
+                                     c1, c2, c3, c4, c, d, h, gamma, beta, powern, Z_i, Z_j, ZBLcut,
+                                     ZBLexpscale, global_e, global_a_0, global_epsilon_0, _cutsq);

  memory->destroy(lam1);
  memory->destroy(lam2);
@ -251,18 +235,13 @@ void PairTersoffZBLGPU::init_style()
  memory->destroy(ZBLexpscale);
  memory->destroy(_cutsq);

-  GPU_EXTRA::check_flag(success,error,world);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-    neighbor->requests[irequest]->ghost = 1;
-  }
-  if (comm->cutghostuser < (2.0*cutmax + neighbor->skin)) {
-    comm->cutghostuser = 2.0*cutmax + neighbor->skin;
-    if (comm->me == 0)
-       error->warning(FLERR,"Increasing communication cutoff for GPU style");
+  if (gpu_mode == GPU_FORCE)
+    neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST);
+  if (comm->cutghostuser < (2.0 * cutmax + neighbor->skin)) {
+    comm->cutghostuser = 2.0 * cutmax + neighbor->skin;
+    if (comm->me == 0) error->warning(FLERR, "Increasing communication cutoff for GPU style");
  }
 }

@ -272,10 +251,9 @@ void PairTersoffZBLGPU::init_style()

 double PairTersoffZBLGPU::init_one(int i, int j)
 {
-  if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
+  if (setflag[i][j] == 0) error->all(FLERR, "All pair coeffs are not set");
  cutghost[i][j] = cutmax;
  cutghost[j][i] = cutmax;

  return cutmax;
 }
-
--- a/src/GPU/pair_ufm_gpu.cpp
+++ b/src/GPU/pair_ufm_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* -*- c++ -*- ----------------------------------------------------------
 LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 https://www.lammps.org/, Sandia National Laboratories
@ -26,7 +25,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -36,27 +34,23 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int ufml_gpu_init(const int ntypes, double **cutsq, double **host_uf1,
-                  double **host_uf2, double **host_uf3,
-                  double **offset, double *special_lj, const int nlocal,
-                  const int nall, const int max_nbors, const int maxspecial,
-                  const double cell_size, int &gpu_mode, FILE *screen);
+int ufml_gpu_init(const int ntypes, double **cutsq, double **host_uf1, double **host_uf2,
+                  double **host_uf3, double **offset, double *special_lj, const int nlocal,
+                  const int nall, const int max_nbors, const int maxspecial, const double cell_size,
+                  int &gpu_mode, FILE *screen);

-void ufml_gpu_reinit(const int ntypes, double **cutsq, double **host_uf1,
-                     double **host_uf2, double **host_uf3, double **offset);
+void ufml_gpu_reinit(const int ntypes, double **cutsq, double **host_uf1, double **host_uf2,
+                     double **host_uf3, double **offset);

 void ufml_gpu_clear();
-int ** ufml_gpu_compute_n(const int ago, const int inum, const int nall,
-                          double **host_x, int *host_type, double *sublo,
-                          double *subhi, tagint *tag, int **nspecial,
-                          tagint **special, const bool eflag, const bool vflag,
-                          const bool eatom, const bool vatom, int &host_start,
-                          int **ilist, int **jnum,
-                          const double cpu_time, bool &success);
-void ufml_gpu_compute(const int ago, const int inum, const int nall,
-                      double **host_x, int *host_type, int *ilist, int *numj,
-                      int **firstneigh, const bool eflag, const bool vflag,
-                      const bool eatom, const bool vatom, int &host_start,
+int **ufml_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                         int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                         const bool vatom, int &host_start, int **ilist, int **jnum,
+                         const double cpu_time, bool &success);
+void ufml_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                      int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                      const bool vflag, const bool eatom, const bool vatom, int &host_start,
                      const double cpu_time, bool &success);
 double ufml_gpu_bytes();

@ -83,7 +77,7 @@ PairUFMGPU::~PairUFMGPU()

 void PairUFMGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -91,7 +85,7 @@ void PairUFMGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -100,28 +94,24 @@ void PairUFMGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = ufml_gpu_compute_n(neighbor->ago, inum, nall,
-                                   atom->x, atom->type, sublo,
-                                   subhi, atom->tag, atom->nspecial,
-                                   atom->special, eflag, vflag, eflag_atom,
-                                   vflag_atom, host_start,
-                                   &ilist, &numneigh, cpu_time, success);
+    firstneigh =
+        ufml_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+                           atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
+                           host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    ufml_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                    vflag_atom, host_start, cpu_time, success);
+    ufml_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                     eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -134,8 +124,7 @@ void PairUFMGPU::compute(int eflag, int vflag)

 void PairUFMGPU::init_style()
 {
-//  cut_respa = nullptr;
-
+  //  cut_respa = nullptr;

  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
@ -143,10 +132,9 @@ void PairUFMGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -154,21 +142,15 @@ void PairUFMGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = ufml_gpu_init(atom->ntypes+1, cutsq, uf1, uf2, uf3,
-                             offset, force->special_lj, atom->nlocal,
-                             atom->nlocal+atom->nghost, mnf, maxspecial,
-                             cell_size, gpu_mode, screen);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      ufml_gpu_init(atom->ntypes + 1, cutsq, uf1, uf2, uf3, offset, force->special_lj, atom->nlocal,
+                    atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -177,7 +159,7 @@ void PairUFMGPU::reinit()
 {
  Pair::reinit();

-  ufml_gpu_reinit(atom->ntypes+1, cutsq, uf1, uf2, uf3, offset);
+  ufml_gpu_reinit(atom->ntypes + 1, cutsq, uf1, uf2, uf3, offset);
 }

 /* ---------------------------------------------------------------------- */
@ -190,11 +172,12 @@ double PairUFMGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairUFMGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
-                             int *ilist, int *numneigh, int **firstneigh) {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,expuf,factor_lj;
+void PairUFMGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                             int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
+  double rsq, expuf, factor_lj;
  int *jlist;

  double **x = atom->x;
@ -202,7 +185,6 @@ void PairUFMGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
  int *type = atom->type;
  double *special_lj = force->special_lj;

-
  // loop over neighbors of my atoms

  for (ii = start; ii < inum; ii++) {
@ -222,22 +204,22 @@ void PairUFMGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        expuf = exp(- rsq * uf2[itype][jtype]);
-        fpair = factor_lj * uf1[itype][jtype] * expuf /(1.0 - expuf);
+        expuf = exp(-rsq * uf2[itype][jtype]);
+        fpair = factor_lj * uf1[itype][jtype] * expuf / (1.0 - expuf);

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          evdwl = -factor_lj * uf3[itype][jtype] * log(1.0 - expuf) - offset[itype][jtype];
        }

-        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_vashishta_gpu.cpp
+++ b/src/GPU/pair_vashishta_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -26,7 +25,6 @@
 #include "gpu_extra.h"
 #include "memory.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,33 +32,25 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int vashishta_gpu_init(const int ntypes, const int inum, const int nall,
-                       const int max_nbors, const double cell_size,
-                       int &gpu_mode, FILE *screen, int* host_map,
-                       const int nelements, int*** host_elem3param,
-                       const int nparams, const double* cutsq, const double* r0,
-                       const double* gamma, const double* eta,
-                       const double* lam1inv, const double* lam4inv,
-                       const double* zizj, const double* mbigd,
-                       const double* dvrc, const double* big6w,
-                       const double* heta, const double* bigh,
-                       const double* bigw, const double* c0,
-                       const double* costheta, const double* bigb,
-                       const double* big2b, const double* bigc);
+int vashishta_gpu_init(const int ntypes, const int inum, const int nall, const int max_nbors,
+                       const double cell_size, int &gpu_mode, FILE *screen, int *host_map,
+                       const int nelements, int ***host_elem3param, const int nparams,
+                       const double *cutsq, const double *r0, const double *gamma,
+                       const double *eta, const double *lam1inv, const double *lam4inv,
+                       const double *zizj, const double *mbigd, const double *dvrc,
+                       const double *big6w, const double *heta, const double *bigh,
+                       const double *bigw, const double *c0, const double *costheta,
+                       const double *bigb, const double *big2b, const double *bigc);
 void vashishta_gpu_clear();
-int ** vashishta_gpu_compute_n(const int ago, const int inum, const int nall,
-                        double **host_x, int *host_type, double *sublo,
-                        double *subhi, tagint *tag, int **nspecial,
-                        tagint **special, const bool eflag, const bool vflag,
-                        const bool eatom, const bool vatom, int &host_start,
-                        int **ilist, int **jnum,
-                        const double cpu_time, bool &success);
-void vashishta_gpu_compute(const int ago, const int nloc, const int nall,
-                           const int ln, double **host_x, int *host_type,
-                           int *ilist, int *numj, int **firstneigh,
-                           const bool eflag, const bool vflag,
-                           const bool eatom, const bool vatom, int &host_start,
-                           const double cpu_time, bool &success);
+int **vashishta_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                              int *host_type, double *sublo, double *subhi, tagint *tag,
+                              int **nspecial, tagint **special, const bool eflag, const bool vflag,
+                              const bool eatom, const bool vatom, int &host_start, int **ilist,
+                              int **jnum, const double cpu_time, bool &success);
+void vashishta_gpu_compute(const int ago, const int nloc, const int nall, const int ln,
+                           double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh,
+                           const bool eflag, const bool vflag, const bool eatom, const bool vatom,
+                           int &host_start, const double cpu_time, bool &success);
 double vashishta_gpu_bytes();

 /* ---------------------------------------------------------------------- */
@ -84,15 +74,14 @@ PairVashishtaGPU::PairVashishtaGPU(LAMMPS *lmp) : PairVashishta(lmp), gpu_mode(G
 PairVashishtaGPU::~PairVashishtaGPU()
 {
  vashishta_gpu_clear();
-  if (allocated)
-    memory->destroy(cutghost);
+  if (allocated) memory->destroy(cutghost);
 }

 /* ---------------------------------------------------------------------- */

 void PairVashishtaGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -100,7 +89,7 @@ void PairVashishtaGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -109,40 +98,34 @@ void PairVashishtaGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = vashishta_gpu_compute_n(neighbor->ago, inum, nall,
-                                   atom->x, atom->type, sublo,
-                                   subhi, atom->tag, atom->nspecial,
-                                   atom->special, eflag, vflag, eflag_atom,
-                                   vflag_atom, host_start,
-                                   &ilist, &numneigh, cpu_time, success);
+    firstneigh =
+        vashishta_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                atom->tag, atom->nspecial, atom->special, eflag, vflag, eflag_atom,
+                                vflag_atom, host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;

-    vashishta_gpu_compute(neighbor->ago, inum, nall, inum+list->gnum,
-                   atom->x, atom->type, ilist, numneigh, firstneigh, eflag,
-                   vflag, eflag_atom, vflag_atom, host_start, cpu_time,
-                   success);
+    vashishta_gpu_compute(neighbor->ago, inum, nall, inum + list->gnum, atom->x, atom->type, ilist,
+                          numneigh, firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start,
+                          cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");
 }

 /* ---------------------------------------------------------------------- */

 void PairVashishtaGPU::allocate()
 {
-  if (!allocated) {
-    PairVashishta::allocate();
-  }
+  if (!allocated) { PairVashishta::allocate(); }
  int n = atom->ntypes;

-  memory->create(cutghost,n+1,n+1,"pair:cutghost");
+  memory->create(cutghost, n + 1, n + 1, "pair:cutghost");
  gpu_allocated = true;
 }

@ -154,8 +137,7 @@ void PairVashishtaGPU::init_style()
 {
  double cell_size = cutmax + neighbor->skin;

-  if (atom->tag_enable == 0)
-    error->all(FLERR,"Pair style vashishta/gpu requires atom IDs");
+  if (atom->tag_enable == 0) error->all(FLERR, "Pair style vashishta/gpu requires atom IDs");

  double *cutsq, *r0, *gamma, *eta;
  double *lam1inv, *lam4inv, *zizj, *mbigd;
@ -169,24 +151,24 @@ void PairVashishtaGPU::init_style()
  bigw = c0 = costheta = bigb = nullptr;
  big2b = bigc = nullptr;

-  memory->create(cutsq,nparams,"pair:cutsq");
-  memory->create(r0,nparams,"pair:r0");
-  memory->create(gamma,nparams,"pair:gamma");
-  memory->create(eta,nparams,"pair:eta");
-  memory->create(lam1inv,nparams,"pair:lam1inv");
-  memory->create(lam4inv,nparams,"pair:lam4inv");
-  memory->create(zizj,nparams,"pair:zizj");
-  memory->create(mbigd,nparams,"pair:mbigd");
-  memory->create(dvrc,nparams,"pair:dvrc");
-  memory->create(big6w,nparams,"pair:big6w");
-  memory->create(heta,nparams,"pair:heta");
-  memory->create(bigh,nparams,"pair:bigh");
-  memory->create(bigw,nparams,"pair:bigw");
-  memory->create(c0,nparams,"pair:c0");
-  memory->create(costheta,nparams,"pair:costheta");
-  memory->create(bigb,nparams,"pair:bigb");
-  memory->create(big2b,nparams,"pair:big2b");
-  memory->create(bigc,nparams,"pair:bigc");
+  memory->create(cutsq, nparams, "pair:cutsq");
+  memory->create(r0, nparams, "pair:r0");
+  memory->create(gamma, nparams, "pair:gamma");
+  memory->create(eta, nparams, "pair:eta");
+  memory->create(lam1inv, nparams, "pair:lam1inv");
+  memory->create(lam4inv, nparams, "pair:lam4inv");
+  memory->create(zizj, nparams, "pair:zizj");
+  memory->create(mbigd, nparams, "pair:mbigd");
+  memory->create(dvrc, nparams, "pair:dvrc");
+  memory->create(big6w, nparams, "pair:big6w");
+  memory->create(heta, nparams, "pair:heta");
+  memory->create(bigh, nparams, "pair:bigh");
+  memory->create(bigw, nparams, "pair:bigw");
+  memory->create(c0, nparams, "pair:c0");
+  memory->create(costheta, nparams, "pair:costheta");
+  memory->create(bigb, nparams, "pair:bigb");
+  memory->create(big2b, nparams, "pair:big2b");
+  memory->create(bigc, nparams, "pair:bigc");

  for (int i = 0; i < nparams; i++) {
    cutsq[i] = params[i].cutsq;
@ -209,11 +191,10 @@ void PairVashishtaGPU::init_style()
    bigc[i] = params[i].bigc;
  }
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = vashishta_gpu_init(atom->ntypes+1, atom->nlocal, atom->nlocal+atom->nghost, mnf,
-                            cell_size, gpu_mode, screen, map, nelements,
-                            elem3param, nparams, cutsq, r0, gamma, eta, lam1inv,
-                            lam4inv, zizj, mbigd, dvrc, big6w, heta, bigh, bigw,
-                            c0, costheta, bigb, big2b, bigc);
+  int success = vashishta_gpu_init(atom->ntypes + 1, atom->nlocal, atom->nlocal + atom->nghost, mnf,
+                                   cell_size, gpu_mode, screen, map, nelements, elem3param, nparams,
+                                   cutsq, r0, gamma, eta, lam1inv, lam4inv, zizj, mbigd, dvrc,
+                                   big6w, heta, bigh, bigw, c0, costheta, bigb, big2b, bigc);
  memory->destroy(cutsq);
  memory->destroy(r0);
  memory->destroy(gamma);
@ -233,18 +214,13 @@ void PairVashishtaGPU::init_style()
  memory->destroy(big2b);
  memory->destroy(bigc);

-  GPU_EXTRA::check_flag(success,error,world);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-    neighbor->requests[irequest]->ghost = 1;
-  }
-  if (comm->cutghostuser < (2.0*cutmax + neighbor->skin)) {
-    comm->cutghostuser=2.0*cutmax + neighbor->skin;
-    if (comm->me == 0)
-       error->warning(FLERR,"Increasing communication cutoff for GPU style");
+  if (gpu_mode == GPU_FORCE)
+    neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST);
+  if (comm->cutghostuser < (2.0 * cutmax + neighbor->skin)) {
+    comm->cutghostuser = 2.0 * cutmax + neighbor->skin;
+    if (comm->me == 0) error->warning(FLERR, "Increasing communication cutoff for GPU style");
  }
 }

@ -254,13 +230,10 @@ void PairVashishtaGPU::init_style()

 double PairVashishtaGPU::init_one(int i, int j)
 {
-  if (!gpu_allocated) {
-    allocate();
-  }
-  if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
+  if (!gpu_allocated) { allocate(); }
+  if (setflag[i][j] == 0) error->all(FLERR, "All pair coeffs are not set");
  cutghost[i][j] = cutmax;
  cutghost[j][i] = cutmax;

  return cutmax;
 }
-
--- a/src/GPU/pair_yukawa_colloid_gpu.cpp
+++ b/src/GPU/pair_yukawa_colloid_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,34 +32,26 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int ykcolloid_gpu_init(const int ntypes, double **cutsq, double **host_a,
-                       double **host_offset, double *special_lj, const int inum,
-                       const int nall, const int max_nbors,
-                       const int maxspecial, const double cell_size,
-                       int &gpu_mode, FILE *screen, const double kappa);
+int ykcolloid_gpu_init(const int ntypes, double **cutsq, double **host_a, double **host_offset,
+                       double *special_lj, const int inum, const int nall, const int max_nbors,
+                       const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen,
+                       const double kappa);
 void ykcolloid_gpu_clear();
-int ** ykcolloid_gpu_compute_n(const int ago, const int inum_full,
-                               const int nall, double **host_x, int *host_type,
-                               double *sublo, double *subhi, tagint *tag,
-                               int **nspecial, tagint **special,
-                               const bool eflag, const bool vflag,
-                               const bool eatom, const bool vatom,
-                               int &host_start, int **ilist, int **jnum,
-                               const double cpu_time, bool &success,
-                               double *host_rad);
-void ykcolloid_gpu_compute(const int ago, const int inum_full,
-                           const int nall, double **host_x, int *host_type,
-                           int *ilist, int *numj, int **firstneigh,
-                           const bool eflag, const bool vflag,
-                           const bool eatom, const bool vatom, int &host_start,
-                           const double cpu_time, bool &success,
-                           double *host_rad);
+int **ykcolloid_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
+                              int *host_type, double *sublo, double *subhi, tagint *tag,
+                              int **nspecial, tagint **special, const bool eflag, const bool vflag,
+                              const bool eatom, const bool vatom, int &host_start, int **ilist,
+                              int **jnum, const double cpu_time, bool &success, double *host_rad);
+void ykcolloid_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
+                           int *host_type, int *ilist, int *numj, int **firstneigh,
+                           const bool eflag, const bool vflag, const bool eatom, const bool vatom,
+                           int &host_start, const double cpu_time, bool &success, double *host_rad);
 double ykcolloid_gpu_bytes();

 /* ---------------------------------------------------------------------- */

-PairYukawaColloidGPU::PairYukawaColloidGPU(LAMMPS *lmp) : PairYukawaColloid(lmp),
-  gpu_mode(GPU_FORCE)
+PairYukawaColloidGPU::PairYukawaColloidGPU(LAMMPS *lmp) :
+    PairYukawaColloid(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  reinitflag = 0;
@ -83,7 +73,7 @@ PairYukawaColloidGPU::~PairYukawaColloidGPU()

 void PairYukawaColloidGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -91,7 +81,7 @@ void PairYukawaColloidGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -100,32 +90,25 @@ void PairYukawaColloidGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = ykcolloid_gpu_compute_n(neighbor->ago, inum, nall,
-                                         atom->x, atom->type,
-                                         sublo,
-                                         subhi, atom->tag,
-                                         atom->nspecial, atom->special,
-                                         eflag, vflag, eflag_atom,
-                                         vflag_atom, host_start, &ilist,
-                                         &numneigh, cpu_time,
-                                         success, atom->radius);
+    firstneigh = ykcolloid_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo,
+                                         subhi, atom->tag, atom->nspecial, atom->special, eflag,
+                                         vflag, eflag_atom, vflag_atom, host_start, &ilist,
+                                         &numneigh, cpu_time, success, atom->radius);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    ykcolloid_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                          ilist, numneigh, firstneigh, eflag, vflag,
-                          eflag_atom, vflag_atom, host_start, cpu_time,
+    ykcolloid_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh,
+                          firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
                          success, atom->radius);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -138,9 +121,7 @@ void PairYukawaColloidGPU::compute(int eflag, int vflag)

 void PairYukawaColloidGPU::init_style()
 {
-  if (!atom->sphere_flag)
-    error->all(FLERR,"Pair yukawa/colloid/gpu requires atom style sphere");
-
+  if (!atom->sphere_flag) error->all(FLERR, "Pair yukawa/colloid/gpu requires atom style sphere");

  // Repeat cutsq calculation because done after call to init_style
  double maxcut = -1.0;
@ -148,10 +129,9 @@ void PairYukawaColloidGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -159,21 +139,15 @@ void PairYukawaColloidGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = ykcolloid_gpu_init(atom->ntypes+1, cutsq, a,
-                                   offset, force->special_lj, atom->nlocal,
-                                   atom->nlocal+atom->nghost, mnf, maxspecial,
+  int success = ykcolloid_gpu_init(atom->ntypes + 1, cutsq, a, offset, force->special_lj,
+                                   atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial,
                                   cell_size, gpu_mode, screen, kappa);
-  GPU_EXTRA::check_flag(success,error,world);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -186,12 +160,12 @@ double PairYukawaColloidGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairYukawaColloidGPU::cpu_compute(int start, int inum, int eflag,
-                                       int /* vflag */, int *ilist,
-                                       int *numneigh, int **firstneigh) {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair,radi,radj;
-  double r,rsq,rinv,screening,forceyukawa,factor;
+void PairYukawaColloidGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                                       int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair, radi, radj;
+  double r, rsq, rinv, screening, forceyukawa, factor;
  int *jlist;

  double **x = atom->x;
@ -220,28 +194,28 @@ void PairYukawaColloidGPU::cpu_compute(int start, int inum, int eflag,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];
      radj = radius[j];

      if (rsq < cutsq[itype][jtype]) {
        r = sqrt(rsq);
-        rinv = 1.0/r;
-        screening = exp(-kappa*(r-(radi+radj)));
+        rinv = 1.0 / r;
+        screening = exp(-kappa * (r - (radi + radj)));
        forceyukawa = a[itype][jtype] * screening;

-        fpair = factor*forceyukawa * rinv;
+        fpair = factor * forceyukawa * rinv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
-          evdwl = a[itype][jtype]/kappa * screening - offset[itype][jtype];
+          evdwl = a[itype][jtype] / kappa * screening - offset[itype][jtype];
          evdwl *= factor;
        }

-        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_yukawa_gpu.cpp
+++ b/src/GPU/pair_yukawa_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,30 +32,25 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int yukawa_gpu_init(const int ntypes, double **cutsq, double kappa,
-                    double **host_a, double **offset, double *special_lj,
-                    const int inum, const int nall, const int max_nbors,
-                    const int maxspecial, const double cell_size,
+int yukawa_gpu_init(const int ntypes, double **cutsq, double kappa, double **host_a,
+                    double **offset, double *special_lj, const int inum, const int nall,
+                    const int max_nbors, const int maxspecial, const double cell_size,
                    int &gpu_mode, FILE *screen);
 void yukawa_gpu_clear();
-int ** yukawa_gpu_compute_n(const int ago, const int inum_full, const int nall,
-                            double **host_x, int *host_type, double *sublo,
-                            double *subhi, tagint *tag, int **nspecial,
-                            tagint **special, const bool eflag,
-                            const bool vflag, const bool eatom,
-                            const bool vatom, int &host_start, int **ilist,
-                            int **jnum, const double cpu_time, bool &success);
-void yukawa_gpu_compute(const int ago, const int inum_full, const int nall,
-                        double **host_x, int *host_type, int *ilist, int *numj,
-                        int **firstneigh, const bool eflag, const bool vflag,
-                        const bool eatom, const bool vatom, int &host_start,
+int **yukawa_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
+                           int *host_type, double *sublo, double *subhi, tagint *tag,
+                           int **nspecial, tagint **special, const bool eflag, const bool vflag,
+                           const bool eatom, const bool vatom, int &host_start, int **ilist,
+                           int **jnum, const double cpu_time, bool &success);
+void yukawa_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
+                        int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                        const bool vflag, const bool eatom, const bool vatom, int &host_start,
                        const double cpu_time, bool &success);
 double yukawa_gpu_bytes();

 /* ---------------------------------------------------------------------- */

-PairYukawaGPU::PairYukawaGPU(LAMMPS *lmp) : PairYukawa(lmp),
-                                            gpu_mode(GPU_FORCE)
+PairYukawaGPU::PairYukawaGPU(LAMMPS *lmp) : PairYukawa(lmp), gpu_mode(GPU_FORCE)
 {
  respa_enable = 0;
  reinitflag = 0;
@ -79,7 +72,7 @@ PairYukawaGPU::~PairYukawaGPU()

 void PairYukawaGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -87,7 +80,7 @@ void PairYukawaGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -96,28 +89,24 @@ void PairYukawaGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = yukawa_gpu_compute_n(neighbor->ago, inum, nall,
-                                      atom->x, atom->type, sublo,
-                                      subhi, atom->tag, atom->nspecial,
-                                      atom->special, eflag, vflag, eflag_atom,
-                                      vflag_atom, host_start,
-                                      &ilist, &numneigh, cpu_time, success);
+    firstneigh =
+        yukawa_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                             atom->tag, atom->nspecial, atom->special, eflag, vflag, eflag_atom,
+                             vflag_atom, host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    yukawa_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                       ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                       vflag_atom, host_start, cpu_time, success);
+    yukawa_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                       eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -137,10 +126,9 @@ void PairYukawaGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -148,21 +136,15 @@ void PairYukawaGPU::init_style()
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = yukawa_gpu_init(atom->ntypes+1, cutsq, kappa, a,
-                                offset, force->special_lj, atom->nlocal,
-                                atom->nlocal+atom->nghost, mnf, maxspecial,
-                                cell_size, gpu_mode, screen);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      yukawa_gpu_init(atom->ntypes + 1, cutsq, kappa, a, offset, force->special_lj, atom->nlocal,
+                      atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -175,11 +157,12 @@ double PairYukawaGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairYukawaGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
-                                int *ilist, int *numneigh, int **firstneigh) {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r,rinv,screening,forceyukawa,factor;
+void PairYukawaGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                                int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
+  double rsq, r2inv, r, rinv, screening, forceyukawa, factor;
  int *jlist;

  double **x = atom->x;
@ -206,28 +189,28 @@ void PairYukawaGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
+        r2inv = 1.0 / rsq;
        r = sqrt(rsq);
-        rinv = 1.0/r;
-        screening = exp(-kappa*r);
+        rinv = 1.0 / r;
+        screening = exp(-kappa * r);
        forceyukawa = a[itype][jtype] * screening * (kappa + rinv);

-        fpair = factor*forceyukawa * r2inv;
+        fpair = factor * forceyukawa * r2inv;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          evdwl = a[itype][jtype] * screening * rinv - offset[itype][jtype];
          evdwl *= factor;
        }

-        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
      }
    }
  }
--- a/src/GPU/pair_zbl_gpu.cpp
+++ b/src/GPU/pair_zbl_gpu.cpp
@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
 #include "force.h"
 #include "gpu_extra.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "suffix.h"

@ -34,27 +32,22 @@ using namespace LAMMPS_NS;

 // External functions from cuda library for atom decomposition

-int zbl_gpu_init(const int ntypes, double **cutsq, double **host_sw1,
-                 double **host_sw2, double **host_sw3, double **host_sw4,
-                 double **host_sw5, double **host_d1a, double **host_d2a,
-                 double **host_d3a, double **host_d4a, double **host_zze,
-                 double cut_globalsq, double cut_innersq, double cut_inner,
-                 const int inum, const int nall, const int max_nbors,
-                 const int maxspecial, const double cell_size,
+int zbl_gpu_init(const int ntypes, double **cutsq, double **host_sw1, double **host_sw2,
+                 double **host_sw3, double **host_sw4, double **host_sw5, double **host_d1a,
+                 double **host_d2a, double **host_d3a, double **host_d4a, double **host_zze,
+                 double cut_globalsq, double cut_innersq, double cut_inner, const int inum,
+                 const int nall, const int max_nbors, const int maxspecial, const double cell_size,
                 int &gpu_mode, FILE *screen);
 void zbl_gpu_clear();
-int ** zbl_gpu_compute_n(const int ago, const int inum, const int nall,
-                         double **host_x, int *host_type, double *sublo,
-                         double *subhi, tagint *tag, int **nspecial,
-                         tagint **special, const bool eflag, const bool vflag,
-                         const bool eatom, const bool vatom, int &host_start,
-                         int **ilist, int **jnum,
-                         const double cpu_time, bool &success);
-void zbl_gpu_compute(const int ago, const int inum, const int nall,
-                     double **host_x, int *host_type, int *ilist, int *numj,
-                     int **firstneigh, const bool eflag, const bool vflag,
-                     const bool eatom, const bool vatom, int &host_start,
-                     const double cpu_time, bool &success);
+int **zbl_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                        int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                        tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                        const bool vatom, int &host_start, int **ilist, int **jnum,
+                        const double cpu_time, bool &success);
+void zbl_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
+                     int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
+                     const bool eatom, const bool vatom, int &host_start, const double cpu_time,
+                     bool &success);
 double zbl_gpu_bytes();

 /* ---------------------------------------------------------------------- */
@ -81,7 +74,7 @@ PairZBLGPU::~PairZBLGPU()

 void PairZBLGPU::compute(int eflag, int vflag)
 {
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;
@ -89,7 +82,7 @@ void PairZBLGPU::compute(int eflag, int vflag)
  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode != GPU_FORCE) {
-    double sublo[3],subhi[3];
+    double sublo[3], subhi[3];
    if (domain->triclinic == 0) {
      sublo[0] = domain->sublo[0];
      sublo[1] = domain->sublo[1];
@ -98,28 +91,24 @@ void PairZBLGPU::compute(int eflag, int vflag)
      subhi[1] = domain->subhi[1];
      subhi[2] = domain->subhi[2];
    } else {
-      domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
    }
    inum = atom->nlocal;
-    firstneigh = zbl_gpu_compute_n(neighbor->ago, inum, nall,
-                                   atom->x, atom->type, sublo,
-                                   subhi, atom->tag, atom->nspecial,
-                                   atom->special, eflag, vflag, eflag_atom,
-                                   vflag_atom, host_start,
-                                   &ilist, &numneigh, cpu_time, success);
+    firstneigh =
+        zbl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
+                          atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
+                          host_start, &ilist, &numneigh, cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
-    zbl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
-                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
-                    vflag_atom, host_start, cpu_time, success);
+    zbl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                    eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
  }
-  if (!success)
-    error->one(FLERR,"Insufficient memory on accelerator");
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");

-  if (host_start<inum) {
+  if (host_start < inum) {
    cpu_time = platform::walltime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = platform::walltime() - cpu_time;
@ -139,10 +128,9 @@ void PairZBLGPU::init_style()
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
-        cut = init_one(i,j);
+        cut = init_one(i, j);
        cut *= cut;
-        if (cut > maxcut)
-          maxcut = cut;
+        if (cut > maxcut) maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
@ -153,22 +141,16 @@ void PairZBLGPU::init_style()
  cut_innersq = cut_inner * cut_inner;
  cut_globalsq = cut_global * cut_global;

-  int maxspecial=0;
-  if (atom->molecular != Atom::ATOMIC)
-    maxspecial=atom->maxspecial;
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
  int mnf = 5e-2 * neighbor->oneatom;
-  int success = zbl_gpu_init(atom->ntypes+1, cutsq, sw1, sw2, sw3, sw4,
-                             sw5, d1a, d2a, d3a, d4a, zze,
-                             cut_globalsq, cut_innersq, cut_inner,
-                             atom->nlocal, atom->nlocal+atom->nghost,
-                             mnf, maxspecial, cell_size, gpu_mode, screen);
-  GPU_EXTRA::check_flag(success,error,world);
+  int success =
+      zbl_gpu_init(atom->ntypes + 1, cutsq, sw1, sw2, sw3, sw4, sw5, d1a, d2a, d3a, d4a, zze,
+                   cut_globalsq, cut_innersq, cut_inner, atom->nlocal, atom->nlocal + atom->nghost,
+                   mnf, maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);

-  if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->full = 1;
-  }
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
 }

 /* ---------------------------------------------------------------------- */
@ -181,11 +163,12 @@ double PairZBLGPU::memory_usage()

 /* ---------------------------------------------------------------------- */

-void PairZBLGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
-                             int *ilist, int *numneigh, int **firstneigh) {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r,t,fswitch,eswitch;
+void PairZBLGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                             int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
+  double rsq, r, t, fswitch, eswitch;
  int *jlist;

  double **x = atom->x;
@ -210,36 +193,34 @@ void PairZBLGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
      jtype = type[j];

      if (rsq < cut_globalsq) {
-              r = sqrt(rsq);
+        r = sqrt(rsq);
        fpair = dzbldr(r, itype, jtype);

-              if (rsq > cut_innersq) {
-                t = r - cut_inner;
-                fswitch = t*t *
-                  (sw1[itype][jtype] + sw2[itype][jtype]*t);
-                fpair += fswitch;
-              }
+        if (rsq > cut_innersq) {
+          t = r - cut_inner;
+          fswitch = t * t * (sw1[itype][jtype] + sw2[itype][jtype] * t);
+          fpair += fswitch;
+        }

-        fpair *= -1.0/r;
-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        fpair *= -1.0 / r;
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;

        if (eflag) {
          evdwl = e_zbl(r, itype, jtype);
-                evdwl += sw5[itype][jtype];
-                if (rsq > cut_innersq) {
-                  eswitch = t*t*t *
-                    (sw3[itype][jtype] + sw4[itype][jtype]*t);
-                  evdwl += eswitch;
-                }
+          evdwl += sw5[itype][jtype];
+          if (rsq > cut_innersq) {
+            eswitch = t * t * t * (sw3[itype][jtype] + sw4[itype][jtype] * t);
+            evdwl += eswitch;
+          }
        }

-        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
      }
    }
  }