diff --git a/lib/gpu/Nvidia.makefile b/lib/gpu/Nvidia.makefile
index 99f466965b..3587e4ebd4 100644
--- a/lib/gpu/Nvidia.makefile
+++ b/lib/gpu/Nvidia.makefile
@@ -31,8 +31,8 @@ UCL_H  = $(wildcard ./geryon/ucl*.h)
 NVC_H  = $(wildcard ./geryon/nvc*.h) $(UCL_H)
 NVD_H  = $(wildcard ./geryon/nvd*.h) $(UCL_H) nv_kernel_def.h
 # Headers for Pair Stuff
-PAIR_H  = atom.h ans.h nbor_shared.h \
-          nbor.h precision.h device.h \
+PAIR_H  = atom.h answer.h neighbor_shared.h \
+          neighbor.h precision.h device.h \
           balance.h pppm.h
 
 ALL_H = $(NVD_H) $(PAIR_H)
@@ -42,7 +42,7 @@ CUDPP = $(OBJ_DIR)/cudpp.o $(OBJ_DIR)/cudpp_plan.o \
         $(OBJ_DIR)/cudpp_maximal_launch.o $(OBJ_DIR)/cudpp_plan_manager.o \
         $(OBJ_DIR)/radixsort_app.cu_o $(OBJ_DIR)/scan_app.cu_o
 OBJS = $(OBJ_DIR)/atom.o $(OBJ_DIR)/ans.o \
-       $(OBJ_DIR)/nbor.o $(OBJ_DIR)/nbor_shared.o \
+       $(OBJ_DIR)/nbor.o $(OBJ_DIR)/neighbor_shared.o \
        $(OBJ_DIR)/device.o $(OBJ_DIR)/base_atomic.o \
        $(OBJ_DIR)/base_charge.o $(OBJ_DIR)/base_ellipsoid.o \
        $(OBJ_DIR)/pppm.o $(OBJ_DIR)/pppm_ext.o \
@@ -62,8 +62,8 @@ OBJS = $(OBJ_DIR)/atom.o $(OBJ_DIR)/ans.o \
        $(CUDPP)
 PTXS = $(OBJ_DIR)/device.ptx \
        $(OBJ_DIR)/atom.ptx $(OBJ_DIR)/atom_ptx.h \
-       $(OBJ_DIR)/nbor_cpu.ptx $(OBJ_DIR)/nbor_ptx.h \
-       $(OBJ_DIR)/nbor_gpu.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h \
+       $(OBJ_DIR)/neighbor_cpu.ptx $(OBJ_DIR)/nbor_ptx.h \
+       $(OBJ_DIR)/neighbor_gpu.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h \
        $(OBJ_DIR)/pppm_f_gpu_kernel.ptx $(OBJ_DIR)/pppm_f_gpu_ptx.h \
        $(OBJ_DIR)/pppm_d_gpu_kernel.ptx $(OBJ_DIR)/pppm_d_gpu_ptx.h \
        $(OBJ_DIR)/ellipsoid_nbor.ptx $(OBJ_DIR)/ellipsoid_nbor_ptx.h \
@@ -111,26 +111,26 @@ $(OBJ_DIR)/atom_ptx.h: $(OBJ_DIR)/atom.ptx
 $(OBJ_DIR)/atom.o: atom.cpp atom.h $(NVD_H) $(OBJ_DIR)/atom_ptx.h
 	$(CUDR) -o $@ -c atom.cpp -I$(OBJ_DIR)
 
-$(OBJ_DIR)/ans.o: ans.cpp ans.h $(NVD_H)
-	$(CUDR) -o $@ -c ans.cpp -I$(OBJ_DIR)
+$(OBJ_DIR)/ans.o: answer.cpp answer.h $(NVD_H)
+	$(CUDR) -o $@ -c answer.cpp -I$(OBJ_DIR)
 
-$(OBJ_DIR)/nbor_cpu.ptx: nbor_cpu.cu
-	$(CUDA) --ptx -DNV_KERNEL -o $@ nbor_cpu.cu
+$(OBJ_DIR)/neighbor_cpu.ptx: neighbor_cpu.cu
+	$(CUDA) --ptx -DNV_KERNEL -o $@ neighbor_cpu.cu
 
-$(OBJ_DIR)/nbor_ptx.h: $(OBJ_DIR)/nbor_cpu.ptx
-	$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/nbor_cpu.ptx $(OBJ_DIR)/nbor_ptx.h
+$(OBJ_DIR)/nbor_ptx.h: $(OBJ_DIR)/neighbor_cpu.ptx
+	$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/neighbor_cpu.ptx $(OBJ_DIR)/nbor_ptx.h
 
-$(OBJ_DIR)/nbor_gpu.ptx: nbor_gpu.cu
-	$(CUDA) --ptx -DNV_KERNEL -o $@ nbor_gpu.cu
+$(OBJ_DIR)/neighbor_gpu.ptx: neighbor_gpu.cu
+	$(CUDA) --ptx -DNV_KERNEL -o $@ neighbor_gpu.cu
 
-$(OBJ_DIR)/pair_gpu_build_ptx.h: $(OBJ_DIR)/nbor_gpu.ptx
-	$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/nbor_gpu.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h
+$(OBJ_DIR)/pair_gpu_build_ptx.h: $(OBJ_DIR)/neighbor_gpu.ptx
+	$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/neighbor_gpu.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h
 
-$(OBJ_DIR)/nbor_shared.o: nbor_shared.cpp nbor_shared.h $(OBJ_DIR)/nbor_ptx.h $(OBJ_DIR)/pair_gpu_build_ptx.h $(NVD_H)
-	$(CUDR) -o $@ -c nbor_shared.cpp -I$(OBJ_DIR)
+$(OBJ_DIR)/neighbor_shared.o: neighbor_shared.cpp neighbor_shared.h $(OBJ_DIR)/nbor_ptx.h $(OBJ_DIR)/pair_gpu_build_ptx.h $(NVD_H)
+	$(CUDR) -o $@ -c neighbor_shared.cpp -I$(OBJ_DIR)
 
-$(OBJ_DIR)/nbor.o: nbor.cpp nbor.h nbor_shared.h $(NVD_H)
-	$(CUDR) -o $@ -c nbor.cpp -I$(OBJ_DIR)
+$(OBJ_DIR)/nbor.o: neighbor.cpp neighbor.h neighbor_shared.h $(NVD_H)
+	$(CUDR) -o $@ -c neighbor.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/device.ptx: device.cu
 	$(CUDA) --ptx -DNV_KERNEL -o $@ device.cu
diff --git a/lib/gpu/Opencl.makefile b/lib/gpu/Opencl.makefile
index 6f91dfe9c4..280baf8980 100644
--- a/lib/gpu/Opencl.makefile
+++ b/lib/gpu/Opencl.makefile
@@ -24,15 +24,15 @@ OCL_LIB = $(LIB_DIR)/libgpu.a
 UCL_H  = $(wildcard ./geryon/ucl*.h)
 OCL_H  = $(wildcard ./geryon/ocl*.h) $(UCL_H)
 # Headers for Pair Stuff
-PAIR_H  = atom.h ans.h nbor_shared.h \
-          nbor.h precision.h device.h \
+PAIR_H  = atom.h answer.h neighbor_shared.h \
+          neighbor.h precision.h device.h \
           balance.h pppm.h
 
 ALL_H = $(OCL_H) $(PAIR_H)
 
 EXECS = $(BIN_DIR)/ocl_get_devices
 OBJS = $(OBJ_DIR)/atom.o $(OBJ_DIR)/ans.o \
-       $(OBJ_DIR)/nbor_shared.o $(OBJ_DIR)/nbor.o \
+       $(OBJ_DIR)/neighbor_shared.o $(OBJ_DIR)/nbor.o \
        $(OBJ_DIR)/device.o $(OBJ_DIR)/base_atomic.o \
        $(OBJ_DIR)/base_charge.o $(OBJ_DIR)/base_ellipsoid.o \
        $(OBJ_DIR)/pppm.o $(OBJ_DIR)/pppm_ext.o \
@@ -69,17 +69,17 @@ $(OBJ_DIR)/atom_cl.h: atom.cu
 $(OBJ_DIR)/atom.o: atom.cpp atom.h $(OCL_H) $(OBJ_DIR)/atom_cl.h
 	$(OCL) -o $@ -c atom.cpp -I$(OBJ_DIR)
 
-$(OBJ_DIR)/ans.o: ans.cpp ans.h $(OCL_H)
-	$(OCL) -o $@ -c ans.cpp -I$(OBJ_DIR)
+$(OBJ_DIR)/ans.o: answer.cpp answer.h $(OCL_H)
+	$(OCL) -o $@ -c answer.cpp -I$(OBJ_DIR)
 
-$(OBJ_DIR)/nbor_cl.h: nbor_cpu.cu
-	$(BSH) ./geryon/file_to_cstr.sh nbor_cpu.cu $(OBJ_DIR)/nbor_cl.h
+$(OBJ_DIR)/nbor_cl.h: neighbor_cpu.cu
+	$(BSH) ./geryon/file_to_cstr.sh neighbor_cpu.cu $(OBJ_DIR)/nbor_cl.h
 
-$(OBJ_DIR)/nbor_shared.o: nbor_shared.cpp nbor_shared.h $(OCL_H) $(OBJ_DIR)/nbor_cl.h
-	$(OCL) -o $@ -c nbor_shared.cpp -I$(OBJ_DIR)
+$(OBJ_DIR)/neighbor_shared.o: neighbor_shared.cpp neighbor_shared.h $(OCL_H) $(OBJ_DIR)/nbor_cl.h
+	$(OCL) -o $@ -c neighbor_shared.cpp -I$(OBJ_DIR)
 
-$(OBJ_DIR)/nbor.o: nbor.cpp nbor.h $(OCL_H) nbor_shared.h
-	$(OCL) -o $@ -c nbor.cpp -I$(OBJ_DIR)
+$(OBJ_DIR)/nbor.o: neighbor.cpp neighbor.h $(OCL_H) neighbor_shared.h
+	$(OCL) -o $@ -c neighbor.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/pair_gpu_dev_cl.h: device.cu
 	$(BSH) ./geryon/file_to_cstr.sh device.cu $(OBJ_DIR)/pair_gpu_dev_cl.h
diff --git a/lib/gpu/ans.cpp b/lib/gpu/answer.cpp
similarity index 82%
rename from lib/gpu/ans.cpp
rename to lib/gpu/answer.cpp
index f159b73efe..5dc818bb48 100644
--- a/lib/gpu/ans.cpp
+++ b/lib/gpu/answer.cpp
@@ -1,31 +1,29 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                 answer.cpp
+                             -------------------
+                            W. Michael Brown (ORNL)
 
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Class for data management of forces, torques, energies, and virials
 
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
- 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
-------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
 
-#include "ans.h"
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/
 
-#define PairGPUAnsT PairGPUAns<numtyp,acctyp>
+#include "answer.h"
+
+#define AnswerT Answer<numtyp,acctyp>
 
 template <class numtyp, class acctyp>
-PairGPUAnsT::PairGPUAns() : _allocated(false),_eflag(false),_vflag(false),
+AnswerT::Answer() : _allocated(false),_eflag(false),_vflag(false),
                             _inum(0),_ilist(NULL),_newton(false) {
 }
 
 template <class numtyp, class acctyp>
-int PairGPUAnsT::bytes_per_atom() const { 
+int AnswerT::bytes_per_atom() const { 
   int bytes=11*sizeof(acctyp);
   if (_rot)
     bytes+=4*sizeof(acctyp);
@@ -35,7 +33,7 @@ int PairGPUAnsT::bytes_per_atom() const {
 }
 
 template <class numtyp, class acctyp>
-bool PairGPUAnsT::alloc(const int inum) {
+bool AnswerT::alloc(const int inum) {
   _max_local=static_cast<int>(static_cast<double>(inum)*1.10);
 
   bool success=true;
@@ -70,7 +68,7 @@ bool PairGPUAnsT::alloc(const int inum) {
 }
 
 template <class numtyp, class acctyp>
-bool PairGPUAnsT::init(const int inum, const bool charge, const bool rot,
+bool AnswerT::init(const int inum, const bool charge, const bool rot,
                        UCL_Device &devi) {
   clear();
 
@@ -100,7 +98,7 @@ bool PairGPUAnsT::init(const int inum, const bool charge, const bool rot,
 }
   
 template <class numtyp, class acctyp>
-bool PairGPUAnsT::add_fields(const bool charge, const bool rot) {
+bool AnswerT::add_fields(const bool charge, const bool rot) {
   bool realloc=false;
   if (charge && _charge==false) {
     _charge=true;
@@ -122,7 +120,7 @@ bool PairGPUAnsT::add_fields(const bool charge, const bool rot) {
 }
 
 template <class numtyp, class acctyp>
-void PairGPUAnsT::clear_resize() {
+void AnswerT::clear_resize() {
   if (!_allocated)
     return;
   _allocated=false;
@@ -134,7 +132,7 @@ void PairGPUAnsT::clear_resize() {
 }
 
 template <class numtyp, class acctyp>
-void PairGPUAnsT::clear() {
+void AnswerT::clear() {
   _gpu_bytes=0;
   if (!_allocated)
     return;
@@ -148,7 +146,7 @@ void PairGPUAnsT::clear() {
 }
 
 template <class numtyp, class acctyp>
-double PairGPUAnsT::host_memory_usage() const {
+double AnswerT::host_memory_usage() const {
   int atom_bytes=4;
   if (_charge) 
     atom_bytes+=1;
@@ -156,11 +154,11 @@ double PairGPUAnsT::host_memory_usage() const {
     atom_bytes+=4;
   int ans_bytes=atom_bytes+_ev_fields;
   return ans_bytes*(_max_local)*sizeof(acctyp)+
-         sizeof(PairGPUAns<numtyp,acctyp>);
+         sizeof(Answer<numtyp,acctyp>);
 }
   
 template <class numtyp, class acctyp>
-void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag,
+void AnswerT::copy_answers(const bool eflag, const bool vflag,
                                const bool ef_atom, const bool vf_atom) {
   time_answer.start();
   _eflag=eflag;
@@ -184,7 +182,7 @@ void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag,
 }
 
 template <class numtyp, class acctyp>
-void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag,
+void AnswerT::copy_answers(const bool eflag, const bool vflag,
                                const bool ef_atom, const bool vf_atom,
                                int *ilist) {
   _ilist=ilist;
@@ -192,7 +190,7 @@ void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag,
 }
 
 template <class numtyp, class acctyp>
-double PairGPUAnsT::energy_virial(double *eatom, double **vatom,
+double AnswerT::energy_virial(double *eatom, double **vatom,
                                   double *virial) {
   if (_eflag==false && _vflag==false)
     return 0.0;
@@ -268,7 +266,7 @@ double PairGPUAnsT::energy_virial(double *eatom, double **vatom,
 }
 
 template <class numtyp, class acctyp>
-double PairGPUAnsT::energy_virial(double *eatom, double **vatom,
+double AnswerT::energy_virial(double *eatom, double **vatom,
                                    double *virial, double &ecoul) {
   if (_eflag==false && _vflag==false)
     return 0.0;
@@ -359,7 +357,7 @@ double PairGPUAnsT::energy_virial(double *eatom, double **vatom,
 }
 
 template <class numtyp, class acctyp>
-void PairGPUAnsT::get_answers(double **f, double **tor) {
+void AnswerT::get_answers(double **f, double **tor) {
   acctyp *ap=host_ans.begin();
   if (_ilist==NULL) {
     for (int i=0; i<_inum; i++) {
@@ -404,4 +402,4 @@ void PairGPUAnsT::get_answers(double **f, double **tor) {
   }
 }
 
-template class PairGPUAns<PRECISION,ACC_PRECISION>;
+template class Answer<PRECISION,ACC_PRECISION>;
diff --git a/lib/gpu/ans.h b/lib/gpu/answer.h
similarity index 81%
rename from lib/gpu/ans.h
rename to lib/gpu/answer.h
index 61e97c6fe3..715b0d0d54 100644
--- a/lib/gpu/ans.h
+++ b/lib/gpu/answer.h
@@ -1,22 +1,20 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                  answer.h
+                             -------------------
+                            W. Michael Brown (ORNL)
 
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Class for data management of forces, torques, energies, and virials
 
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
-------------------------------------------------------------------------- */
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/
 
-#ifndef PAIR_GPU_ANS_H
-#define PAIR_GPU_ANS_H
+#ifndef LAL_ANSWER_H
+#define LAL_ANSWER_H
 
 #include <math.h>
 #include "mpi.h"
@@ -38,10 +36,10 @@ using namespace ucl_cudadr;
 #include "precision.h"
 
 template <class numtyp, class acctyp>
-class PairGPUAns {
+class Answer {
  public:
-  PairGPUAns();
-  ~PairGPUAns() { clear(); }
+  Answer();
+  ~Answer() { clear(); }
 
   /// Current number of local atoms stored
   inline int inum() const { return _inum; }
diff --git a/lib/gpu/atom.cpp b/lib/gpu/atom.cpp
index aa7257443d..788522cfd7 100644
--- a/lib/gpu/atom.cpp
+++ b/lib/gpu/atom.cpp
@@ -1,26 +1,24 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                  atom.cpp
+                             -------------------
+                            W. Michael Brown (ORNL)
 
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Class for particle data management
 
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
- 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
-------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/
 
 #include "atom.h"
 
-#define PairGPUAtomT PairGPUAtom<numtyp,acctyp>
+#define AtomT Atom<numtyp,acctyp>
 
 template <class numtyp, class acctyp>
-PairGPUAtomT::PairGPUAtom() : _compiled(false),_allocated(false),
+AtomT::Atom() : _compiled(false),_allocated(false),
                               _max_gpu_bytes(0) {
   #ifndef USE_OPENCL
   sort_config.op = CUDPP_ADD;
@@ -31,7 +29,7 @@ PairGPUAtomT::PairGPUAtom() : _compiled(false),_allocated(false),
 }
 
 template <class numtyp, class acctyp>
-int PairGPUAtomT::bytes_per_atom() const { 
+int AtomT::bytes_per_atom() const { 
   int id_space=0;
   if (_gpu_nbor)
     id_space=2;
@@ -44,7 +42,7 @@ int PairGPUAtomT::bytes_per_atom() const {
 }
 
 template <class numtyp, class acctyp>
-bool PairGPUAtomT::alloc(const int nall) {
+bool AtomT::alloc(const int nall) {
   _max_atoms=static_cast<int>(static_cast<double>(nall)*1.10);
 
   bool success=true;
@@ -138,7 +136,7 @@ bool PairGPUAtomT::alloc(const int nall) {
 }
 
 template <class numtyp, class acctyp>
-bool PairGPUAtomT::add_fields(const bool charge, const bool rot,
+bool AtomT::add_fields(const bool charge, const bool rot,
                               const bool gpu_nbor, const bool bonds) {
   bool realloc=false;
   if (charge && _charge==false) {
@@ -167,7 +165,7 @@ bool PairGPUAtomT::add_fields(const bool charge, const bool rot,
 }
 
 template <class numtyp, class acctyp>
-bool PairGPUAtomT::init(const int nall, const bool charge, const bool rot,
+bool AtomT::init(const int nall, const bool charge, const bool rot,
                         UCL_Device &devi, const bool gpu_nbor,
                         const bool bonds) {
   clear();
@@ -206,7 +204,7 @@ bool PairGPUAtomT::init(const int nall, const bool charge, const bool rot,
 }
   
 template <class numtyp, class acctyp>
-void PairGPUAtomT::clear_resize() {
+void AtomT::clear_resize() {
   if (!_allocated)
     return;
   _allocated=false;
@@ -240,7 +238,7 @@ void PairGPUAtomT::clear_resize() {
 }
 
 template <class numtyp, class acctyp>
-void PairGPUAtomT::clear() {
+void AtomT::clear() {
   _max_gpu_bytes=0;
   if (!_allocated)
     return;
@@ -260,19 +258,19 @@ void PairGPUAtomT::clear() {
 }
 
 template <class numtyp, class acctyp>
-double PairGPUAtomT::host_memory_usage() const {
+double AtomT::host_memory_usage() const {
   int atom_bytes=4;
   if (_charge) 
     atom_bytes+=1;
   if (_rot) 
     atom_bytes+=4;
   return _max_atoms*atom_bytes*sizeof(numtyp)+
-         sizeof(PairGPUAtom<numtyp,acctyp>);
+         sizeof(Atom<numtyp,acctyp>);
 }
   
 // Sort arrays for neighbor list calculation
 template <class numtyp, class acctyp>
-void PairGPUAtomT::sort_neighbor(const int num_atoms) {
+void AtomT::sort_neighbor(const int num_atoms) {
   #ifndef USE_OPENCL
   CUDPPResult result = cudppSort(sort_plan, (unsigned *)dev_cell_id.begin(), 
                                  (int *)dev_particle_id.begin(), 
@@ -292,7 +290,7 @@ void PairGPUAtomT::sort_neighbor(const int num_atoms) {
 #endif
 
 template <class numtyp, class acctyp>
-void PairGPUAtomT::compile_kernels(UCL_Device &dev) {
+void AtomT::compile_kernels(UCL_Device &dev) {
   atom_program=new UCL_Program(dev);
   atom_program->load_string(atom,"");
   k_cast_x.set_function(*atom_program,"kernel_cast_x");
@@ -301,4 +299,4 @@ void PairGPUAtomT::compile_kernels(UCL_Device &dev) {
 
 #endif
 
-template class PairGPUAtom<PRECISION,ACC_PRECISION>;
+template class Atom<PRECISION,ACC_PRECISION>;
diff --git a/lib/gpu/atom.cu b/lib/gpu/atom.cu
index ab79ac6e9c..06c194e5de 100644
--- a/lib/gpu/atom.cu
+++ b/lib/gpu/atom.cu
@@ -1,19 +1,17 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
-------------------------------------------------------------------------- */
+// **************************************************************************
+//                                  atom.cu
+//                             -------------------
+//                           W. Michael Brown (ORNL)
+//
+//  Device code for atom data casting
+//
+// __________________________________________________________________________
+//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+//    begin                : 
+//    email                : brownw@ornl.gov
+// **************************************************************************
 
 #ifdef NV_KERNEL
 #include "geryon/ucl_nv_kernel.h"
diff --git a/lib/gpu/atom.h b/lib/gpu/atom.h
index 25fbf0899c..4b4f72095f 100644
--- a/lib/gpu/atom.h
+++ b/lib/gpu/atom.h
@@ -1,19 +1,17 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                   atom.h
+                             -------------------
+                            W. Michael Brown (ORNL)
 
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Class for particle data management
 
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
-------------------------------------------------------------------------- */
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/
 
 #ifndef PAIR_GPU_ATOM_H
 #define PAIR_GPU_ATOM_H
@@ -41,10 +39,10 @@ using namespace ucl_cudadr;
 #include "precision.h"
 
 template <class numtyp, class acctyp>
-class PairGPUAtom {
+class Atom {
  public:
-  PairGPUAtom();
-  ~PairGPUAtom() { clear(); }
+  Atom();
+  ~Atom() { clear(); }
 
   /// Maximum number of atoms that can be stored with current allocation
   inline int max_atoms() const { return _max_atoms; }
diff --git a/lib/gpu/balance.h b/lib/gpu/balance.h
index 465dde2c1c..aa23b9af3b 100644
--- a/lib/gpu/balance.h
+++ b/lib/gpu/balance.h
@@ -1,22 +1,20 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                  balance.h
+                             -------------------
+                            W. Michael Brown (ORNL)
 
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Class for host-device load balancing
 
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
- 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
-------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
 
-#ifndef PAIR_GPU_BALANCE_H
-#define PAIR_GPU_BALANCE_H
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/
+
+#ifndef LAL_BALANCE_H
+#define LAL_BALANCE_H
 
 #include "device.h"
 #include <math.h>
@@ -27,13 +25,13 @@
 
 /// Host/device load balancer
 template<class numtyp, class acctyp>
-class PairGPUBalance {
+class Balance {
  public:
-  inline PairGPUBalance() : _init_done(false), _measure_this_step(false) {}
-  inline ~PairGPUBalance() { clear(); }
+  inline Balance() : _init_done(false), _measure_this_step(false) {}
+  inline ~Balance() { clear(); }
 
   /// Clear any old data and setup for new LAMMPS run
-  inline void init(PairGPUDevice<numtyp, acctyp> *gpu, const bool gpu_nbor,
+  inline void init(Device<numtyp, acctyp> *gpu, const bool gpu_nbor,
                    const double split);
 
   /// Clear all host and device data
@@ -107,7 +105,7 @@ class PairGPUBalance {
   }
   
  private:
-  PairGPUDevice<numtyp,acctyp> *_device;
+  Device<numtyp,acctyp> *_device;
   UCL_Timer _device_time;
   bool _init_done, _gpu_nbor;
   
@@ -119,10 +117,10 @@ class PairGPUBalance {
   int _inum, _inum_full, _timestep;
 };
 
-#define PairGPUBalanceT PairGPUBalance<numtyp,acctyp>
+#define BalanceT Balance<numtyp,acctyp>
 
 template <class numtyp, class acctyp>
-void PairGPUBalanceT::init(PairGPUDevice<numtyp, acctyp> *gpu, 
+void BalanceT::init(Device<numtyp, acctyp> *gpu, 
                            const bool gpu_nbor, const double split) {
   clear();
   _gpu_nbor=gpu_nbor;
@@ -145,7 +143,7 @@ void PairGPUBalanceT::init(PairGPUDevice<numtyp, acctyp> *gpu,
 }
 
 template <class numtyp, class acctyp>
-int PairGPUBalanceT::get_gpu_count(const int ago, const int inum_full) {
+int BalanceT::get_gpu_count(const int ago, const int inum_full) {
   _measure_this_step=false;
   if (_load_balance) {
     if (_avg_count<11 || _timestep%_HD_BALANCE_EVERY==0) {
@@ -164,7 +162,7 @@ int PairGPUBalanceT::get_gpu_count(const int ago, const int inum_full) {
 }
     
 template <class numtyp, class acctyp>
-void PairGPUBalanceT::balance(const double cpu_time) {
+void BalanceT::balance(const double cpu_time) {
   if (_measure_this_step) {
     _measure_this_step=false;
     double gpu_time=_device_time.seconds();
diff --git a/lib/gpu/base_atomic.cpp b/lib/gpu/base_atomic.cpp
index dec46e9ae1..4386b3e36e 100644
--- a/lib/gpu/base_atomic.cpp
+++ b/lib/gpu/base_atomic.cpp
@@ -1,46 +1,44 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                               base_atomic.cpp
+                             -------------------
+                            W. Michael Brown (ORNL)
 
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Base class for pair styles with per-particle data for position and type
 
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
- 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
-------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/
  
 #include "base_atomic.h"
-#define AtomicGPUMemoryT AtomicGPUMemory<numtyp, acctyp>
+#define BaseAtomicT BaseAtomic<numtyp, acctyp>
 
-extern PairGPUDevice<PRECISION,ACC_PRECISION> pair_gpu_device;
+extern Device<PRECISION,ACC_PRECISION> global_device;
 
 template <class numtyp, class acctyp>
-AtomicGPUMemoryT::AtomicGPUMemory() : _compiled(false), _max_bytes(0)  {
-  device=&pair_gpu_device;
-  ans=new PairGPUAns<numtyp,acctyp>();
-  nbor=new PairGPUNbor();
+BaseAtomicT::BaseAtomic() : _compiled(false), _max_bytes(0)  {
+  device=&global_device;
+  ans=new Answer<numtyp,acctyp>();
+  nbor=new Neighbor();
 }
 
 template <class numtyp, class acctyp>
-AtomicGPUMemoryT::~AtomicGPUMemory() {
+BaseAtomicT::~BaseAtomic() {
   delete ans;
   delete nbor;
 }
 
 template <class numtyp, class acctyp>
-int AtomicGPUMemoryT::bytes_per_atom_atomic(const int max_nbors) const {
+int BaseAtomicT::bytes_per_atom_atomic(const int max_nbors) const {
   return device->atom.bytes_per_atom()+ans->bytes_per_atom()+
          nbor->bytes_per_atom(max_nbors);
 }
 
 template <class numtyp, class acctyp>
-int AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall,
+int BaseAtomicT::init_atomic(const int nlocal, const int nall,
                                   const int max_nbors, const int maxspecial,
                                   const double cell_size,
                                   const double gpu_split, FILE *_screen,
@@ -49,7 +47,7 @@ int AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall,
   screen=_screen;
 
   bool gpu_nbor=false;
-  if (device->gpu_mode()==PairGPUDevice<numtyp,acctyp>::GPU_NEIGH)
+  if (device->gpu_mode()==Device<numtyp,acctyp>::GPU_NEIGH)
     gpu_nbor=true;
 
   int _gpu_host=0;
@@ -90,12 +88,12 @@ int AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall,
 }
 
 template <class numtyp, class acctyp>
-void AtomicGPUMemoryT::estimate_gpu_overhead() {
+void BaseAtomicT::estimate_gpu_overhead() {
   device->estimate_gpu_overhead(1,_gpu_overhead,_driver_overhead);
 }
 
 template <class numtyp, class acctyp>
-void AtomicGPUMemoryT::clear_atomic() {
+void BaseAtomicT::clear_atomic() {
   // Output any timing information
   acc_timers();
   double avg_split=hd_balancer.all_avg_split();
@@ -123,7 +121,7 @@ void AtomicGPUMemoryT::clear_atomic() {
 // Copy neighbor list from host
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
-int * AtomicGPUMemoryT::reset_nbors(const int nall, const int inum, int *ilist,
+int * BaseAtomicT::reset_nbors(const int nall, const int inum, int *ilist,
                                    int *numj, int **firstneigh, bool &success) {
   success=true;
 
@@ -147,7 +145,7 @@ int * AtomicGPUMemoryT::reset_nbors(const int nall, const int inum, int *ilist,
 // Build neighbor list on device
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
-inline void AtomicGPUMemoryT::build_nbor_list(const int inum,
+inline void BaseAtomicT::build_nbor_list(const int inum,
                                               const int host_inum,
                                               const int nall, double **host_x,
                                               int *host_type, double *sublo,
@@ -176,7 +174,7 @@ inline void AtomicGPUMemoryT::build_nbor_list(const int inum,
 // Copy nbor list from host if necessary and then calculate forces, virials,..
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
-void AtomicGPUMemoryT::compute(const int f_ago, const int inum_full,
+void BaseAtomicT::compute(const int f_ago, const int inum_full,
                                const int nall, double **host_x, int *host_type,
                                int *ilist, int *numj, int **firstneigh,
                                const bool eflag, const bool vflag,
@@ -217,7 +215,7 @@ void AtomicGPUMemoryT::compute(const int f_ago, const int inum_full,
 // Reneighbor on GPU if necessary and then compute forces, virials, energies
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
-int ** AtomicGPUMemoryT::compute(const int ago, const int inum_full,
+int ** BaseAtomicT::compute(const int ago, const int inum_full,
                                  const int nall, double **host_x, int *host_type,
                                  double *sublo, double *subhi, int *tag,
                                  int **nspecial, int **special, const bool eflag, 
@@ -263,13 +261,13 @@ int ** AtomicGPUMemoryT::compute(const int ago, const int inum_full,
 }
 
 template <class numtyp, class acctyp>
-double AtomicGPUMemoryT::host_memory_usage_atomic() const {
+double BaseAtomicT::host_memory_usage_atomic() const {
   return device->atom.host_memory_usage()+nbor->host_memory_usage()+
-         4*sizeof(numtyp)+sizeof(AtomicGPUMemory<numtyp,acctyp>);
+         4*sizeof(numtyp)+sizeof(BaseAtomic<numtyp,acctyp>);
 }
 
 template <class numtyp, class acctyp>
-void AtomicGPUMemoryT::compile_kernels(UCL_Device &dev, const char *pair_str) {
+void BaseAtomicT::compile_kernels(UCL_Device &dev, const char *pair_str) {
   if (_compiled)
     return;
 
@@ -285,5 +283,5 @@ void AtomicGPUMemoryT::compile_kernels(UCL_Device &dev, const char *pair_str) {
   _compiled=true;
 }
 
-template class AtomicGPUMemory<PRECISION,ACC_PRECISION>;
+template class BaseAtomic<PRECISION,ACC_PRECISION>;
 
diff --git a/lib/gpu/base_atomic.h b/lib/gpu/base_atomic.h
index dfde62f96d..d8d30856ae 100644
--- a/lib/gpu/base_atomic.h
+++ b/lib/gpu/base_atomic.h
@@ -1,22 +1,20 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                base_atomic.h
+                             -------------------
+                            W. Michael Brown (ORNL)
 
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Base class for pair styles with per-particle data for position and type
 
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
- 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
-------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
 
-#ifndef ATOMIC_GPU_MEMORY_H
-#define ATOMIC_GPU_MEMORY_H
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/
+
+#ifndef LAL_BASE_ATOMIC_H
+#define LAL_BASE_ATOMIC_H
 
 #include "device.h"
 #include "balance.h"
@@ -29,10 +27,10 @@
 #endif
 
 template <class numtyp, class acctyp>
-class AtomicGPUMemory {
+class BaseAtomic {
  public:
-  AtomicGPUMemory();
-  virtual ~AtomicGPUMemory();
+  BaseAtomic();
+  virtual ~BaseAtomic();
 
   /// Clear any previous data and set up for a new LAMMPS run
   /** \param max_nbors initial number of rows in the neighbor matrix
@@ -150,7 +148,7 @@ class AtomicGPUMemory {
   // -------------------------- DEVICE DATA ------------------------- 
 
   /// Device Properties and Atom and Neighbor storage
-  PairGPUDevice<numtyp,acctyp> *device;
+  Device<numtyp,acctyp> *device;
 
   /// Geryon device
   UCL_Device *ucl_device;
@@ -159,7 +157,7 @@ class AtomicGPUMemory {
   UCL_Timer time_pair;
 
   /// Host device load balancer
-  PairGPUBalance<numtyp,acctyp> hd_balancer;
+  Balance<numtyp,acctyp> hd_balancer;
 
   /// LAMMPS pointer for screen output
   FILE *screen;
@@ -167,16 +165,16 @@ class AtomicGPUMemory {
   // --------------------------- ATOM DATA --------------------------
 
   /// Atom Data
-  PairGPUAtom<numtyp,acctyp> *atom;
+  Atom<numtyp,acctyp> *atom;
 
   // ------------------------ FORCE/ENERGY DATA -----------------------
 
-  PairGPUAns<numtyp,acctyp> *ans;
+  Answer<numtyp,acctyp> *ans;
 
   // --------------------------- NBOR DATA ----------------------------
 
   /// Neighbor data
-  PairGPUNbor *nbor;
+  Neighbor *nbor;
 
   /// True if we need to accumulate time for neighboring
   bool nbor_time_avail;
diff --git a/lib/gpu/base_charge.cpp b/lib/gpu/base_charge.cpp
index ee63138c29..46ff3bf9d2 100644
--- a/lib/gpu/base_charge.cpp
+++ b/lib/gpu/base_charge.cpp
@@ -1,46 +1,45 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Charge/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                               base_charge.cpp
+                             -------------------
+                            W. Michael Brown (ORNL)
 
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Base class for pair styles needing per-particle data for position,
+  charge, and type.
 
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
- 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
-------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/
 
 #include "base_charge.h"
-#define ChargeGPUMemoryT ChargeGPUMemory<numtyp, acctyp>
+#define BaseChargeT BaseCharge<numtyp, acctyp>
 
-extern PairGPUDevice<PRECISION,ACC_PRECISION> pair_gpu_device;
+extern Device<PRECISION,ACC_PRECISION> global_device;
 
 template <class numtyp, class acctyp>
-ChargeGPUMemoryT::ChargeGPUMemory() : _compiled(false), _max_bytes(0) {
-  device=&pair_gpu_device;
-  ans=new PairGPUAns<numtyp,acctyp>();
-  nbor=new PairGPUNbor();
+BaseChargeT::BaseCharge() : _compiled(false), _max_bytes(0) {
+  device=&global_device;
+  ans=new Answer<numtyp,acctyp>();
+  nbor=new Neighbor();
 }
 
 template <class numtyp, class acctyp>
-ChargeGPUMemoryT::~ChargeGPUMemory() {
+BaseChargeT::~BaseCharge() {
   delete ans;
   delete nbor;
 }
 
 template <class numtyp, class acctyp>
-int ChargeGPUMemoryT::bytes_per_atom_atomic(const int max_nbors) const {
+int BaseChargeT::bytes_per_atom_atomic(const int max_nbors) const {
   return device->atom.bytes_per_atom()+ans->bytes_per_atom()+
          nbor->bytes_per_atom(max_nbors);
 }
 
 template <class numtyp, class acctyp>
-int ChargeGPUMemoryT::init_atomic(const int nlocal, const int nall,
+int BaseChargeT::init_atomic(const int nlocal, const int nall,
                                   const int max_nbors, const int maxspecial,
                                   const double cell_size,
                                   const double gpu_split, FILE *_screen,
@@ -49,7 +48,7 @@ int ChargeGPUMemoryT::init_atomic(const int nlocal, const int nall,
   screen=_screen;
 
   bool gpu_nbor=false;
-  if (device->gpu_mode()==PairGPUDevice<numtyp,acctyp>::GPU_NEIGH)
+  if (device->gpu_mode()==Device<numtyp,acctyp>::GPU_NEIGH)
     gpu_nbor=true;
 
   int _gpu_host=0;
@@ -92,12 +91,12 @@ int ChargeGPUMemoryT::init_atomic(const int nlocal, const int nall,
 }
 
 template <class numtyp, class acctyp>
-void ChargeGPUMemoryT::estimate_gpu_overhead() {
+void BaseChargeT::estimate_gpu_overhead() {
   device->estimate_gpu_overhead(1,_gpu_overhead,_driver_overhead);
 }
 
 template <class numtyp, class acctyp>
-void ChargeGPUMemoryT::clear_atomic() {
+void BaseChargeT::clear_atomic() {
   // Output any timing information
   acc_timers();
   double avg_split=hd_balancer.all_avg_split();
@@ -125,7 +124,7 @@ void ChargeGPUMemoryT::clear_atomic() {
 // Copy neighbor list from host
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
-int * ChargeGPUMemoryT::reset_nbors(const int nall, const int inum, int *ilist,
+int * BaseChargeT::reset_nbors(const int nall, const int inum, int *ilist,
                                    int *numj, int **firstneigh, bool &success) {
   success=true;
 
@@ -150,7 +149,7 @@ int * ChargeGPUMemoryT::reset_nbors(const int nall, const int inum, int *ilist,
 // Build neighbor list on device
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
-inline void ChargeGPUMemoryT::build_nbor_list(const int inum,
+inline void BaseChargeT::build_nbor_list(const int inum,
                                               const int host_inum,
                                               const int nall, double **host_x,
                                               int *host_type, double *sublo,
@@ -179,7 +178,7 @@ inline void ChargeGPUMemoryT::build_nbor_list(const int inum,
 // Copy nbor list from host if necessary and then calculate forces, virials,..
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
-void ChargeGPUMemoryT::compute(const int f_ago, const int inum_full,
+void BaseChargeT::compute(const int f_ago, const int inum_full,
                                const int nall, double **host_x, int *host_type,
                                int *ilist, int *numj, int **firstneigh,
                                const bool eflag, const bool vflag,
@@ -226,7 +225,7 @@ void ChargeGPUMemoryT::compute(const int f_ago, const int inum_full,
 // Reneighbor on GPU if necessary and then compute forces, virials, energies
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
-int** ChargeGPUMemoryT::compute(const int ago, const int inum_full,
+int** BaseChargeT::compute(const int ago, const int inum_full,
                                 const int nall, double **host_x, int *host_type,
                                 double *sublo, double *subhi, int *tag,
                                 int **nspecial, int **special, const bool eflag, 
@@ -279,13 +278,13 @@ int** ChargeGPUMemoryT::compute(const int ago, const int inum_full,
 }
 
 template <class numtyp, class acctyp>
-double ChargeGPUMemoryT::host_memory_usage_atomic() const {
+double BaseChargeT::host_memory_usage_atomic() const {
   return device->atom.host_memory_usage()+nbor->host_memory_usage()+
-         4*sizeof(numtyp)+sizeof(ChargeGPUMemory<numtyp,acctyp>);
+         4*sizeof(numtyp)+sizeof(BaseCharge<numtyp,acctyp>);
 }
 
 template <class numtyp, class acctyp>
-void ChargeGPUMemoryT::compile_kernels(UCL_Device &dev, const char *pair_str) {
+void BaseChargeT::compile_kernels(UCL_Device &dev, const char *pair_str) {
   if (_compiled)
     return;
 
@@ -302,5 +301,5 @@ void ChargeGPUMemoryT::compile_kernels(UCL_Device &dev, const char *pair_str) {
   _compiled=true;
 }
 
-template class ChargeGPUMemory<PRECISION,ACC_PRECISION>;
+template class BaseCharge<PRECISION,ACC_PRECISION>;
 
diff --git a/lib/gpu/base_charge.h b/lib/gpu/base_charge.h
index e2dcea36e7..844a427e01 100644
--- a/lib/gpu/base_charge.h
+++ b/lib/gpu/base_charge.h
@@ -1,22 +1,21 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Charge/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                base_charge.h
+                             -------------------
+                            W. Michael Brown (ORNL)
 
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Base class for pair styles needing per-particle data for position,
+  charge, and type.
 
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
- 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
-------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
 
-#ifndef CHARGE_GPU_MEMORY_H
-#define CHARGE_GPU_MEMORY_H
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/
+
+#ifndef LAL_BASE_CHARGE_H
+#define LAL_BASE_CHARGE_H
 
 #include "device.h"
 #include "balance.h"
@@ -29,10 +28,10 @@
 #endif
 
 template <class numtyp, class acctyp>
-class ChargeGPUMemory {
+class BaseCharge {
  public:
-  ChargeGPUMemory();
-  virtual ~ChargeGPUMemory();
+  BaseCharge();
+  virtual ~BaseCharge();
 
   /// Clear any previous data and set up for a new LAMMPS run
   /** \param max_nbors initial number of rows in the neighbor matrix
@@ -146,7 +145,7 @@ class ChargeGPUMemory {
   // -------------------------- DEVICE DATA ------------------------- 
 
   /// Device Properties and Atom and Neighbor storage
-  PairGPUDevice<numtyp,acctyp> *device;
+  Device<numtyp,acctyp> *device;
 
   /// Geryon device
   UCL_Device *ucl_device;
@@ -155,7 +154,7 @@ class ChargeGPUMemory {
   UCL_Timer time_pair;
 
   /// Host device load balancer
-  PairGPUBalance<numtyp,acctyp> hd_balancer;
+  Balance<numtyp,acctyp> hd_balancer;
 
   /// LAMMPS pointer for screen output
   FILE *screen;
@@ -163,17 +162,17 @@ class ChargeGPUMemory {
   // --------------------------- ATOM DATA --------------------------
 
   /// Atom Data
-  PairGPUAtom<numtyp,acctyp> *atom;
+  Atom<numtyp,acctyp> *atom;
 
 
   // ------------------------ FORCE/ENERGY DATA -----------------------
 
-  PairGPUAns<numtyp,acctyp> *ans;
+  Answer<numtyp,acctyp> *ans;
 
   // --------------------------- NBOR DATA ----------------------------
 
   /// Neighbor data
-  PairGPUNbor *nbor;
+  Neighbor *nbor;
 
   /// True if we need to accumulate time for neighboring
   bool nbor_time_avail;
diff --git a/lib/gpu/base_ellipsoid.cpp b/lib/gpu/base_ellipsoid.cpp
index b2223db59d..a3518a0427 100644
--- a/lib/gpu/base_ellipsoid.cpp
+++ b/lib/gpu/base_ellipsoid.cpp
@@ -1,7 +1,7 @@
 /***************************************************************************
                               base_ellipsoid.cpp
                              -------------------
-                               W. Michael Brown
+                            W. Michael Brown (ORNL)
 
   Base class for acceleration of ellipsoid potentials
 
@@ -23,13 +23,13 @@ using namespace LAMMPS_AL;
 #endif
 
 #define BaseEllipsoidT BaseEllipsoid<numtyp, acctyp>
-extern PairGPUDevice<PRECISION,ACC_PRECISION> pair_gpu_device;
+extern Device<PRECISION,ACC_PRECISION> global_device;
 
 template <class numtyp, class acctyp>
 BaseEllipsoidT::BaseEllipsoid() : _compiled(false), _max_bytes(0) {
-  device=&pair_gpu_device;
-  ans=new PairGPUAns<numtyp,acctyp>();
-  nbor=new PairGPUNbor();
+  device=&global_device;
+  ans=new Answer<numtyp,acctyp>();
+  nbor=new Neighbor();
 }
 
 template <class numtyp, class acctyp>
@@ -56,7 +56,7 @@ int BaseEllipsoidT::init_base(const int nlocal, const int nall,
   _ellipsoid_sphere=ellip_sphere;
 
   bool gpu_nbor=false;
-  if (device->gpu_mode()==PairGPUDevice<numtyp,acctyp>::GPU_NEIGH)
+  if (device->gpu_mode()==Device<numtyp,acctyp>::GPU_NEIGH)
     gpu_nbor=true;
 
   int _gpu_host=0;
diff --git a/lib/gpu/base_ellipsoid.h b/lib/gpu/base_ellipsoid.h
index bcec50437b..163892db54 100644
--- a/lib/gpu/base_ellipsoid.h
+++ b/lib/gpu/base_ellipsoid.h
@@ -1,7 +1,7 @@
 /***************************************************************************
                                base_ellipsoid.h
                              -------------------
-                               W. Michael Brown
+                            W. Michael Brown (ORNL)
 
   Base class for acceleration of ellipsoid potentials
 
@@ -13,8 +13,8 @@
     email                : brownw@ornl.gov
  ***************************************************************************/
 
-#ifndef BASE_ELLIPSOID_H
-#define BASE_ELLIPSOID_H
+#ifndef LAL_BASE_ELLIPSOID_H
+#define LAL_BASE_ELLIPSOID_H
 
 #include "device.h"
 #include "balance.h"
@@ -182,7 +182,7 @@ class BaseEllipsoid {
   // -------------------------- DEVICE DATA ------------------------- 
 
   /// Device Properties and Atom and Neighbor storage
-  PairGPUDevice<numtyp,acctyp> *device;
+  Device<numtyp,acctyp> *device;
 
   /// Geryon device
   UCL_Device *ucl_device;
@@ -192,7 +192,7 @@ class BaseEllipsoid {
   UCL_Timer time_nbor3, time_ellipsoid3;
 
   /// Host device load balancer
-  PairGPUBalance<numtyp,acctyp> hd_balancer;
+  Balance<numtyp,acctyp> hd_balancer;
 
   /// LAMMPS pointer for screen output
   FILE *screen;
@@ -200,7 +200,7 @@ class BaseEllipsoid {
   // --------------------------- ATOM DATA --------------------------
 
   /// Atom Data
-  PairGPUAtom<numtyp,acctyp> *atom;
+  Atom<numtyp,acctyp> *atom;
 
   // --------------------------- TYPE DATA -------------------------- 
 
@@ -209,12 +209,12 @@ class BaseEllipsoid {
 
   // ------------------------ FORCE/ENERGY DATA -----------------------
 
-  PairGPUAns<numtyp,acctyp> *ans;
+  Answer<numtyp,acctyp> *ans;
 
   // --------------------------- NBOR DATA ----------------------------
 
   /// Neighbor data
-  PairGPUNbor *nbor;
+  Neighbor *nbor;
   /// ilist with particles sorted by type
   UCL_H_Vec<int> host_olist;
   /// True if we need to accumulate time for neighboring
diff --git a/lib/gpu/cg_cmm.cpp b/lib/gpu/cg_cmm.cpp
index 4b83843abb..417479ea43 100644
--- a/lib/gpu/cg_cmm.cpp
+++ b/lib/gpu/cg_cmm.cpp
@@ -25,10 +25,10 @@
 #include <cassert>
 #define CMM_GPU_MemoryT CMM_GPU_Memory<numtyp, acctyp>
 
-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;
 
 template <class numtyp, class acctyp>
-CMM_GPU_MemoryT::CMM_GPU_Memory() : AtomicGPUMemory<numtyp,acctyp>(), _allocated(false) {
+CMM_GPU_MemoryT::CMM_GPU_Memory() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
 }
 
 template <class numtyp, class acctyp>
diff --git a/lib/gpu/cg_cmm.h b/lib/gpu/cg_cmm.h
index b43d4180fd..5173114760 100644
--- a/lib/gpu/cg_cmm.h
+++ b/lib/gpu/cg_cmm.h
@@ -21,7 +21,7 @@
 #include "base_atomic.h"
 
 template <class numtyp, class acctyp>
-class CMM_GPU_Memory : public AtomicGPUMemory<numtyp, acctyp> {
+class CMM_GPU_Memory : public BaseAtomic<numtyp, acctyp> {
  public:
   CMM_GPU_Memory();
   ~CMM_GPU_Memory(); 
diff --git a/lib/gpu/cg_cmm_long.cpp b/lib/gpu/cg_cmm_long.cpp
index 551ae867b0..8a584a2c32 100644
--- a/lib/gpu/cg_cmm_long.cpp
+++ b/lib/gpu/cg_cmm_long.cpp
@@ -25,10 +25,10 @@
 #include <cassert>
 #define CMML_GPU_MemoryT CMML_GPU_Memory<numtyp, acctyp>
 
-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;
 
 template <class numtyp, class acctyp>
-CMML_GPU_MemoryT::CMML_GPU_Memory() : ChargeGPUMemory<numtyp,acctyp>(),
+CMML_GPU_MemoryT::CMML_GPU_Memory() : BaseCharge<numtyp,acctyp>(),
                                     _allocated(false) {
 }
 
diff --git a/lib/gpu/cg_cmm_long.h b/lib/gpu/cg_cmm_long.h
index a3324c5040..079472be70 100644
--- a/lib/gpu/cg_cmm_long.h
+++ b/lib/gpu/cg_cmm_long.h
@@ -21,7 +21,7 @@
 #include "base_charge.h"
 
 template <class numtyp, class acctyp>
-class CMML_GPU_Memory : public ChargeGPUMemory<numtyp, acctyp> {
+class CMML_GPU_Memory : public BaseCharge<numtyp, acctyp> {
  public:
   CMML_GPU_Memory();
   ~CMML_GPU_Memory();
diff --git a/lib/gpu/cg_cmm_msm.cpp b/lib/gpu/cg_cmm_msm.cpp
index 1912de1ba2..c0d09aa30c 100644
--- a/lib/gpu/cg_cmm_msm.cpp
+++ b/lib/gpu/cg_cmm_msm.cpp
@@ -25,10 +25,10 @@
 #include <cassert>
 #define CMMM_GPU_MemoryT CMMM_GPU_Memory<numtyp, acctyp>
 
-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;
 
 template <class numtyp, class acctyp>
-CMMM_GPU_MemoryT::CMMM_GPU_Memory() : ChargeGPUMemory<numtyp,acctyp>(),
+CMMM_GPU_MemoryT::CMMM_GPU_Memory() : BaseCharge<numtyp,acctyp>(),
                                     _allocated(false) {
 }
 
diff --git a/lib/gpu/cg_cmm_msm.h b/lib/gpu/cg_cmm_msm.h
index 9be97de50d..afc054aadf 100644
--- a/lib/gpu/cg_cmm_msm.h
+++ b/lib/gpu/cg_cmm_msm.h
@@ -21,7 +21,7 @@
 #include "base_charge.h"
 
 template <class numtyp, class acctyp>
-class CMMM_GPU_Memory : public ChargeGPUMemory<numtyp, acctyp> {
+class CMMM_GPU_Memory : public BaseCharge<numtyp, acctyp> {
  public:
   CMMM_GPU_Memory();
   ~CMMM_GPU_Memory();
diff --git a/lib/gpu/charmm_long.cpp b/lib/gpu/charmm_long.cpp
index 87e1e8be8c..d949e3456d 100644
--- a/lib/gpu/charmm_long.cpp
+++ b/lib/gpu/charmm_long.cpp
@@ -25,10 +25,10 @@
 #include <cassert>
 #define CRML_GPU_MemoryT CRML_GPU_Memory<numtyp, acctyp>
 
-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;
 
 template <class numtyp, class acctyp>
-CRML_GPU_MemoryT::CRML_GPU_Memory() : ChargeGPUMemory<numtyp,acctyp>(),
+CRML_GPU_MemoryT::CRML_GPU_Memory() : BaseCharge<numtyp,acctyp>(),
                                     _allocated(false) {
 }
 
diff --git a/lib/gpu/charmm_long.h b/lib/gpu/charmm_long.h
index 634a528a3a..6de168e0aa 100644
--- a/lib/gpu/charmm_long.h
+++ b/lib/gpu/charmm_long.h
@@ -21,7 +21,7 @@
 #include "base_charge.h"
 
 template <class numtyp, class acctyp>
-class CRML_GPU_Memory : public ChargeGPUMemory<numtyp, acctyp> {
+class CRML_GPU_Memory : public BaseCharge<numtyp, acctyp> {
  public:
   CRML_GPU_Memory();
   ~CRML_GPU_Memory();
diff --git a/lib/gpu/device.cpp b/lib/gpu/device.cpp
index eccba0786e..e6cb60b2a6 100644
--- a/lib/gpu/device.cpp
+++ b/lib/gpu/device.cpp
@@ -1,19 +1,17 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                  device.cpp
+                             -------------------
+                            W. Michael Brown (ORNL)
 
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Class for management of the device where the computations are performed
 
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
-------------------------------------------------------------------------- */
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/
 
 #include "device.h"
 #include "precision.h"
@@ -29,21 +27,21 @@
 #include "pair_gpu_dev_ptx.h"
 #endif
 
-#define PairGPUDeviceT PairGPUDevice<numtyp, acctyp>
+#define DeviceT Device<numtyp, acctyp>
 
 template <class numtyp, class acctyp>
-PairGPUDeviceT::PairGPUDevice() : _init_count(0), _device_init(false),
+DeviceT::Device() : _init_count(0), _device_init(false),
                                   _gpu_mode(GPU_FORCE), _first_device(0),
                                   _last_device(0), _compiled(false) {
 }
 
 template <class numtyp, class acctyp>
-PairGPUDeviceT::~PairGPUDevice() {
+DeviceT::~Device() {
   clear_device();
 }
 
 template <class numtyp, class acctyp>
-int PairGPUDeviceT::init_device(MPI_Comm world, MPI_Comm replica, 
+int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, 
                                 const int first_gpu, const int last_gpu,
                                 const int gpu_mode, const double p_split,
                                 const int nthreads, const int t_per_atom) {
@@ -135,10 +133,10 @@ int PairGPUDeviceT::init_device(MPI_Comm world, MPI_Comm replica,
 }
 
 template <class numtyp, class acctyp>
-int PairGPUDeviceT::init(PairGPUAns<numtyp,acctyp> &ans, const bool charge,
+int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
                          const bool rot, const int nlocal, 
                          const int host_nlocal, const int nall,
-                         PairGPUNbor *nbor, const int maxspecial,
+                         Neighbor *nbor, const int maxspecial,
                          const int gpu_host, const int max_nbors, 
                          const double cell_size, const bool pre_cut) {
   if (!_device_init)
@@ -181,7 +179,7 @@ int PairGPUDeviceT::init(PairGPUAns<numtyp,acctyp> &ans, const bool charge,
   if (!ans.init(ef_nlocal,charge,rot,*gpu))
     return -3;
 
-  if (!nbor->init(&_nbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial,
+  if (!nbor->init(&_neighbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial,
                   *gpu,gpu_nbor,gpu_host,pre_cut, _block_cell_2d, 
                   _block_cell_id, _block_nbor_build))
     return -3;
@@ -192,7 +190,7 @@ int PairGPUDeviceT::init(PairGPUAns<numtyp,acctyp> &ans, const bool charge,
 }
 
 template <class numtyp, class acctyp>
-int PairGPUDeviceT::init(PairGPUAns<numtyp,acctyp> &ans, const int nlocal,
+int DeviceT::init(Answer<numtyp,acctyp> &ans, const int nlocal,
                          const int nall) {
   if (!_device_init)
     return -1;                          
@@ -215,21 +213,21 @@ int PairGPUDeviceT::init(PairGPUAns<numtyp,acctyp> &ans, const int nlocal,
 }
 
 template <class numtyp, class acctyp>
-void PairGPUDeviceT::set_single_precompute
+void DeviceT::set_single_precompute
                      (PPPMGPUMemory<numtyp,acctyp,float,_lgpu_float4> *pppm) {
   _long_range_precompute=1;
   pppm_single=pppm;
 }
 
 template <class numtyp, class acctyp>
-void PairGPUDeviceT::set_double_precompute
+void DeviceT::set_double_precompute
                      (PPPMGPUMemory<numtyp,acctyp,double,_lgpu_double4> *pppm) {
   _long_range_precompute=2;
   pppm_double=pppm;
 }
 
 template <class numtyp, class acctyp>
-void PairGPUDeviceT::init_message(FILE *screen, const char *name,
+void DeviceT::init_message(FILE *screen, const char *name,
                                   const int first_gpu, const int last_gpu) {
   #ifdef USE_OPENCL
   std::string fs="";
@@ -272,7 +270,7 @@ void PairGPUDeviceT::init_message(FILE *screen, const char *name,
 }
 
 template <class numtyp, class acctyp>
-void PairGPUDeviceT::estimate_gpu_overhead(const int kernel_calls, 
+void DeviceT::estimate_gpu_overhead(const int kernel_calls, 
                                            double &gpu_overhead,
                                            double &gpu_driver_overhead) {
   UCL_H_Vec<int> *host_data_in=NULL, *host_data_out=NULL;
@@ -384,9 +382,9 @@ void PairGPUDeviceT::estimate_gpu_overhead(const int kernel_calls,
 }              
 
 template <class numtyp, class acctyp>
-void PairGPUDeviceT::output_times(UCL_Timer &time_pair, 
-                                  PairGPUAns<numtyp,acctyp> &ans, 
-                                  PairGPUNbor &nbor, const double avg_split, 
+void DeviceT::output_times(UCL_Timer &time_pair, 
+                                  Answer<numtyp,acctyp> &ans, 
+                                  Neighbor &nbor, const double avg_split, 
                                   const double max_bytes, 
                                   const double gpu_overhead,
                                   const double driver_overhead, 
@@ -440,12 +438,12 @@ void PairGPUDeviceT::output_times(UCL_Timer &time_pair,
 }
 
 template <class numtyp, class acctyp>
-void PairGPUDeviceT::output_kspace_times(UCL_Timer &time_in, 
+void DeviceT::output_kspace_times(UCL_Timer &time_in, 
                                          UCL_Timer &time_out,
                                          UCL_Timer &time_map,
                                          UCL_Timer &time_rho,
                                          UCL_Timer &time_interp,
-                                         PairGPUAns<numtyp,acctyp> &ans, 
+                                         Answer<numtyp,acctyp> &ans, 
                                          const double max_bytes, 
                                          const double cpu_time, 
                                          const double idle_time, FILE *screen) {
@@ -500,13 +498,13 @@ void PairGPUDeviceT::output_kspace_times(UCL_Timer &time_in,
 }
 
 template <class numtyp, class acctyp>
-void PairGPUDeviceT::clear() {
+void DeviceT::clear() {
   if (_init_count>0) {
     _long_range_precompute=0;
     _init_count--;
     if (_init_count==0) {
       atom.clear();
-      _nbor_shared.clear();
+      _neighbor_shared.clear();
       if (_compiled) {
         k_zero.clear();
         k_info.clear();
@@ -518,7 +516,7 @@ void PairGPUDeviceT::clear() {
 }
 
 template <class numtyp, class acctyp>
-void PairGPUDeviceT::clear_device() {
+void DeviceT::clear_device() {
   while (_init_count>0)
     clear();
   if (_device_init) {
@@ -528,7 +526,7 @@ void PairGPUDeviceT::clear_device() {
 }
 
 template <class numtyp, class acctyp>
-int PairGPUDeviceT::compile_kernels() {
+int DeviceT::compile_kernels() {
   int flag=0;
 
   if (_compiled)
@@ -588,27 +586,27 @@ int PairGPUDeviceT::compile_kernels() {
 }
 
 template <class numtyp, class acctyp>
-double PairGPUDeviceT::host_memory_usage() const {
+double DeviceT::host_memory_usage() const {
   return atom.host_memory_usage()+4*sizeof(numtyp)+
-         sizeof(PairGPUDevice<numtyp,acctyp>);
+         sizeof(Device<numtyp,acctyp>);
 }
 
-template class PairGPUDevice<PRECISION,ACC_PRECISION>;
-PairGPUDevice<PRECISION,ACC_PRECISION> pair_gpu_device;
+template class Device<PRECISION,ACC_PRECISION>;
+Device<PRECISION,ACC_PRECISION> global_device;
 
 int lmp_init_device(MPI_Comm world, MPI_Comm replica, const int first_gpu,
                     const int last_gpu, const int gpu_mode, 
                     const double particle_split, const int nthreads,
                     const int t_per_atom) {
-  return pair_gpu_device.init_device(world,replica,first_gpu,last_gpu,gpu_mode,
+  return global_device.init_device(world,replica,first_gpu,last_gpu,gpu_mode,
                                      particle_split,nthreads,t_per_atom);
 }
 
 void lmp_clear_device() {
-  pair_gpu_device.clear_device();
+  global_device.clear_device();
 }
 
 double lmp_gpu_forces(double **f, double **tor, double *eatom,
                       double **vatom, double *virial, double &ecoul) {
-  return pair_gpu_device.fix_gpu(f,tor,eatom,vatom,virial,ecoul);
+  return global_device.fix_gpu(f,tor,eatom,vatom,virial,ecoul);
 }
diff --git a/lib/gpu/device.cu b/lib/gpu/device.cu
index 73411c99ff..6c2f3606c0 100644
--- a/lib/gpu/device.cu
+++ b/lib/gpu/device.cu
@@ -1,19 +1,17 @@
-/* ----------------------------------------------------------------------
-   LAMMPS-Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
-------------------------------------------------------------------------- */
+// **************************************************************************
+//                                  device.cu
+//                             -------------------
+//                           W. Michael Brown (ORNL)
+//
+//  Device code for device information
+//
+// __________________________________________________________________________
+//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+//    begin                : 
+//    email                : brownw@ornl.gov
+// ***************************************************************************/
 
 /*************************************************************************
                            Preprocessor Definitions
@@ -63,9 +61,6 @@
 
 *************************************************************************/
 
-#ifndef PAIR_GPU_DEV_KERNEL
-#define PAIR_GPU_DEV_KERNEL
-
 #ifdef NV_KERNEL
 
 #include "nv_kernel_def.h"
@@ -116,5 +111,3 @@ __kernel void kernel_info(__global int *info) {
   info[13]=THREADS_PER_CHARGE;
 }
 
-#endif
-
diff --git a/lib/gpu/device.h b/lib/gpu/device.h
index 5b5a5c7912..7aab94f243 100644
--- a/lib/gpu/device.h
+++ b/lib/gpu/device.h
@@ -1,26 +1,24 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                  device.h
+                             -------------------
+                            W. Michael Brown (ORNL)
 
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Class for management of the device where the computations are performed
 
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
-------------------------------------------------------------------------- */
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/
 
-#ifndef PAIR_GPU_DEVICE_H
-#define PAIR_GPU_DEVICE_H
+#ifndef LAL_DEVICE_H
+#define LAL_DEVICE_H
 
 #include "atom.h"
-#include "ans.h"
-#include "nbor.h"
+#include "answer.h"
+#include "neighbor.h"
 #include "pppm.h"
 #include "mpi.h"
 #include <sstream>
@@ -32,10 +30,10 @@ template <class numtyp, class acctyp,
           class grdtyp, class grdtyp4> class PPPMGPUMemory;
 
 template <class numtyp, class acctyp>
-class PairGPUDevice {
+class Device {
  public:
-  PairGPUDevice();
-  ~PairGPUDevice(); 
+  Device();
+  ~Device(); 
  
   /// Initialize the device for use by this process
   /** Sets up a per-device MPI communicator for load balancing and initializes
@@ -68,9 +66,9 @@ class PairGPUDevice {
     * - -3 if there is an out of memory error
     * - -4 if the GPU library was not compiled for GPU
     * - -5 Double precision is not supported on card **/
-  int init(PairGPUAns<numtyp,acctyp> &a, const bool charge, const bool rot,
+  int init(Answer<numtyp,acctyp> &a, const bool charge, const bool rot,
            const int nlocal, const int host_nlocal, const int nall,
-           PairGPUNbor *nbor, const int maxspecial, const int gpu_host,
+           Neighbor *nbor, const int maxspecial, const int gpu_host,
            const int max_nbors, const double cell_size, const bool pre_cut);
 
   /// Initialize the device for Atom storage only
@@ -83,7 +81,7 @@ class PairGPUDevice {
     * - -3 if there is an out of memory error
     * - -4 if the GPU library was not compiled for GPU
     * - -5 Double precision is not supported on card **/
-  int init(PairGPUAns<numtyp,acctyp> &ans, const int nlocal, const int nall);
+  int init(Answer<numtyp,acctyp> &ans, const int nlocal, const int nall);
 
   /// Output a message for pair_style acceleration with device stats
   void init_message(FILE *screen, const char *name,
@@ -109,8 +107,8 @@ class PairGPUDevice {
   inline bool double_precision() { return gpu->double_precision(); }
   
   /// Output a message with timing information
-  void output_times(UCL_Timer &time_pair, PairGPUAns<numtyp,acctyp> &ans, 
-                    PairGPUNbor &nbor, const double avg_split, 
+  void output_times(UCL_Timer &time_pair, Answer<numtyp,acctyp> &ans, 
+                    Neighbor &nbor, const double avg_split, 
                     const double max_bytes, const double gpu_overhead,
                     const double driver_overhead, 
                     const int threads_per_atom, FILE *screen);
@@ -119,7 +117,7 @@ class PairGPUDevice {
   void output_kspace_times(UCL_Timer &time_in, UCL_Timer &time_out,
                            UCL_Timer & time_map, UCL_Timer & time_rho,
                            UCL_Timer &time_interp, 
-                           PairGPUAns<numtyp,acctyp> &ans, 
+                           Answer<numtyp,acctyp> &ans, 
                            const double max_bytes, const double cpu_time,
                            const double cpu_idle_time, FILE *screen);
 
@@ -130,7 +128,7 @@ class PairGPUDevice {
   void clear_device();
 
   /// Add an answer object for putting forces, energies, etc from GPU to LAMMPS
-  inline void add_ans_object(PairGPUAns<numtyp,acctyp> *ans)
+  inline void add_ans_object(Answer<numtyp,acctyp> *ans)
     { ans_queue.push(ans); }
 
   /// Add "answers" (force,energies,etc.) into LAMMPS structures
@@ -248,12 +246,12 @@ class PairGPUDevice {
   // --------------------------- ATOM DATA -------------------------- 
 
   /// Atom Data
-  PairGPUAtom<numtyp,acctyp> atom;
+  Atom<numtyp,acctyp> atom;
 
   // --------------------------- NBOR DATA ----------------------------
   
   /// Neighbor Data
-  PairGPUNborShared _nbor_shared;
+  NeighborShared _neighbor_shared;
 
   // ------------------------ LONG RANGE DATA -------------------------
   
@@ -274,7 +272,7 @@ class PairGPUDevice {
   }
 
  private:
-  std::queue<PairGPUAns<numtyp,acctyp> *> ans_queue;
+  std::queue<Answer<numtyp,acctyp> *> ans_queue;
   int _init_count;
   bool _device_init, _host_timer_started, _time_device;
   MPI_Comm _comm_world, _comm_replica, _comm_gpu;
diff --git a/lib/gpu/ellipsoid_extra.h b/lib/gpu/ellipsoid_extra.h
index 9d54efdeb9..62efadf181 100644
--- a/lib/gpu/ellipsoid_extra.h
+++ b/lib/gpu/ellipsoid_extra.h
@@ -1,7 +1,7 @@
 // **************************************************************************
 //                              ellipsoid_extra.h
 //                             -------------------
-//                               W. Michael Brown
+//                           W. Michael Brown (ORNL)
 //
 //  Device code for Ellipsoid math routines
 //
@@ -13,8 +13,8 @@
 //    email                : brownw@ornl.gov
 // ***************************************************************************/
 
-#ifndef ELLIPSOID_EXTRA_H
-#define ELLIPSOID_EXTRA_H
+#ifndef LAL_ELLIPSOID_EXTRA_H
+#define LAL_ELLIPSOID_EXTRA_H
 
 enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE};
 
@@ -409,16 +409,4 @@ __inline void gpu_times_column3(const numtyp m[9], const numtyp v[3],
   ans[2] = m[6]*v[0] + m[7]*v[1] + m[8]*v[2];
 }
 
-
-
-
-
-
-
-
-
-
-
-
-
 #endif
diff --git a/lib/gpu/ellipsoid_nbor.cu b/lib/gpu/ellipsoid_nbor.cu
index e91d356b4e..74b6be2cfa 100644
--- a/lib/gpu/ellipsoid_nbor.cu
+++ b/lib/gpu/ellipsoid_nbor.cu
@@ -1,7 +1,7 @@
 // **************************************************************************
 //                              ellipsoid_nbor.cu
 //                             -------------------
-//                               W. Michael Brown
+//                           W. Michael Brown (ORNL)
 //
 //  Device code for Ellipsoid neighbor routines
 //
@@ -13,9 +13,6 @@
 //    email                : brownw@ornl.gov
 // ***************************************************************************/
 
-#ifndef ELLIPSOID_NBOR_H
-#define ELLIPSOID_NBOR_H
-
 #ifdef NV_KERNEL
 
 #include "nv_kernel_def.h"
@@ -161,5 +158,3 @@ __kernel void kernel_nbor_fast(__global numtyp4 *x_, __global numtyp2 *cut_form,
     dev_nbor[ii+nbor_pitch]=newj;
   }
 }
-
-#endif
diff --git a/lib/gpu/gayberne.cpp b/lib/gpu/gayberne.cpp
index 54ddc7f9e6..b1433ee7fb 100644
--- a/lib/gpu/gayberne.cpp
+++ b/lib/gpu/gayberne.cpp
@@ -24,7 +24,7 @@
 using namespace LAMMPS_AL;
 
 #define GayBerneT GayBerne<numtyp, acctyp>
-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;
 
 template <class numtyp, class acctyp>
 GayBerneT::GayBerne() : BaseEllipsoid<numtyp,acctyp>(),
diff --git a/lib/gpu/lj.cpp b/lib/gpu/lj.cpp
index 16bf7ae92b..58ff72ffe0 100644
--- a/lib/gpu/lj.cpp
+++ b/lib/gpu/lj.cpp
@@ -25,10 +25,10 @@
 #include <cassert>
 #define LJL_GPU_MemoryT LJL_GPU_Memory<numtyp, acctyp>
 
-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;
 
 template <class numtyp, class acctyp>
-LJL_GPU_MemoryT::LJL_GPU_Memory() : AtomicGPUMemory<numtyp,acctyp>(), _allocated(false) {
+LJL_GPU_MemoryT::LJL_GPU_Memory() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
 }
 
 template <class numtyp, class acctyp>
diff --git a/lib/gpu/lj.h b/lib/gpu/lj.h
index f90f372349..8e9989beb9 100644
--- a/lib/gpu/lj.h
+++ b/lib/gpu/lj.h
@@ -21,7 +21,7 @@
 #include "base_atomic.h"
 
 template <class numtyp, class acctyp>
-class LJL_GPU_Memory : public AtomicGPUMemory<numtyp, acctyp> {
+class LJL_GPU_Memory : public BaseAtomic<numtyp, acctyp> {
  public:
   LJL_GPU_Memory();
   ~LJL_GPU_Memory(); 
diff --git a/lib/gpu/lj96.cpp b/lib/gpu/lj96.cpp
index 4fb04716c1..d6477d6c3b 100644
--- a/lib/gpu/lj96.cpp
+++ b/lib/gpu/lj96.cpp
@@ -25,10 +25,10 @@
 #include <cassert>
 #define LJ96_GPU_MemoryT LJ96_GPU_Memory<numtyp, acctyp>
 
-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;
 
 template <class numtyp, class acctyp>
-LJ96_GPU_MemoryT::LJ96_GPU_Memory() : AtomicGPUMemory<numtyp,acctyp>(), _allocated(false) {
+LJ96_GPU_MemoryT::LJ96_GPU_Memory() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
 }
 
 template <class numtyp, class acctyp>
diff --git a/lib/gpu/lj96.h b/lib/gpu/lj96.h
index 187283c206..e68e8e4fad 100644
--- a/lib/gpu/lj96.h
+++ b/lib/gpu/lj96.h
@@ -21,7 +21,7 @@
 #include "base_atomic.h"
 
 template <class numtyp, class acctyp>
-class LJ96_GPU_Memory : public AtomicGPUMemory<numtyp, acctyp> {
+class LJ96_GPU_Memory : public BaseAtomic<numtyp, acctyp> {
  public:
   LJ96_GPU_Memory();
   ~LJ96_GPU_Memory();
diff --git a/lib/gpu/lj_class2_long.cpp b/lib/gpu/lj_class2_long.cpp
index 5b56bf0b3e..5c47df6b31 100644
--- a/lib/gpu/lj_class2_long.cpp
+++ b/lib/gpu/lj_class2_long.cpp
@@ -25,10 +25,10 @@ using namespace LAMMPS_AL;
 
 #define LJClass2LongT LJClass2Long<numtyp, acctyp>
 
-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;
 
 template <class numtyp, class acctyp>
-LJClass2LongT::LJClass2Long() : ChargeGPUMemory<numtyp,acctyp>(),
+LJClass2LongT::LJClass2Long() : BaseCharge<numtyp,acctyp>(),
                                 _allocated(false) {
 }
 
diff --git a/lib/gpu/lj_class2_long.h b/lib/gpu/lj_class2_long.h
index fff2f68994..c263c01e57 100644
--- a/lib/gpu/lj_class2_long.h
+++ b/lib/gpu/lj_class2_long.h
@@ -21,7 +21,7 @@
 namespace LAMMPS_AL {
 
 template <class numtyp, class acctyp>
-class LJClass2Long : public ChargeGPUMemory<numtyp, acctyp> {
+class LJClass2Long : public BaseCharge<numtyp, acctyp> {
  public:
   LJClass2Long();
   ~LJClass2Long();
diff --git a/lib/gpu/lj_coul.cpp b/lib/gpu/lj_coul.cpp
index c6eaa74080..1304195b4b 100644
--- a/lib/gpu/lj_coul.cpp
+++ b/lib/gpu/lj_coul.cpp
@@ -25,10 +25,10 @@
 #include <cassert>
 #define LJC_GPU_MemoryT LJC_GPU_Memory<numtyp, acctyp>
 
-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;
 
 template <class numtyp, class acctyp>
-LJC_GPU_MemoryT::LJC_GPU_Memory() : ChargeGPUMemory<numtyp,acctyp>(),
+LJC_GPU_MemoryT::LJC_GPU_Memory() : BaseCharge<numtyp,acctyp>(),
                                     _allocated(false) {
 }
 
diff --git a/lib/gpu/lj_coul.h b/lib/gpu/lj_coul.h
index 5d1d325a56..628af7b430 100644
--- a/lib/gpu/lj_coul.h
+++ b/lib/gpu/lj_coul.h
@@ -21,7 +21,7 @@
 #include "base_charge.h"
 
 template <class numtyp, class acctyp>
-class LJC_GPU_Memory : public ChargeGPUMemory<numtyp, acctyp> {
+class LJC_GPU_Memory : public BaseCharge<numtyp, acctyp> {
  public:
   LJC_GPU_Memory();
   ~LJC_GPU_Memory();
diff --git a/lib/gpu/lj_coul_long.cpp b/lib/gpu/lj_coul_long.cpp
index 926906bb62..00a666d502 100644
--- a/lib/gpu/lj_coul_long.cpp
+++ b/lib/gpu/lj_coul_long.cpp
@@ -25,10 +25,10 @@
 #include <cassert>
 #define LJCL_GPU_MemoryT LJCL_GPU_Memory<numtyp, acctyp>
 
-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;
 
 template <class numtyp, class acctyp>
-LJCL_GPU_MemoryT::LJCL_GPU_Memory() : ChargeGPUMemory<numtyp,acctyp>(),
+LJCL_GPU_MemoryT::LJCL_GPU_Memory() : BaseCharge<numtyp,acctyp>(),
                                     _allocated(false) {
 }
 
diff --git a/lib/gpu/lj_coul_long.h b/lib/gpu/lj_coul_long.h
index 9f10c9a888..3e9502ed41 100644
--- a/lib/gpu/lj_coul_long.h
+++ b/lib/gpu/lj_coul_long.h
@@ -21,7 +21,7 @@
 #include "base_charge.h"
 
 template <class numtyp, class acctyp>
-class LJCL_GPU_Memory : public ChargeGPUMemory<numtyp, acctyp> {
+class LJCL_GPU_Memory : public BaseCharge<numtyp, acctyp> {
  public:
   LJCL_GPU_Memory();
   ~LJCL_GPU_Memory();
diff --git a/lib/gpu/lj_expand.cpp b/lib/gpu/lj_expand.cpp
index bb3fccad8e..67860980d3 100644
--- a/lib/gpu/lj_expand.cpp
+++ b/lib/gpu/lj_expand.cpp
@@ -25,10 +25,10 @@
 #include <cassert>
 #define LJE_GPU_MemoryT LJE_GPU_Memory<numtyp, acctyp>
 
-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;
 
 template <class numtyp, class acctyp>
-LJE_GPU_MemoryT::LJE_GPU_Memory() : AtomicGPUMemory<numtyp,acctyp>(), _allocated(false) {
+LJE_GPU_MemoryT::LJE_GPU_Memory() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
 }
 
 template <class numtyp, class acctyp>
diff --git a/lib/gpu/lj_expand.h b/lib/gpu/lj_expand.h
index 7e6bdd92fb..22ea145218 100644
--- a/lib/gpu/lj_expand.h
+++ b/lib/gpu/lj_expand.h
@@ -21,7 +21,7 @@
 #include "base_atomic.h"
 
 template <class numtyp, class acctyp>
-class LJE_GPU_Memory : public AtomicGPUMemory<numtyp, acctyp> {
+class LJE_GPU_Memory : public BaseAtomic<numtyp, acctyp> {
  public:
   LJE_GPU_Memory();
   ~LJE_GPU_Memory();
diff --git a/lib/gpu/morse.cpp b/lib/gpu/morse.cpp
index c23ea587bf..a9298333fc 100644
--- a/lib/gpu/morse.cpp
+++ b/lib/gpu/morse.cpp
@@ -25,10 +25,10 @@
 #include <cassert>
 #define MOR_GPU_MemoryT MOR_GPU_Memory<numtyp, acctyp>
 
-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;
 
 template <class numtyp, class acctyp>
-MOR_GPU_MemoryT::MOR_GPU_Memory() : AtomicGPUMemory<numtyp,acctyp>(), _allocated(false) {
+MOR_GPU_MemoryT::MOR_GPU_Memory() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
 }
 
 template <class numtyp, class acctyp>
diff --git a/lib/gpu/morse.h b/lib/gpu/morse.h
index 9b718d354e..84f298a702 100644
--- a/lib/gpu/morse.h
+++ b/lib/gpu/morse.h
@@ -21,7 +21,7 @@
 #include "base_atomic.h"
 
 template <class numtyp, class acctyp>
-class MOR_GPU_Memory : public AtomicGPUMemory<numtyp, acctyp> {
+class MOR_GPU_Memory : public BaseAtomic<numtyp, acctyp> {
  public:
   MOR_GPU_Memory();
   ~MOR_GPU_Memory(); 
diff --git a/lib/gpu/nbor_cpu.cu b/lib/gpu/nbor_cpu.cu
deleted file mode 100644
index 238023b429..0000000000
--- a/lib/gpu/nbor_cpu.cu
+++ /dev/null
@@ -1,46 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
-------------------------------------------------------------------------- */
-
-#ifdef NV_KERNEL
-
-#include "geryon/ucl_nv_kernel.h"
-
-#else
-
-#define GLOBAL_ID_X get_global_id(0)
-
-#endif
-
-__kernel void kernel_unpack(__global int *dev_nbor, __global int *dev_ij,
-                            const int inum) {
-  // ii indexes the two interacting particles in gi
-  int ii=GLOBAL_ID_X;
-
-  if (ii<inum) {
-    __global int *nbor=dev_nbor+ii+inum;
-    int numj=*nbor;
-    nbor+=inum;
-    __global int *list=dev_ij+*nbor;
-    __global int *list_end=list+numj;
-  
-    for ( ; list<list_end; list++) {
-      *nbor=*list;
-      nbor+=inum;
-    }
-  } // if ii
-}
-
diff --git a/lib/gpu/nbor_shared.h b/lib/gpu/nbor_shared.h
deleted file mode 100644
index 978ba2fc6a..0000000000
--- a/lib/gpu/nbor_shared.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
-------------------------------------------------------------------------- */
-
-#ifndef PAIR_GPU_NBOR_SHARED_H
-#define PAIR_GPU_NBOR_SHARED_H
-
-#ifdef USE_OPENCL
-
-#include "geryon/ocl_kernel.h"
-#include "geryon/ocl_texture.h"
-using namespace ucl_opencl;
-
-#else
-
-#include "geryon/nvd_kernel.h"
-#include "geryon/nvd_texture.h"
-using namespace ucl_cudadr;
-
-#endif
-
-class PairGPUNborShared {
- public:
-  PairGPUNborShared() : _compiled(false) {}
-  ~PairGPUNborShared() { clear(); }
- 
-  /// Free all memory on host and device
-  void clear();
-
-  /// Texture for cached position/type access with CUDA
-  UCL_Texture neigh_tex;
-
-  /// Compile kernels for neighbor lists
-  void compile_kernels(UCL_Device &dev, const bool gpu_nbor);
-
-  // ----------------------------- Kernels
-  UCL_Program *nbor_program, *build_program;
-  UCL_Kernel k_nbor, k_cell_id, k_cell_counts, k_build_nbor;
-  UCL_Kernel k_transpose, k_special;
-
- private:
-  bool _compiled, _gpu_nbor;
-};
-
-#endif
diff --git a/lib/gpu/nbor.cpp b/lib/gpu/neighbor.cpp
similarity index 89%
rename from lib/gpu/nbor.cpp
rename to lib/gpu/neighbor.cpp
index c1b6c37e90..be8ce5e9af 100644
--- a/lib/gpu/nbor.cpp
+++ b/lib/gpu/neighbor.cpp
@@ -1,27 +1,25 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                 neighbor.cpp
+                             -------------------
+                            W. Michael Brown (ORNL)
+                              Peng Wang (Nvidia)
 
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Class for handling neighbor lists
 
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
- 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
-                         Peng Wang (Nvidia), penwang@nvidia.com
-------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : 
+    email                : brownw@ornl.gov, penwang@nvidia.com
+ ***************************************************************************/
 
 #include "precision.h"
-#include "nbor.h"
+#include "neighbor.h"
 #include "device.h"
 #include "math.h"
 
-int PairGPUNbor::bytes_per_atom(const int max_nbors) const {
+int Neighbor::bytes_per_atom(const int max_nbors) const {
   if (_gpu_nbor)
     return (max_nbors+2)*sizeof(int);
   else if (_use_packing)
@@ -30,7 +28,7 @@ int PairGPUNbor::bytes_per_atom(const int max_nbors) const {
     return (max_nbors+3)*sizeof(int);
 }
 
-bool PairGPUNbor::init(PairGPUNborShared *shared, const int inum,
+bool Neighbor::init(NeighborShared *shared, const int inum,
                        const int host_inum, const int max_nbors, 
                        const int maxspecial, UCL_Device &devi, 
                        const bool gpu_nbor, const int gpu_host, 
@@ -89,7 +87,7 @@ bool PairGPUNbor::init(PairGPUNborShared *shared, const int inum,
   return success;
 }
 
-void PairGPUNbor::alloc(bool &success) { 
+void Neighbor::alloc(bool &success) { 
   dev_nbor.clear();
   host_acc.clear();
   int nt=_max_atoms+_max_host;
@@ -156,7 +154,7 @@ void PairGPUNbor::alloc(bool &success) {
   _allocated=true;
 }
   
-void PairGPUNbor::clear() {
+void Neighbor::clear() {
   _gpu_bytes=0.0;
   _cell_bytes=0.0;
   _c_bytes=0.0;
@@ -181,7 +179,7 @@ void PairGPUNbor::clear() {
   }
 }
 
-double PairGPUNbor::host_memory_usage() const {
+double Neighbor::host_memory_usage() const {
   if (_gpu_nbor) {
     if (_gpu_host)
       return host_nbor.row_bytes()*host_nbor.rows()+host_ilist.row_bytes()+
@@ -190,10 +188,10 @@ double PairGPUNbor::host_memory_usage() const {
       return 0;
   } else 
     return host_packed.row_bytes()*host_packed.rows()+host_acc.row_bytes()+
-           sizeof(PairGPUNbor);
+           sizeof(Neighbor);
 }
 
-void PairGPUNbor::get_host(const int inum, int *ilist, int *numj,
+void Neighbor::get_host(const int inum, int *ilist, int *numj,
                            int **firstneigh, const int block_size) {  
   time_nbor.start();
 
@@ -258,9 +256,9 @@ void PairGPUNbor::get_host(const int inum, int *ilist, int *numj,
 }
 
 template <class numtyp, class acctyp>
-void PairGPUNbor::build_nbor_list(const int inum, const int host_inum,
+void Neighbor::build_nbor_list(const int inum, const int host_inum,
                                   const int nall, 
-                                  PairGPUAtom<numtyp,acctyp> &atom, 
+                                  Atom<numtyp,acctyp> &atom, 
                                   double *sublo, double *subhi, int *tag, 
                                   int **nspecial, int **special, bool &success,
                                   int &mn) {
@@ -399,8 +397,8 @@ void PairGPUNbor::build_nbor_list(const int inum, const int host_inum,
   time_nbor.stop();
 }
 
-template void PairGPUNbor::build_nbor_list<PRECISION,ACC_PRECISION>
+template void Neighbor::build_nbor_list<PRECISION,ACC_PRECISION>
      (const int inum, const int host_inum, const int nall,
-      PairGPUAtom<PRECISION,ACC_PRECISION> &atom, double *sublo, double *subhi,
+      Atom<PRECISION,ACC_PRECISION> &atom, double *sublo, double *subhi,
       int *, int **, int **, bool &success, int &mn);
 
diff --git a/lib/gpu/nbor.h b/lib/gpu/neighbor.h
similarity index 84%
rename from lib/gpu/nbor.h
rename to lib/gpu/neighbor.h
index 0bb2ceaa0c..5242b154b9 100644
--- a/lib/gpu/nbor.h
+++ b/lib/gpu/neighbor.h
@@ -1,25 +1,24 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                  neighbor.h
+                             -------------------
+                            W. Michael Brown (ORNL)
+                              Peng Wang (Nvidia)
 
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Class for handling neighbor lists
 
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
-------------------------------------------------------------------------- */
+    begin                : 
+    email                : brownw@ornl.gov, penwang@nvidia.com
+ ***************************************************************************/
 
-#ifndef PAIR_GPU_NBOR_H
-#define PAIR_GPU_NBOR_H
+#ifndef LAL_NEIGHBOR_H
+#define LAL_NEIGHBOR_H
 
 #include "atom.h"
-#include "nbor_shared.h"
+#include "neighbor_shared.h"
 
 #define IJ_SIZE 131072
 
@@ -37,10 +36,10 @@ using namespace ucl_cudadr;
 
 #endif
 
-class PairGPUNbor {
+class Neighbor {
  public:
-  PairGPUNbor() : _allocated(false), _use_packing(false) {}
-  ~PairGPUNbor() { clear(); }
+  Neighbor() : _allocated(false), _use_packing(false) {}
+  ~Neighbor() { clear(); }
  
   /// Determine whether neighbor unpacking should be used
   /** If false, twice as much memory is reserved to allow unpacking neighbors by 
@@ -57,7 +56,7 @@ class PairGPUNbor {
     *                 2 if gpu_nbor is true, and host needs a full nbor list
     * \param pre_cut True if cutoff test will be performed in separate kernel
     *                than the force kernel **/
-  bool init(PairGPUNborShared *shared, const int inum, const int host_inum,
+  bool init(NeighborShared *shared, const int inum, const int host_inum,
             const int max_nbors, const int maxspecial, UCL_Device &dev,
             const bool gpu_nbor, const int gpu_host, const bool pre_cut,
             const int block_cell_2d, const int block_cell_id, 
@@ -138,7 +137,7 @@ class PairGPUNbor {
   /// Build nbor list on the device
   template <class numtyp, class acctyp>
   void build_nbor_list(const int inum, const int host_inum, const int nall,
-                       PairGPUAtom<numtyp,acctyp> &atom, double *sublo,
+                       Atom<numtyp,acctyp> &atom, double *sublo,
                        double *subhi, int *tag, int **nspecial, int **special, 
                        bool &success, int &max_nbors);
 
@@ -187,7 +186,7 @@ class PairGPUNbor {
   UCL_Timer time_nbor, time_kernel;
   
  private:
-  PairGPUNborShared *_shared;
+  NeighborShared *_shared;
   UCL_Device *dev;
   bool _allocated, _use_packing;
   int _max_atoms, _max_nbors, _max_host, _nbor_pitch, _maxspecial;
diff --git a/lib/gpu/neighbor_cpu.cu b/lib/gpu/neighbor_cpu.cu
new file mode 100644
index 0000000000..014561e829
--- /dev/null
+++ b/lib/gpu/neighbor_cpu.cu
@@ -0,0 +1,44 @@
+// **************************************************************************
+//                               neighbor_cpu.cu
+//                             -------------------
+//                           W. Michael Brown (ORNL)
+//
+//  Device code for handling CPU generated neighbor lists
+//
+// __________________________________________________________________________
+//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+//    begin                : 
+//    email                : brownw@ornl.gov
+// ***************************************************************************/
+
+#ifdef NV_KERNEL
+
+#include "geryon/ucl_nv_kernel.h"
+
+#else
+
+#define GLOBAL_ID_X get_global_id(0)
+
+#endif
+
+__kernel void kernel_unpack(__global int *dev_nbor, __global int *dev_ij,
+                            const int inum) {
+  // ii (from GLOBAL_ID_X) selects which column of dev_nbor this work-item unpacks
+  int ii=GLOBAL_ID_X;
+
+  if (ii<inum) {
+    __global int *nbor=dev_nbor+ii+inum;  // slot inum+ii: entry count numj
+    int numj=*nbor;
+    nbor+=inum;                           // slot 2*inum+ii: offset into dev_ij
+    __global int *list=dev_ij+*nbor;
+    __global int *list_end=list+numj;
+  
+    for ( ; list<list_end; list++) {
+      *nbor=*list;                        // first store overwrites the offset slot
+      nbor+=inum;                         // outputs for ii are spaced inum apart
+    }
+  } // if ii
+}
+
diff --git a/lib/gpu/nbor_gpu.cu b/lib/gpu/neighbor_gpu.cu
similarity index 89%
rename from lib/gpu/nbor_gpu.cu
rename to lib/gpu/neighbor_gpu.cu
index d1ed9fd743..ce2e9e97a1 100644
--- a/lib/gpu/nbor_gpu.cu
+++ b/lib/gpu/neighbor_gpu.cu
@@ -1,20 +1,18 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing authors: Peng Wang (Nvidia), penwang@nvidia.com
-                         Mike Brown (ORNL), brownw@ornl.gov
-------------------------------------------------------------------------- */
+// **************************************************************************
+//                               neighbor_gpu.cu
+//                             -------------------
+//                              Peng Wang (Nvidia)
+//                           W. Michael Brown (ORNL)
+//
+//  Device code for handling GPU generated neighbor lists
+//
+// __________________________________________________________________________
+//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+//    begin                : 
+//    email                : penwang@nvidia.com, brownw@ornl.gov
+// ***************************************************************************/
 
 #ifdef NV_KERNEL
 
diff --git a/lib/gpu/nbor_shared.cpp b/lib/gpu/neighbor_shared.cpp
similarity index 52%
rename from lib/gpu/nbor_shared.cpp
rename to lib/gpu/neighbor_shared.cpp
index 08721065e5..87ad6b43b0 100644
--- a/lib/gpu/nbor_shared.cpp
+++ b/lib/gpu/neighbor_shared.cpp
@@ -1,21 +1,19 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                             neighbor_shared.cpp
+                             -------------------
+                            W. Michael Brown (ORNL)
 
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Class for management of data shared by all neighbor lists
 
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
- 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
-------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
 
-#include "nbor_shared.h"
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/
+
+#include "neighbor_shared.h"
 
 #ifdef USE_OPENCL
 #include "nbor_cl.h"
@@ -24,7 +22,7 @@
 #include "pair_gpu_build_ptx.h"
 #endif
   
-void PairGPUNborShared::clear() {
+void NeighborShared::clear() {
   if (_compiled) {
     if (_gpu_nbor) {
       k_cell_id.clear();
@@ -41,7 +39,7 @@ void PairGPUNborShared::clear() {
   }
 }
 
-void PairGPUNborShared::compile_kernels(UCL_Device &dev, const bool gpu_nbor) {
+void NeighborShared::compile_kernels(UCL_Device &dev, const bool gpu_nbor) {
   if (_compiled)
   	return;
   	
@@ -50,7 +48,7 @@ void PairGPUNborShared::compile_kernels(UCL_Device &dev, const bool gpu_nbor) {
 
   if (gpu_nbor==false) {
     nbor_program=new UCL_Program(dev);
-    nbor_program->load_string(nbor_cpu,flags.c_str());
+    nbor_program->load_string(neighbor_cpu,flags.c_str());
     k_nbor.set_function(*nbor_program,"kernel_unpack");
   } else {
     build_program=new UCL_Program(dev);
@@ -58,7 +56,7 @@ void PairGPUNborShared::compile_kernels(UCL_Device &dev, const bool gpu_nbor) {
     std::cerr << "CANNOT CURRENTLY USE GPU NEIGHBORING WITH OPENCL\n";
     exit(1);
     #else
-    build_program->load_string(nbor_gpu,flags.c_str());
+    build_program->load_string(neighbor_gpu,flags.c_str());
     #endif
     k_cell_id.set_function(*build_program,"calc_cell_id");
     k_cell_counts.set_function(*build_program,"kernel_calc_cell_counts");
diff --git a/lib/gpu/neighbor_shared.h b/lib/gpu/neighbor_shared.h
new file mode 100644
index 0000000000..c2ea9b4ac7
--- /dev/null
+++ b/lib/gpu/neighbor_shared.h
@@ -0,0 +1,56 @@
+/***************************************************************************
+                              neighbor_shared.h
+                             -------------------
+                            W. Michael Brown (ORNL)
+
+  Class for management of data shared by all neighbor lists
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/
+
+#ifndef LAL_NEIGHBOR_SHARED_H
+#define LAL_NEIGHBOR_SHARED_H
+
+#ifdef USE_OPENCL
+
+#include "geryon/ocl_kernel.h"
+#include "geryon/ocl_texture.h"
+using namespace ucl_opencl;
+
+#else
+
+#include "geryon/nvd_kernel.h"
+#include "geryon/nvd_texture.h"
+using namespace ucl_cudadr;
+
+#endif
+
+class NeighborShared {
+ public:
+  NeighborShared() : _compiled(false) {}  // NOTE(review): pointers/_gpu_nbor not set here
+  ~NeighborShared() { clear(); }          // cleanup is delegated to clear()
+ 
+  /// Free all memory on host and device (work is guarded by the _compiled flag)
+  void clear();
+
+  /// Texture for cached position/type access with CUDA
+  UCL_Texture neigh_tex;
+
+  /// Compile kernels for neighbor lists; returns at once if already compiled
+  void compile_kernels(UCL_Device &dev, const bool gpu_nbor);
+
+  // ----------------------------- Kernels
+  UCL_Program *nbor_program, *build_program;  // new'd in compile_kernels(): nbor_program if !gpu_nbor, else build_program
+  UCL_Kernel k_nbor, k_cell_id, k_cell_counts, k_build_nbor;  // k_nbor is bound to "kernel_unpack"
+  UCL_Kernel k_transpose, k_special;
+
+ private:
+  bool _compiled, _gpu_nbor;  // _compiled gates clear()/compile_kernels(); _gpu_nbor presumably mirrors gpu_nbor - confirm
+};
+
+#endif
diff --git a/lib/gpu/pppm.cpp b/lib/gpu/pppm.cpp
index 2bd394e9f6..8cf9754a59 100644
--- a/lib/gpu/pppm.cpp
+++ b/lib/gpu/pppm.cpp
@@ -26,13 +26,13 @@
 
 #define PPPMGPUMemoryT PPPMGPUMemory<numtyp, acctyp, grdtyp, grdtyp4>
 
-extern PairGPUDevice<PRECISION,ACC_PRECISION> pair_gpu_device;
+extern Device<PRECISION,ACC_PRECISION> global_device;
 
 template <class numtyp, class acctyp, class grdtyp, class grdtyp4>
 PPPMGPUMemoryT::PPPMGPUMemory() : _allocated(false), _compiled(false),
                                   _max_bytes(0) {
-  device=&pair_gpu_device;
-  ans=new PairGPUAns<numtyp,acctyp>();
+  device=&global_device;
+  ans=new Answer<numtyp,acctyp>();
 }
 
 template <class numtyp, class acctyp, class grdtyp, class grdtyp4>
diff --git a/lib/gpu/pppm.h b/lib/gpu/pppm.h
index 8ebc428942..da9a3758a3 100644
--- a/lib/gpu/pppm.h
+++ b/lib/gpu/pppm.h
@@ -27,7 +27,7 @@
 #include "geryon/nvd_texture.h"
 #endif
 
-template <class numtyp, class acctyp> class PairGPUDevice;
+template <class numtyp, class acctyp> class Device;
 
 template <class numtyp, class acctyp, class grdtyp, class grdtyp4>
 class PPPMGPUMemory {
@@ -118,7 +118,7 @@ class PPPMGPUMemory {
   // -------------------------- DEVICE DATA ------------------------- 
 
   /// Device Properties and Atom and Neighbor storage
-  PairGPUDevice<numtyp,acctyp> *device;
+  Device<numtyp,acctyp> *device;
 
   /// Geryon device
   UCL_Device *ucl_device;
@@ -132,7 +132,7 @@ class PPPMGPUMemory {
   // --------------------------- ATOM DATA --------------------------
 
   /// Atom Data
-  PairGPUAtom<numtyp,acctyp> *atom;
+  Atom<numtyp,acctyp> *atom;
 
 
   // --------------------------- GRID DATA --------------------------
@@ -162,7 +162,7 @@ class PPPMGPUMemory {
 
   // ------------------------ FORCE/ENERGY DATA -----------------------
 
-  PairGPUAns<numtyp,acctyp> *ans;
+  Answer<numtyp,acctyp> *ans;
 
   // ------------------------- DEVICE KERNELS -------------------------
   UCL_Program *pppm_program;
diff --git a/lib/gpu/re_squared.cpp b/lib/gpu/re_squared.cpp
index adc4acfb56..851a0c2039 100644
--- a/lib/gpu/re_squared.cpp
+++ b/lib/gpu/re_squared.cpp
@@ -24,7 +24,7 @@
 using namespace LAMMPS_AL;
 
 #define RESquaredT RESquared<numtyp, acctyp>
-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;
 
 template <class numtyp, class acctyp>
 RESquaredT::RESquared() : BaseEllipsoid<numtyp,acctyp>(),