More renaming...

2011-06-10 17:31:48 -04:00
parent 7e0f127008
commit 9ee36a3c22
53 changed files with 580 additions and 637 deletions
--- a/lib/gpu/Nvidia.makefile
+++ b/lib/gpu/Nvidia.makefile
@ -31,8 +31,8 @@ UCL_H  = $(wildcard ./geryon/ucl*.h)
 NVC_H  = $(wildcard ./geryon/nvc*.h) $(UCL_H)
 NVD_H  = $(wildcard ./geryon/nvd*.h) $(UCL_H) nv_kernel_def.h
 # Headers for Pair Stuff
-PAIR_H  = atom.h ans.h nbor_shared.h \
-          nbor.h precision.h device.h \
+PAIR_H  = atom.h answer.h neighbor_shared.h \
+          neighbor.h precision.h device.h \
          balance.h pppm.h

 ALL_H = $(NVD_H) $(PAIR_H)
@ -42,7 +42,7 @@ CUDPP = $(OBJ_DIR)/cudpp.o $(OBJ_DIR)/cudpp_plan.o \
        $(OBJ_DIR)/cudpp_maximal_launch.o $(OBJ_DIR)/cudpp_plan_manager.o \
        $(OBJ_DIR)/radixsort_app.cu_o $(OBJ_DIR)/scan_app.cu_o
 OBJS = $(OBJ_DIR)/atom.o $(OBJ_DIR)/ans.o \
-       $(OBJ_DIR)/nbor.o $(OBJ_DIR)/nbor_shared.o \
+       $(OBJ_DIR)/nbor.o $(OBJ_DIR)/neighbor_shared.o \
       $(OBJ_DIR)/device.o $(OBJ_DIR)/base_atomic.o \
       $(OBJ_DIR)/base_charge.o $(OBJ_DIR)/base_ellipsoid.o \
       $(OBJ_DIR)/pppm.o $(OBJ_DIR)/pppm_ext.o \
@ -62,8 +62,8 @@ OBJS = $(OBJ_DIR)/atom.o $(OBJ_DIR)/ans.o \
       $(CUDPP)
 PTXS = $(OBJ_DIR)/device.ptx \
       $(OBJ_DIR)/atom.ptx $(OBJ_DIR)/atom_ptx.h \
-       $(OBJ_DIR)/nbor_cpu.ptx $(OBJ_DIR)/nbor_ptx.h \
-       $(OBJ_DIR)/nbor_gpu.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h \
+       $(OBJ_DIR)/neighbor_cpu.ptx $(OBJ_DIR)/nbor_ptx.h \
+       $(OBJ_DIR)/neighbor_gpu.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h \
       $(OBJ_DIR)/pppm_f_gpu_kernel.ptx $(OBJ_DIR)/pppm_f_gpu_ptx.h \
       $(OBJ_DIR)/pppm_d_gpu_kernel.ptx $(OBJ_DIR)/pppm_d_gpu_ptx.h \
       $(OBJ_DIR)/ellipsoid_nbor.ptx $(OBJ_DIR)/ellipsoid_nbor_ptx.h \
@ -111,26 +111,26 @@ $(OBJ_DIR)/atom_ptx.h: $(OBJ_DIR)/atom.ptx
 $(OBJ_DIR)/atom.o: atom.cpp atom.h $(NVD_H) $(OBJ_DIR)/atom_ptx.h
 	$(CUDR) -o $@ -c atom.cpp -I$(OBJ_DIR)

-$(OBJ_DIR)/ans.o: ans.cpp ans.h $(NVD_H)
-	$(CUDR) -o $@ -c ans.cpp -I$(OBJ_DIR)
+$(OBJ_DIR)/ans.o: answer.cpp answer.h $(NVD_H)
+	$(CUDR) -o $@ -c answer.cpp -I$(OBJ_DIR)

-$(OBJ_DIR)/nbor_cpu.ptx: nbor_cpu.cu
-	$(CUDA) --ptx -DNV_KERNEL -o $@ nbor_cpu.cu
+$(OBJ_DIR)/neighbor_cpu.ptx: neighbor_cpu.cu
+	$(CUDA) --ptx -DNV_KERNEL -o $@ neighbor_cpu.cu

-$(OBJ_DIR)/nbor_ptx.h: $(OBJ_DIR)/nbor_cpu.ptx
-	$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/nbor_cpu.ptx $(OBJ_DIR)/nbor_ptx.h
+$(OBJ_DIR)/nbor_ptx.h: $(OBJ_DIR)/neighbor_cpu.ptx
+	$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/neighbor_cpu.ptx $(OBJ_DIR)/nbor_ptx.h

-$(OBJ_DIR)/nbor_gpu.ptx: nbor_gpu.cu
-	$(CUDA) --ptx -DNV_KERNEL -o $@ nbor_gpu.cu
+$(OBJ_DIR)/neighbor_gpu.ptx: neighbor_gpu.cu
+	$(CUDA) --ptx -DNV_KERNEL -o $@ neighbor_gpu.cu

-$(OBJ_DIR)/pair_gpu_build_ptx.h: $(OBJ_DIR)/nbor_gpu.ptx
-	$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/nbor_gpu.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h
+$(OBJ_DIR)/pair_gpu_build_ptx.h: $(OBJ_DIR)/neighbor_gpu.ptx
+	$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/neighbor_gpu.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h

-$(OBJ_DIR)/nbor_shared.o: nbor_shared.cpp nbor_shared.h $(OBJ_DIR)/nbor_ptx.h $(OBJ_DIR)/pair_gpu_build_ptx.h $(NVD_H)
-	$(CUDR) -o $@ -c nbor_shared.cpp -I$(OBJ_DIR)
+$(OBJ_DIR)/neighbor_shared.o: neighbor_shared.cpp neighbor_shared.h $(OBJ_DIR)/nbor_ptx.h $(OBJ_DIR)/pair_gpu_build_ptx.h $(NVD_H)
+	$(CUDR) -o $@ -c neighbor_shared.cpp -I$(OBJ_DIR)

-$(OBJ_DIR)/nbor.o: nbor.cpp nbor.h nbor_shared.h $(NVD_H)
-	$(CUDR) -o $@ -c nbor.cpp -I$(OBJ_DIR)
+$(OBJ_DIR)/nbor.o: neighbor.cpp neighbor.h neighbor_shared.h $(NVD_H)
+	$(CUDR) -o $@ -c neighbor.cpp -I$(OBJ_DIR)

 $(OBJ_DIR)/device.ptx: device.cu
 	$(CUDA) --ptx -DNV_KERNEL -o $@ device.cu
--- a/lib/gpu/Opencl.makefile
+++ b/lib/gpu/Opencl.makefile
@ -24,15 +24,15 @@ OCL_LIB = $(LIB_DIR)/libgpu.a
 UCL_H  = $(wildcard ./geryon/ucl*.h)
 OCL_H  = $(wildcard ./geryon/ocl*.h) $(UCL_H)
 # Headers for Pair Stuff
-PAIR_H  = atom.h ans.h nbor_shared.h \
-          nbor.h precision.h device.h \
+PAIR_H  = atom.h answer.h neighbor_shared.h \
+          neighbor.h precision.h device.h \
          balance.h pppm.h

 ALL_H = $(OCL_H) $(PAIR_H)

 EXECS = $(BIN_DIR)/ocl_get_devices
 OBJS = $(OBJ_DIR)/atom.o $(OBJ_DIR)/ans.o \
-       $(OBJ_DIR)/nbor_shared.o $(OBJ_DIR)/nbor.o \
+       $(OBJ_DIR)/neighbor_shared.o $(OBJ_DIR)/nbor.o \
       $(OBJ_DIR)/device.o $(OBJ_DIR)/base_atomic.o \
       $(OBJ_DIR)/base_charge.o $(OBJ_DIR)/base_ellipsoid.o \
       $(OBJ_DIR)/pppm.o $(OBJ_DIR)/pppm_ext.o \
@ -69,17 +69,17 @@ $(OBJ_DIR)/atom_cl.h: atom.cu
 $(OBJ_DIR)/atom.o: atom.cpp atom.h $(OCL_H) $(OBJ_DIR)/atom_cl.h
 	$(OCL) -o $@ -c atom.cpp -I$(OBJ_DIR)

-$(OBJ_DIR)/ans.o: ans.cpp ans.h $(OCL_H)
-	$(OCL) -o $@ -c ans.cpp -I$(OBJ_DIR)
+$(OBJ_DIR)/ans.o: answer.cpp answer.h $(OCL_H)
+	$(OCL) -o $@ -c answer.cpp -I$(OBJ_DIR)

-$(OBJ_DIR)/nbor_cl.h: nbor_cpu.cu
-	$(BSH) ./geryon/file_to_cstr.sh nbor_cpu.cu $(OBJ_DIR)/nbor_cl.h
+$(OBJ_DIR)/nbor_cl.h: neighbor_cpu.cu
+	$(BSH) ./geryon/file_to_cstr.sh neighbor_cpu.cu $(OBJ_DIR)/nbor_cl.h

-$(OBJ_DIR)/nbor_shared.o: nbor_shared.cpp nbor_shared.h $(OCL_H) $(OBJ_DIR)/nbor_cl.h
-	$(OCL) -o $@ -c nbor_shared.cpp -I$(OBJ_DIR)
+$(OBJ_DIR)/neighbor_shared.o: neighbor_shared.cpp neighbor_shared.h $(OCL_H) $(OBJ_DIR)/nbor_cl.h
+	$(OCL) -o $@ -c neighbor_shared.cpp -I$(OBJ_DIR)

-$(OBJ_DIR)/nbor.o: nbor.cpp nbor.h $(OCL_H) nbor_shared.h
-	$(OCL) -o $@ -c nbor.cpp -I$(OBJ_DIR)
+$(OBJ_DIR)/nbor.o: neighbor.cpp neighbor.h $(OCL_H) neighbor_shared.h
+	$(OCL) -o $@ -c neighbor.cpp -I$(OBJ_DIR)

 $(OBJ_DIR)/pair_gpu_dev_cl.h: device.cu
 	$(BSH) ./geryon/file_to_cstr.sh device.cu $(OBJ_DIR)/pair_gpu_dev_cl.h
--- a/lib/gpu/answer.cpp
+++ b/lib/gpu/answer.cpp
@ -1,31 +1,29 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                 answer.cpp
+                             -------------------
+                            W. Michael Brown (ORNL)

-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Class for data management of forces, torques, energies, and virials

-   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
- 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________

-#include "ans.h"
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/

-#define PairGPUAnsT PairGPUAns<numtyp,acctyp>
+#include "answer.h"
+
+#define AnswerT Answer<numtyp,acctyp>

 template <class numtyp, class acctyp>
-PairGPUAnsT::PairGPUAns() : _allocated(false),_eflag(false),_vflag(false),
+AnswerT::Answer() : _allocated(false),_eflag(false),_vflag(false),
                            _inum(0),_ilist(NULL),_newton(false) {
 }

 template <class numtyp, class acctyp>
-int PairGPUAnsT::bytes_per_atom() const { 
+int AnswerT::bytes_per_atom() const { 
  int bytes=11*sizeof(acctyp);
  if (_rot)
    bytes+=4*sizeof(acctyp);
@ -35,7 +33,7 @@ int PairGPUAnsT::bytes_per_atom() const {
 }

 template <class numtyp, class acctyp>
-bool PairGPUAnsT::alloc(const int inum) {
+bool AnswerT::alloc(const int inum) {
  _max_local=static_cast<int>(static_cast<double>(inum)*1.10);

  bool success=true;
@ -70,7 +68,7 @@ bool PairGPUAnsT::alloc(const int inum) {
 }

 template <class numtyp, class acctyp>
-bool PairGPUAnsT::init(const int inum, const bool charge, const bool rot,
+bool AnswerT::init(const int inum, const bool charge, const bool rot,
                       UCL_Device &devi) {
  clear();

@ -100,7 +98,7 @@ bool PairGPUAnsT::init(const int inum, const bool charge, const bool rot,
 }
  
 template <class numtyp, class acctyp>
-bool PairGPUAnsT::add_fields(const bool charge, const bool rot) {
+bool AnswerT::add_fields(const bool charge, const bool rot) {
  bool realloc=false;
  if (charge && _charge==false) {
    _charge=true;
@ -122,7 +120,7 @@ bool PairGPUAnsT::add_fields(const bool charge, const bool rot) {
 }

 template <class numtyp, class acctyp>
-void PairGPUAnsT::clear_resize() {
+void AnswerT::clear_resize() {
  if (!_allocated)
    return;
  _allocated=false;
@ -134,7 +132,7 @@ void PairGPUAnsT::clear_resize() {
 }

 template <class numtyp, class acctyp>
-void PairGPUAnsT::clear() {
+void AnswerT::clear() {
  _gpu_bytes=0;
  if (!_allocated)
    return;
@ -148,7 +146,7 @@ void PairGPUAnsT::clear() {
 }

 template <class numtyp, class acctyp>
-double PairGPUAnsT::host_memory_usage() const {
+double AnswerT::host_memory_usage() const {
  int atom_bytes=4;
  if (_charge) 
    atom_bytes+=1;
@ -156,11 +154,11 @@ double PairGPUAnsT::host_memory_usage() const {
    atom_bytes+=4;
  int ans_bytes=atom_bytes+_ev_fields;
  return ans_bytes*(_max_local)*sizeof(acctyp)+
-         sizeof(PairGPUAns<numtyp,acctyp>);
+         sizeof(Answer<numtyp,acctyp>);
 }
  
 template <class numtyp, class acctyp>
-void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag,
+void AnswerT::copy_answers(const bool eflag, const bool vflag,
                               const bool ef_atom, const bool vf_atom) {
  time_answer.start();
  _eflag=eflag;
@ -184,7 +182,7 @@ void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag,
 }

 template <class numtyp, class acctyp>
-void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag,
+void AnswerT::copy_answers(const bool eflag, const bool vflag,
                               const bool ef_atom, const bool vf_atom,
                               int *ilist) {
  _ilist=ilist;
@ -192,7 +190,7 @@ void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag,
 }

 template <class numtyp, class acctyp>
-double PairGPUAnsT::energy_virial(double *eatom, double **vatom,
+double AnswerT::energy_virial(double *eatom, double **vatom,
                                  double *virial) {
  if (_eflag==false && _vflag==false)
    return 0.0;
@ -268,7 +266,7 @@ double PairGPUAnsT::energy_virial(double *eatom, double **vatom,
 }

 template <class numtyp, class acctyp>
-double PairGPUAnsT::energy_virial(double *eatom, double **vatom,
+double AnswerT::energy_virial(double *eatom, double **vatom,
                                   double *virial, double &ecoul) {
  if (_eflag==false && _vflag==false)
    return 0.0;
@ -359,7 +357,7 @@ double PairGPUAnsT::energy_virial(double *eatom, double **vatom,
 }

 template <class numtyp, class acctyp>
-void PairGPUAnsT::get_answers(double **f, double **tor) {
+void AnswerT::get_answers(double **f, double **tor) {
  acctyp *ap=host_ans.begin();
  if (_ilist==NULL) {
    for (int i=0; i<_inum; i++) {
@ -404,4 +402,4 @@ void PairGPUAnsT::get_answers(double **f, double **tor) {
  }
 }

-template class PairGPUAns<PRECISION,ACC_PRECISION>;
+template class Answer<PRECISION,ACC_PRECISION>;
--- a/lib/gpu/answer.h
+++ b/lib/gpu/answer.h
@ -1,22 +1,20 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                  answer.h
+                             -------------------
+                            W. Michael Brown (ORNL)

-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Class for data management of forces, torques, energies, and virials

-   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________

-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/

-#ifndef PAIR_GPU_ANS_H
-#define PAIR_GPU_ANS_H
+#ifndef LAL_ANSWER_H
+#define LAL_ANSWER_H

 #include <math.h>
 #include "mpi.h"
@ -38,10 +36,10 @@ using namespace ucl_cudadr;
 #include "precision.h"

 template <class numtyp, class acctyp>
-class PairGPUAns {
+class Answer {
 public:
-  PairGPUAns();
-  ~PairGPUAns() { clear(); }
+  Answer();
+  ~Answer() { clear(); }

  /// Current number of local atoms stored
  inline int inum() const { return _inum; }
--- a/lib/gpu/atom.cpp
+++ b/lib/gpu/atom.cpp
@ -1,26 +1,24 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                  atom.cpp
+                             -------------------
+                            W. Michael Brown (ORNL)

-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Class for particle data management

-   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
- 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/

 #include "atom.h"

-#define PairGPUAtomT PairGPUAtom<numtyp,acctyp>
+#define AtomT Atom<numtyp,acctyp>

 template <class numtyp, class acctyp>
-PairGPUAtomT::PairGPUAtom() : _compiled(false),_allocated(false),
+AtomT::Atom() : _compiled(false),_allocated(false),
                              _max_gpu_bytes(0) {
  #ifndef USE_OPENCL
  sort_config.op = CUDPP_ADD;
@ -31,7 +29,7 @@ PairGPUAtomT::PairGPUAtom() : _compiled(false),_allocated(false),
 }

 template <class numtyp, class acctyp>
-int PairGPUAtomT::bytes_per_atom() const { 
+int AtomT::bytes_per_atom() const { 
  int id_space=0;
  if (_gpu_nbor)
    id_space=2;
@ -44,7 +42,7 @@ int PairGPUAtomT::bytes_per_atom() const {
 }

 template <class numtyp, class acctyp>
-bool PairGPUAtomT::alloc(const int nall) {
+bool AtomT::alloc(const int nall) {
  _max_atoms=static_cast<int>(static_cast<double>(nall)*1.10);

  bool success=true;
@ -138,7 +136,7 @@ bool PairGPUAtomT::alloc(const int nall) {
 }

 template <class numtyp, class acctyp>
-bool PairGPUAtomT::add_fields(const bool charge, const bool rot,
+bool AtomT::add_fields(const bool charge, const bool rot,
                              const bool gpu_nbor, const bool bonds) {
  bool realloc=false;
  if (charge && _charge==false) {
@ -167,7 +165,7 @@ bool PairGPUAtomT::add_fields(const bool charge, const bool rot,
 }

 template <class numtyp, class acctyp>
-bool PairGPUAtomT::init(const int nall, const bool charge, const bool rot,
+bool AtomT::init(const int nall, const bool charge, const bool rot,
                        UCL_Device &devi, const bool gpu_nbor,
                        const bool bonds) {
  clear();
@ -206,7 +204,7 @@ bool PairGPUAtomT::init(const int nall, const bool charge, const bool rot,
 }
  
 template <class numtyp, class acctyp>
-void PairGPUAtomT::clear_resize() {
+void AtomT::clear_resize() {
  if (!_allocated)
    return;
  _allocated=false;
@ -240,7 +238,7 @@ void PairGPUAtomT::clear_resize() {
 }

 template <class numtyp, class acctyp>
-void PairGPUAtomT::clear() {
+void AtomT::clear() {
  _max_gpu_bytes=0;
  if (!_allocated)
    return;
@ -260,19 +258,19 @@ void PairGPUAtomT::clear() {
 }

 template <class numtyp, class acctyp>
-double PairGPUAtomT::host_memory_usage() const {
+double AtomT::host_memory_usage() const {
  int atom_bytes=4;
  if (_charge) 
    atom_bytes+=1;
  if (_rot) 
    atom_bytes+=4;
  return _max_atoms*atom_bytes*sizeof(numtyp)+
-         sizeof(PairGPUAtom<numtyp,acctyp>);
+         sizeof(Atom<numtyp,acctyp>);
 }
  
 // Sort arrays for neighbor list calculation
 template <class numtyp, class acctyp>
-void PairGPUAtomT::sort_neighbor(const int num_atoms) {
+void AtomT::sort_neighbor(const int num_atoms) {
  #ifndef USE_OPENCL
  CUDPPResult result = cudppSort(sort_plan, (unsigned *)dev_cell_id.begin(), 
                                 (int *)dev_particle_id.begin(), 
@ -292,7 +290,7 @@ void PairGPUAtomT::sort_neighbor(const int num_atoms) {
 #endif

 template <class numtyp, class acctyp>
-void PairGPUAtomT::compile_kernels(UCL_Device &dev) {
+void AtomT::compile_kernels(UCL_Device &dev) {
  atom_program=new UCL_Program(dev);
  atom_program->load_string(atom,"");
  k_cast_x.set_function(*atom_program,"kernel_cast_x");
@ -301,4 +299,4 @@ void PairGPUAtomT::compile_kernels(UCL_Device &dev) {

 #endif

-template class PairGPUAtom<PRECISION,ACC_PRECISION>;
+template class Atom<PRECISION,ACC_PRECISION>;
--- a/lib/gpu/atom.cu
+++ b/lib/gpu/atom.cu
@ -1,19 +1,17 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
+// **************************************************************************
+//                                  atom.cu
+//                             -------------------
+//                           W. Michael Brown (ORNL)
+//
+//  Device code for atom data casting
+//
+// __________________________________________________________________________
+//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+//    begin                : 
+//    email                : brownw@ornl.gov
+// ***************************************************************************/

 #ifdef NV_KERNEL
 #include "geryon/ucl_nv_kernel.h"
--- a/lib/gpu/atom.h
+++ b/lib/gpu/atom.h
@ -1,19 +1,17 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                   atom.h
+                             -------------------
+                            W. Michael Brown (ORNL)

-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Class for particle data management

-   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________

-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/

 #ifndef PAIR_GPU_ATOM_H
 #define PAIR_GPU_ATOM_H
@ -41,10 +39,10 @@ using namespace ucl_cudadr;
 #include "precision.h"

 template <class numtyp, class acctyp>
-class PairGPUAtom {
+class Atom {
 public:
-  PairGPUAtom();
-  ~PairGPUAtom() { clear(); }
+  Atom();
+  ~Atom() { clear(); }

  /// Maximum number of atoms that can be stored with current allocation
  inline int max_atoms() const { return _max_atoms; }
--- a/lib/gpu/balance.h
+++ b/lib/gpu/balance.h
@ -1,22 +1,20 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                  balance.h
+                             -------------------
+                            W. Michael Brown (ORNL)

-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Class for host-device load balancing

-   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
- 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________

-#ifndef PAIR_GPU_BALANCE_H
-#define PAIR_GPU_BALANCE_H
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/
+
+#ifndef LAL_BALANCE_H
+#define LAL_BALANCE_H

 #include "device.h"
 #include <math.h>
@ -27,13 +25,13 @@

 /// Host/device load balancer
 template<class numtyp, class acctyp>
-class PairGPUBalance {
+class Balance {
 public:
-  inline PairGPUBalance() : _init_done(false), _measure_this_step(false) {}
-  inline ~PairGPUBalance() { clear(); }
+  inline Balance() : _init_done(false), _measure_this_step(false) {}
+  inline ~Balance() { clear(); }

  /// Clear any old data and setup for new LAMMPS run
-  inline void init(PairGPUDevice<numtyp, acctyp> *gpu, const bool gpu_nbor,
+  inline void init(Device<numtyp, acctyp> *gpu, const bool gpu_nbor,
                   const double split);

  /// Clear all host and device data
@ -107,7 +105,7 @@ class PairGPUBalance {
  }
  
 private:
-  PairGPUDevice<numtyp,acctyp> *_device;
+  Device<numtyp,acctyp> *_device;
  UCL_Timer _device_time;
  bool _init_done, _gpu_nbor;
  
@ -119,10 +117,10 @@ class PairGPUBalance {
  int _inum, _inum_full, _timestep;
 };

-#define PairGPUBalanceT PairGPUBalance<numtyp,acctyp>
+#define BalanceT Balance<numtyp,acctyp>

 template <class numtyp, class acctyp>
-void PairGPUBalanceT::init(PairGPUDevice<numtyp, acctyp> *gpu, 
+void BalanceT::init(Device<numtyp, acctyp> *gpu, 
                           const bool gpu_nbor, const double split) {
  clear();
  _gpu_nbor=gpu_nbor;
@ -145,7 +143,7 @@ void PairGPUBalanceT::init(PairGPUDevice<numtyp, acctyp> *gpu,
 }

 template <class numtyp, class acctyp>
-int PairGPUBalanceT::get_gpu_count(const int ago, const int inum_full) {
+int BalanceT::get_gpu_count(const int ago, const int inum_full) {
  _measure_this_step=false;
  if (_load_balance) {
    if (_avg_count<11 || _timestep%_HD_BALANCE_EVERY==0) {
@ -164,7 +162,7 @@ int PairGPUBalanceT::get_gpu_count(const int ago, const int inum_full) {
 }
    
 template <class numtyp, class acctyp>
-void PairGPUBalanceT::balance(const double cpu_time) {
+void BalanceT::balance(const double cpu_time) {
  if (_measure_this_step) {
    _measure_this_step=false;
    double gpu_time=_device_time.seconds();
--- a/lib/gpu/base_atomic.cpp
+++ b/lib/gpu/base_atomic.cpp
@ -1,46 +1,44 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                base_atomic.h
+                             -------------------
+                            W. Michael Brown (ORNL)

-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Base class for pair styles with per-particle data for position and type

-   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
- 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/
 
 #include "base_atomic.h"
-#define AtomicGPUMemoryT AtomicGPUMemory<numtyp, acctyp>
+#define BaseAtomicT BaseAtomic<numtyp, acctyp>

-extern PairGPUDevice<PRECISION,ACC_PRECISION> pair_gpu_device;
+extern Device<PRECISION,ACC_PRECISION> global_device;

 template <class numtyp, class acctyp>
-AtomicGPUMemoryT::AtomicGPUMemory() : _compiled(false), _max_bytes(0)  {
-  device=&pair_gpu_device;
-  ans=new PairGPUAns<numtyp,acctyp>();
-  nbor=new PairGPUNbor();
+BaseAtomicT::BaseAtomic() : _compiled(false), _max_bytes(0)  {
+  device=&global_device;
+  ans=new Answer<numtyp,acctyp>();
+  nbor=new Neighbor();
 }

 template <class numtyp, class acctyp>
-AtomicGPUMemoryT::~AtomicGPUMemory() {
+BaseAtomicT::~BaseAtomic() {
  delete ans;
  delete nbor;
 }

 template <class numtyp, class acctyp>
-int AtomicGPUMemoryT::bytes_per_atom_atomic(const int max_nbors) const {
+int BaseAtomicT::bytes_per_atom_atomic(const int max_nbors) const {
  return device->atom.bytes_per_atom()+ans->bytes_per_atom()+
         nbor->bytes_per_atom(max_nbors);
 }

 template <class numtyp, class acctyp>
-int AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall,
+int BaseAtomicT::init_atomic(const int nlocal, const int nall,
                                  const int max_nbors, const int maxspecial,
                                  const double cell_size,
                                  const double gpu_split, FILE *_screen,
@ -49,7 +47,7 @@ int AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall,
  screen=_screen;

  bool gpu_nbor=false;
-  if (device->gpu_mode()==PairGPUDevice<numtyp,acctyp>::GPU_NEIGH)
+  if (device->gpu_mode()==Device<numtyp,acctyp>::GPU_NEIGH)
    gpu_nbor=true;

  int _gpu_host=0;
@ -90,12 +88,12 @@ int AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall,
 }

 template <class numtyp, class acctyp>
-void AtomicGPUMemoryT::estimate_gpu_overhead() {
+void BaseAtomicT::estimate_gpu_overhead() {
  device->estimate_gpu_overhead(1,_gpu_overhead,_driver_overhead);
 }

 template <class numtyp, class acctyp>
-void AtomicGPUMemoryT::clear_atomic() {
+void BaseAtomicT::clear_atomic() {
  // Output any timing information
  acc_timers();
  double avg_split=hd_balancer.all_avg_split();
@ -123,7 +121,7 @@ void AtomicGPUMemoryT::clear_atomic() {
 // Copy neighbor list from host
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
-int * AtomicGPUMemoryT::reset_nbors(const int nall, const int inum, int *ilist,
+int * BaseAtomicT::reset_nbors(const int nall, const int inum, int *ilist,
                                   int *numj, int **firstneigh, bool &success) {
  success=true;

@ -147,7 +145,7 @@ int * AtomicGPUMemoryT::reset_nbors(const int nall, const int inum, int *ilist,
 // Build neighbor list on device
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
-inline void AtomicGPUMemoryT::build_nbor_list(const int inum,
+inline void BaseAtomicT::build_nbor_list(const int inum,
                                              const int host_inum,
                                              const int nall, double **host_x,
                                              int *host_type, double *sublo,
@ -176,7 +174,7 @@ inline void AtomicGPUMemoryT::build_nbor_list(const int inum,
 // Copy nbor list from host if necessary and then calculate forces, virials,..
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
-void AtomicGPUMemoryT::compute(const int f_ago, const int inum_full,
+void BaseAtomicT::compute(const int f_ago, const int inum_full,
                               const int nall, double **host_x, int *host_type,
                               int *ilist, int *numj, int **firstneigh,
                               const bool eflag, const bool vflag,
@ -217,7 +215,7 @@ void AtomicGPUMemoryT::compute(const int f_ago, const int inum_full,
 // Reneighbor on GPU if necessary and then compute forces, virials, energies
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
-int ** AtomicGPUMemoryT::compute(const int ago, const int inum_full,
+int ** BaseAtomicT::compute(const int ago, const int inum_full,
                                 const int nall, double **host_x, int *host_type,
                                 double *sublo, double *subhi, int *tag,
                                 int **nspecial, int **special, const bool eflag, 
@ -263,13 +261,13 @@ int ** AtomicGPUMemoryT::compute(const int ago, const int inum_full,
 }

 template <class numtyp, class acctyp>
-double AtomicGPUMemoryT::host_memory_usage_atomic() const {
+double BaseAtomicT::host_memory_usage_atomic() const {
  return device->atom.host_memory_usage()+nbor->host_memory_usage()+
-         4*sizeof(numtyp)+sizeof(AtomicGPUMemory<numtyp,acctyp>);
+         4*sizeof(numtyp)+sizeof(BaseAtomic<numtyp,acctyp>);
 }

 template <class numtyp, class acctyp>
-void AtomicGPUMemoryT::compile_kernels(UCL_Device &dev, const char *pair_str) {
+void BaseAtomicT::compile_kernels(UCL_Device &dev, const char *pair_str) {
  if (_compiled)
    return;

@ -285,5 +283,5 @@ void AtomicGPUMemoryT::compile_kernels(UCL_Device &dev, const char *pair_str) {
  _compiled=true;
 }

-template class AtomicGPUMemory<PRECISION,ACC_PRECISION>;
+template class BaseAtomic<PRECISION,ACC_PRECISION>;

--- a/lib/gpu/base_atomic.h
+++ b/lib/gpu/base_atomic.h
@ -1,22 +1,20 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                base_atomic.h
+                             -------------------
+                            W. Michael Brown (ORNL)

-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Base class for pair styles with per-particle data for position and type

-   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
- 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________

-#ifndef ATOMIC_GPU_MEMORY_H
-#define ATOMIC_GPU_MEMORY_H
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/
+
+#ifndef LAL_BASE_ATOMIC_H
+#define LAL_BASE_ATOMIC_H

 #include "device.h"
 #include "balance.h"
@ -29,10 +27,10 @@
 #endif

 template <class numtyp, class acctyp>
-class AtomicGPUMemory {
+class BaseAtomic {
 public:
-  AtomicGPUMemory();
-  virtual ~AtomicGPUMemory();
+  BaseAtomic();
+  virtual ~BaseAtomic();

  /// Clear any previous data and set up for a new LAMMPS run
  /** \param max_nbors initial number of rows in the neighbor matrix
@ -150,7 +148,7 @@ class AtomicGPUMemory {
  // -------------------------- DEVICE DATA ------------------------- 

  /// Device Properties and Atom and Neighbor storage
-  PairGPUDevice<numtyp,acctyp> *device;
+  Device<numtyp,acctyp> *device;

  /// Geryon device
  UCL_Device *ucl_device;
@ -159,7 +157,7 @@ class AtomicGPUMemory {
  UCL_Timer time_pair;

  /// Host device load balancer
-  PairGPUBalance<numtyp,acctyp> hd_balancer;
+  Balance<numtyp,acctyp> hd_balancer;

  /// LAMMPS pointer for screen output
  FILE *screen;
@ -167,16 +165,16 @@ class AtomicGPUMemory {
  // --------------------------- ATOM DATA --------------------------

  /// Atom Data
-  PairGPUAtom<numtyp,acctyp> *atom;
+  Atom<numtyp,acctyp> *atom;

  // ------------------------ FORCE/ENERGY DATA -----------------------

-  PairGPUAns<numtyp,acctyp> *ans;
+  Answer<numtyp,acctyp> *ans;

  // --------------------------- NBOR DATA ----------------------------

  /// Neighbor data
-  PairGPUNbor *nbor;
+  Neighbor *nbor;

  /// True if we need to accumulate time for neighboring
  bool nbor_time_avail;
--- a/lib/gpu/base_charge.cpp
+++ b/lib/gpu/base_charge.cpp
@ -1,46 +1,45 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Charge/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                               base_charge.cpp
+                             -------------------
+                            W. Michael Brown (ORNL)

-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Base class for pair styles needing per-particle data for position,
+  charge, and type.

-   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
- 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/

 #include "base_charge.h"
-#define ChargeGPUMemoryT ChargeGPUMemory<numtyp, acctyp>
+#define BaseChargeT BaseCharge<numtyp, acctyp>

-extern PairGPUDevice<PRECISION,ACC_PRECISION> pair_gpu_device;
+extern Device<PRECISION,ACC_PRECISION> global_device;

 template <class numtyp, class acctyp>
-ChargeGPUMemoryT::ChargeGPUMemory() : _compiled(false), _max_bytes(0) {
-  device=&pair_gpu_device;
-  ans=new PairGPUAns<numtyp,acctyp>();
-  nbor=new PairGPUNbor();
+BaseChargeT::BaseCharge() : _compiled(false), _max_bytes(0) {
+  device=&global_device;
+  ans=new Answer<numtyp,acctyp>();
+  nbor=new Neighbor();
 }

 template <class numtyp, class acctyp>
-ChargeGPUMemoryT::~ChargeGPUMemory() {
+BaseChargeT::~BaseCharge() {
  delete ans;
  delete nbor;
 }

 template <class numtyp, class acctyp>
-int ChargeGPUMemoryT::bytes_per_atom_atomic(const int max_nbors) const {
+int BaseChargeT::bytes_per_atom_atomic(const int max_nbors) const {
  return device->atom.bytes_per_atom()+ans->bytes_per_atom()+
         nbor->bytes_per_atom(max_nbors);
 }

 template <class numtyp, class acctyp>
-int ChargeGPUMemoryT::init_atomic(const int nlocal, const int nall,
+int BaseChargeT::init_atomic(const int nlocal, const int nall,
                                  const int max_nbors, const int maxspecial,
                                  const double cell_size,
                                  const double gpu_split, FILE *_screen,
@ -49,7 +48,7 @@ int ChargeGPUMemoryT::init_atomic(const int nlocal, const int nall,
  screen=_screen;

  bool gpu_nbor=false;
-  if (device->gpu_mode()==PairGPUDevice<numtyp,acctyp>::GPU_NEIGH)
+  if (device->gpu_mode()==Device<numtyp,acctyp>::GPU_NEIGH)
    gpu_nbor=true;

  int _gpu_host=0;
@ -92,12 +91,12 @@ int ChargeGPUMemoryT::init_atomic(const int nlocal, const int nall,
 }

 template <class numtyp, class acctyp>
-void ChargeGPUMemoryT::estimate_gpu_overhead() {
+void BaseChargeT::estimate_gpu_overhead() {
  device->estimate_gpu_overhead(1,_gpu_overhead,_driver_overhead);
 }

 template <class numtyp, class acctyp>
-void ChargeGPUMemoryT::clear_atomic() {
+void BaseChargeT::clear_atomic() {
  // Output any timing information
  acc_timers();
  double avg_split=hd_balancer.all_avg_split();
@ -125,7 +124,7 @@ void ChargeGPUMemoryT::clear_atomic() {
 // Copy neighbor list from host
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
-int * ChargeGPUMemoryT::reset_nbors(const int nall, const int inum, int *ilist,
+int * BaseChargeT::reset_nbors(const int nall, const int inum, int *ilist,
                                   int *numj, int **firstneigh, bool &success) {
  success=true;

@ -150,7 +149,7 @@ int * ChargeGPUMemoryT::reset_nbors(const int nall, const int inum, int *ilist,
 // Build neighbor list on device
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
-inline void ChargeGPUMemoryT::build_nbor_list(const int inum,
+inline void BaseChargeT::build_nbor_list(const int inum,
                                              const int host_inum,
                                              const int nall, double **host_x,
                                              int *host_type, double *sublo,
@ -179,7 +178,7 @@ inline void ChargeGPUMemoryT::build_nbor_list(const int inum,
 // Copy nbor list from host if necessary and then calculate forces, virials,..
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
-void ChargeGPUMemoryT::compute(const int f_ago, const int inum_full,
+void BaseChargeT::compute(const int f_ago, const int inum_full,
                               const int nall, double **host_x, int *host_type,
                               int *ilist, int *numj, int **firstneigh,
                               const bool eflag, const bool vflag,
@ -226,7 +225,7 @@ void ChargeGPUMemoryT::compute(const int f_ago, const int inum_full,
 // Reneighbor on GPU if necessary and then compute forces, virials, energies
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
-int** ChargeGPUMemoryT::compute(const int ago, const int inum_full,
+int** BaseChargeT::compute(const int ago, const int inum_full,
                                const int nall, double **host_x, int *host_type,
                                double *sublo, double *subhi, int *tag,
                                int **nspecial, int **special, const bool eflag, 
@ -279,13 +278,13 @@ int** ChargeGPUMemoryT::compute(const int ago, const int inum_full,
 }

 template <class numtyp, class acctyp>
-double ChargeGPUMemoryT::host_memory_usage_atomic() const {
+double BaseChargeT::host_memory_usage_atomic() const {
  return device->atom.host_memory_usage()+nbor->host_memory_usage()+
-         4*sizeof(numtyp)+sizeof(ChargeGPUMemory<numtyp,acctyp>);
+         4*sizeof(numtyp)+sizeof(BaseCharge<numtyp,acctyp>);
 }

 template <class numtyp, class acctyp>
-void ChargeGPUMemoryT::compile_kernels(UCL_Device &dev, const char *pair_str) {
+void BaseChargeT::compile_kernels(UCL_Device &dev, const char *pair_str) {
  if (_compiled)
    return;

@ -302,5 +301,5 @@ void ChargeGPUMemoryT::compile_kernels(UCL_Device &dev, const char *pair_str) {
  _compiled=true;
 }

-template class ChargeGPUMemory<PRECISION,ACC_PRECISION>;
+template class BaseCharge<PRECISION,ACC_PRECISION>;

--- a/lib/gpu/base_charge.h
+++ b/lib/gpu/base_charge.h
@ -1,22 +1,21 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Charge/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                base_charge.h
+                             -------------------
+                            W. Michael Brown (ORNL)

-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Base class for pair styles needing per-particle data for position,
+  charge, and type.

-   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
- 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________

-#ifndef CHARGE_GPU_MEMORY_H
-#define CHARGE_GPU_MEMORY_H
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/
+
+#ifndef LAL_BASE_CHARGE_H
+#define LAL_BASE_CHARGE_H

 #include "device.h"
 #include "balance.h"
@ -29,10 +28,10 @@
 #endif

 template <class numtyp, class acctyp>
-class ChargeGPUMemory {
+class BaseCharge {
 public:
-  ChargeGPUMemory();
-  virtual ~ChargeGPUMemory();
+  BaseCharge();
+  virtual ~BaseCharge();

  /// Clear any previous data and set up for a new LAMMPS run
  /** \param max_nbors initial number of rows in the neighbor matrix
@ -146,7 +145,7 @@ class ChargeGPUMemory {
  // -------------------------- DEVICE DATA ------------------------- 

  /// Device Properties and Atom and Neighbor storage
-  PairGPUDevice<numtyp,acctyp> *device;
+  Device<numtyp,acctyp> *device;

  /// Geryon device
  UCL_Device *ucl_device;
@ -155,7 +154,7 @@ class ChargeGPUMemory {
  UCL_Timer time_pair;

  /// Host device load balancer
-  PairGPUBalance<numtyp,acctyp> hd_balancer;
+  Balance<numtyp,acctyp> hd_balancer;

  /// LAMMPS pointer for screen output
  FILE *screen;
@ -163,17 +162,17 @@ class ChargeGPUMemory {
  // --------------------------- ATOM DATA --------------------------

  /// Atom Data
-  PairGPUAtom<numtyp,acctyp> *atom;
+  Atom<numtyp,acctyp> *atom;


  // ------------------------ FORCE/ENERGY DATA -----------------------

-  PairGPUAns<numtyp,acctyp> *ans;
+  Answer<numtyp,acctyp> *ans;

  // --------------------------- NBOR DATA ----------------------------

  /// Neighbor data
-  PairGPUNbor *nbor;
+  Neighbor *nbor;

  /// True if we need to accumulate time for neighboring
  bool nbor_time_avail;
--- a/lib/gpu/base_ellipsoid.cpp
+++ b/lib/gpu/base_ellipsoid.cpp
@ -1,7 +1,7 @@
 /***************************************************************************
                              base_ellipsoid.cpp
                             -------------------
-                               W. Michael Brown
+                            W. Michael Brown (ORNL)

  Base class for acceleration of ellipsoid potentials

@ -23,13 +23,13 @@ using namespace LAMMPS_AL;
 #endif

 #define BaseEllipsoidT BaseEllipsoid<numtyp, acctyp>
-extern PairGPUDevice<PRECISION,ACC_PRECISION> pair_gpu_device;
+extern Device<PRECISION,ACC_PRECISION> global_device;

 template <class numtyp, class acctyp>
 BaseEllipsoidT::BaseEllipsoid() : _compiled(false), _max_bytes(0) {
-  device=&pair_gpu_device;
-  ans=new PairGPUAns<numtyp,acctyp>();
-  nbor=new PairGPUNbor();
+  device=&global_device;
+  ans=new Answer<numtyp,acctyp>();
+  nbor=new Neighbor();
 }

 template <class numtyp, class acctyp>
@ -56,7 +56,7 @@ int BaseEllipsoidT::init_base(const int nlocal, const int nall,
  _ellipsoid_sphere=ellip_sphere;

  bool gpu_nbor=false;
-  if (device->gpu_mode()==PairGPUDevice<numtyp,acctyp>::GPU_NEIGH)
+  if (device->gpu_mode()==Device<numtyp,acctyp>::GPU_NEIGH)
    gpu_nbor=true;

  int _gpu_host=0;
--- a/lib/gpu/base_ellipsoid.h
+++ b/lib/gpu/base_ellipsoid.h
@ -1,7 +1,7 @@
 /***************************************************************************
                               base_ellipsoid.h
                             -------------------
-                               W. Michael Brown
+                            W. Michael Brown (ORNL)

  Base class for acceleration of ellipsoid potentials

@ -13,8 +13,8 @@
    email                : brownw@ornl.gov
 ***************************************************************************/

-#ifndef BASE_ELLIPSOID_H
-#define BASE_ELLIPSOID_H
+#ifndef LAL_BASE_ELLIPSOID_H
+#define LAL_BASE_ELLIPSOID_H

 #include "device.h"
 #include "balance.h"
@ -182,7 +182,7 @@ class BaseEllipsoid {
  // -------------------------- DEVICE DATA ------------------------- 

  /// Device Properties and Atom and Neighbor storage
-  PairGPUDevice<numtyp,acctyp> *device;
+  Device<numtyp,acctyp> *device;

  /// Geryon device
  UCL_Device *ucl_device;
@ -192,7 +192,7 @@ class BaseEllipsoid {
  UCL_Timer time_nbor3, time_ellipsoid3;

  /// Host device load balancer
-  PairGPUBalance<numtyp,acctyp> hd_balancer;
+  Balance<numtyp,acctyp> hd_balancer;

  /// LAMMPS pointer for screen output
  FILE *screen;
@ -200,7 +200,7 @@ class BaseEllipsoid {
  // --------------------------- ATOM DATA --------------------------

  /// Atom Data
-  PairGPUAtom<numtyp,acctyp> *atom;
+  Atom<numtyp,acctyp> *atom;

  // --------------------------- TYPE DATA -------------------------- 

@ -209,12 +209,12 @@ class BaseEllipsoid {

  // ------------------------ FORCE/ENERGY DATA -----------------------

-  PairGPUAns<numtyp,acctyp> *ans;
+  Answer<numtyp,acctyp> *ans;

  // --------------------------- NBOR DATA ----------------------------

  /// Neighbor data
-  PairGPUNbor *nbor;
+  Neighbor *nbor;
  /// ilist with particles sorted by type
  UCL_H_Vec<int> host_olist;
  /// True if we need to accumulate time for neighboring
--- a/lib/gpu/cg_cmm.cpp
+++ b/lib/gpu/cg_cmm.cpp
@ -25,10 +25,10 @@
 #include <cassert>
 #define CMM_GPU_MemoryT CMM_GPU_Memory<numtyp, acctyp>

-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;

 template <class numtyp, class acctyp>
-CMM_GPU_MemoryT::CMM_GPU_Memory() : AtomicGPUMemory<numtyp,acctyp>(), _allocated(false) {
+CMM_GPU_MemoryT::CMM_GPU_Memory() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
 }

 template <class numtyp, class acctyp>
--- a/lib/gpu/cg_cmm.h
+++ b/lib/gpu/cg_cmm.h
@ -21,7 +21,7 @@
 #include "base_atomic.h"

 template <class numtyp, class acctyp>
-class CMM_GPU_Memory : public AtomicGPUMemory<numtyp, acctyp> {
+class CMM_GPU_Memory : public BaseAtomic<numtyp, acctyp> {
 public:
  CMM_GPU_Memory();
  ~CMM_GPU_Memory(); 
--- a/lib/gpu/cg_cmm_long.cpp
+++ b/lib/gpu/cg_cmm_long.cpp
@ -25,10 +25,10 @@
 #include <cassert>
 #define CMML_GPU_MemoryT CMML_GPU_Memory<numtyp, acctyp>

-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;

 template <class numtyp, class acctyp>
-CMML_GPU_MemoryT::CMML_GPU_Memory() : ChargeGPUMemory<numtyp,acctyp>(),
+CMML_GPU_MemoryT::CMML_GPU_Memory() : BaseCharge<numtyp,acctyp>(),
                                    _allocated(false) {
 }

--- a/lib/gpu/cg_cmm_long.h
+++ b/lib/gpu/cg_cmm_long.h
@ -21,7 +21,7 @@
 #include "base_charge.h"

 template <class numtyp, class acctyp>
-class CMML_GPU_Memory : public ChargeGPUMemory<numtyp, acctyp> {
+class CMML_GPU_Memory : public BaseCharge<numtyp, acctyp> {
 public:
  CMML_GPU_Memory();
  ~CMML_GPU_Memory();
--- a/lib/gpu/cg_cmm_msm.cpp
+++ b/lib/gpu/cg_cmm_msm.cpp
@ -25,10 +25,10 @@
 #include <cassert>
 #define CMMM_GPU_MemoryT CMMM_GPU_Memory<numtyp, acctyp>

-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;

 template <class numtyp, class acctyp>
-CMMM_GPU_MemoryT::CMMM_GPU_Memory() : ChargeGPUMemory<numtyp,acctyp>(),
+CMMM_GPU_MemoryT::CMMM_GPU_Memory() : BaseCharge<numtyp,acctyp>(),
                                    _allocated(false) {
 }

--- a/lib/gpu/cg_cmm_msm.h
+++ b/lib/gpu/cg_cmm_msm.h
@ -21,7 +21,7 @@
 #include "base_charge.h"

 template <class numtyp, class acctyp>
-class CMMM_GPU_Memory : public ChargeGPUMemory<numtyp, acctyp> {
+class CMMM_GPU_Memory : public BaseCharge<numtyp, acctyp> {
 public:
  CMMM_GPU_Memory();
  ~CMMM_GPU_Memory();
--- a/lib/gpu/charmm_long.cpp
+++ b/lib/gpu/charmm_long.cpp
@ -25,10 +25,10 @@
 #include <cassert>
 #define CRML_GPU_MemoryT CRML_GPU_Memory<numtyp, acctyp>

-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;

 template <class numtyp, class acctyp>
-CRML_GPU_MemoryT::CRML_GPU_Memory() : ChargeGPUMemory<numtyp,acctyp>(),
+CRML_GPU_MemoryT::CRML_GPU_Memory() : BaseCharge<numtyp,acctyp>(),
                                    _allocated(false) {
 }

--- a/lib/gpu/charmm_long.h
+++ b/lib/gpu/charmm_long.h
@ -21,7 +21,7 @@
 #include "base_charge.h"

 template <class numtyp, class acctyp>
-class CRML_GPU_Memory : public ChargeGPUMemory<numtyp, acctyp> {
+class CRML_GPU_Memory : public BaseCharge<numtyp, acctyp> {
 public:
  CRML_GPU_Memory();
  ~CRML_GPU_Memory();
--- a/lib/gpu/device.cpp
+++ b/lib/gpu/device.cpp
@ -1,19 +1,17 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                  device.cpp
+                             -------------------
+                            W. Michael Brown (ORNL)

-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Class for management of the device where the computations are performed

-   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________

-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/

 #include "device.h"
 #include "precision.h"
@ -29,21 +27,21 @@
 #include "pair_gpu_dev_ptx.h"
 #endif

-#define PairGPUDeviceT PairGPUDevice<numtyp, acctyp>
+#define DeviceT Device<numtyp, acctyp>

 template <class numtyp, class acctyp>
-PairGPUDeviceT::PairGPUDevice() : _init_count(0), _device_init(false),
+DeviceT::Device() : _init_count(0), _device_init(false),
                                  _gpu_mode(GPU_FORCE), _first_device(0),
                                  _last_device(0), _compiled(false) {
 }

 template <class numtyp, class acctyp>
-PairGPUDeviceT::~PairGPUDevice() {
+DeviceT::~Device() {
  clear_device();
 }

 template <class numtyp, class acctyp>
-int PairGPUDeviceT::init_device(MPI_Comm world, MPI_Comm replica, 
+int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, 
                                const int first_gpu, const int last_gpu,
                                const int gpu_mode, const double p_split,
                                const int nthreads, const int t_per_atom) {
@ -135,10 +133,10 @@ int PairGPUDeviceT::init_device(MPI_Comm world, MPI_Comm replica,
 }

 template <class numtyp, class acctyp>
-int PairGPUDeviceT::init(PairGPUAns<numtyp,acctyp> &ans, const bool charge,
+int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
                         const bool rot, const int nlocal, 
                         const int host_nlocal, const int nall,
-                         PairGPUNbor *nbor, const int maxspecial,
+                         Neighbor *nbor, const int maxspecial,
                         const int gpu_host, const int max_nbors, 
                         const double cell_size, const bool pre_cut) {
  if (!_device_init)
@ -181,7 +179,7 @@ int PairGPUDeviceT::init(PairGPUAns<numtyp,acctyp> &ans, const bool charge,
  if (!ans.init(ef_nlocal,charge,rot,*gpu))
    return -3;

-  if (!nbor->init(&_nbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial,
+  if (!nbor->init(&_neighbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial,
                  *gpu,gpu_nbor,gpu_host,pre_cut, _block_cell_2d, 
                  _block_cell_id, _block_nbor_build))
    return -3;
@ -192,7 +190,7 @@ int PairGPUDeviceT::init(PairGPUAns<numtyp,acctyp> &ans, const bool charge,
 }

 template <class numtyp, class acctyp>
-int PairGPUDeviceT::init(PairGPUAns<numtyp,acctyp> &ans, const int nlocal,
+int DeviceT::init(Answer<numtyp,acctyp> &ans, const int nlocal,
                         const int nall) {
  if (!_device_init)
    return -1;                          
@ -215,21 +213,21 @@ int PairGPUDeviceT::init(PairGPUAns<numtyp,acctyp> &ans, const int nlocal,
 }

 template <class numtyp, class acctyp>
-void PairGPUDeviceT::set_single_precompute
+void DeviceT::set_single_precompute
                     (PPPMGPUMemory<numtyp,acctyp,float,_lgpu_float4> *pppm) {
  _long_range_precompute=1;
  pppm_single=pppm;
 }

 template <class numtyp, class acctyp>
-void PairGPUDeviceT::set_double_precompute
+void DeviceT::set_double_precompute
                     (PPPMGPUMemory<numtyp,acctyp,double,_lgpu_double4> *pppm) {
  _long_range_precompute=2;
  pppm_double=pppm;
 }

 template <class numtyp, class acctyp>
-void PairGPUDeviceT::init_message(FILE *screen, const char *name,
+void DeviceT::init_message(FILE *screen, const char *name,
                                  const int first_gpu, const int last_gpu) {
  #ifdef USE_OPENCL
  std::string fs="";
@ -272,7 +270,7 @@ void PairGPUDeviceT::init_message(FILE *screen, const char *name,
 }

 template <class numtyp, class acctyp>
-void PairGPUDeviceT::estimate_gpu_overhead(const int kernel_calls, 
+void DeviceT::estimate_gpu_overhead(const int kernel_calls, 
                                           double &gpu_overhead,
                                           double &gpu_driver_overhead) {
  UCL_H_Vec<int> *host_data_in=NULL, *host_data_out=NULL;
@ -384,9 +382,9 @@ void PairGPUDeviceT::estimate_gpu_overhead(const int kernel_calls,
 }              

 template <class numtyp, class acctyp>
-void PairGPUDeviceT::output_times(UCL_Timer &time_pair, 
-                                  PairGPUAns<numtyp,acctyp> &ans, 
-                                  PairGPUNbor &nbor, const double avg_split, 
+void DeviceT::output_times(UCL_Timer &time_pair, 
+                                  Answer<numtyp,acctyp> &ans, 
+                                  Neighbor &nbor, const double avg_split, 
                                  const double max_bytes, 
                                  const double gpu_overhead,
                                  const double driver_overhead, 
@ -440,12 +438,12 @@ void PairGPUDeviceT::output_times(UCL_Timer &time_pair,
 }

 template <class numtyp, class acctyp>
-void PairGPUDeviceT::output_kspace_times(UCL_Timer &time_in, 
+void DeviceT::output_kspace_times(UCL_Timer &time_in, 
                                         UCL_Timer &time_out,
                                         UCL_Timer &time_map,
                                         UCL_Timer &time_rho,
                                         UCL_Timer &time_interp,
-                                         PairGPUAns<numtyp,acctyp> &ans, 
+                                         Answer<numtyp,acctyp> &ans, 
                                         const double max_bytes, 
                                         const double cpu_time, 
                                         const double idle_time, FILE *screen) {
@ -500,13 +498,13 @@ void PairGPUDeviceT::output_kspace_times(UCL_Timer &time_in,
 }

 template <class numtyp, class acctyp>
-void PairGPUDeviceT::clear() {
+void DeviceT::clear() {
  if (_init_count>0) {
    _long_range_precompute=0;
    _init_count--;
    if (_init_count==0) {
      atom.clear();
-      _nbor_shared.clear();
+      _neighbor_shared.clear();
      if (_compiled) {
        k_zero.clear();
        k_info.clear();
@ -518,7 +516,7 @@ void PairGPUDeviceT::clear() {
 }

 template <class numtyp, class acctyp>
-void PairGPUDeviceT::clear_device() {
+void DeviceT::clear_device() {
  while (_init_count>0)
    clear();
  if (_device_init) {
@ -528,7 +526,7 @@ void PairGPUDeviceT::clear_device() {
 }

 template <class numtyp, class acctyp>
-int PairGPUDeviceT::compile_kernels() {
+int DeviceT::compile_kernels() {
  int flag=0;

  if (_compiled)
@ -588,27 +586,27 @@ int PairGPUDeviceT::compile_kernels() {
 }

 template <class numtyp, class acctyp>
-double PairGPUDeviceT::host_memory_usage() const {
+double DeviceT::host_memory_usage() const {
  return atom.host_memory_usage()+4*sizeof(numtyp)+
-         sizeof(PairGPUDevice<numtyp,acctyp>);
+         sizeof(Device<numtyp,acctyp>);
 }

-template class PairGPUDevice<PRECISION,ACC_PRECISION>;
-PairGPUDevice<PRECISION,ACC_PRECISION> pair_gpu_device;
+template class Device<PRECISION,ACC_PRECISION>;
+Device<PRECISION,ACC_PRECISION> global_device;

 int lmp_init_device(MPI_Comm world, MPI_Comm replica, const int first_gpu,
                    const int last_gpu, const int gpu_mode, 
                    const double particle_split, const int nthreads,
                    const int t_per_atom) {
-  return pair_gpu_device.init_device(world,replica,first_gpu,last_gpu,gpu_mode,
+  return global_device.init_device(world,replica,first_gpu,last_gpu,gpu_mode,
                                     particle_split,nthreads,t_per_atom);
 }

 void lmp_clear_device() {
-  pair_gpu_device.clear_device();
+  global_device.clear_device();
 }

 double lmp_gpu_forces(double **f, double **tor, double *eatom,
                      double **vatom, double *virial, double &ecoul) {
-  return pair_gpu_device.fix_gpu(f,tor,eatom,vatom,virial,ecoul);
+  return global_device.fix_gpu(f,tor,eatom,vatom,virial,ecoul);
 }
--- a/lib/gpu/device.cu
+++ b/lib/gpu/device.cu
@ -1,19 +1,17 @@
-/* ----------------------------------------------------------------------
-   LAMMPS-Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
+// **************************************************************************
+//                                  device.cu
+//                             -------------------
+//                           W. Michael Brown (ORNL)
+//
+//  Device code for device information
+//
+// __________________________________________________________________________
+//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+//    begin                : 
+//    email                : brownw@ornl.gov
+// ***************************************************************************/

 /*************************************************************************
                           Preprocessor Definitions
@ -63,9 +61,6 @@

 *************************************************************************/

-#ifndef PAIR_GPU_DEV_KERNEL
-#define PAIR_GPU_DEV_KERNEL
-
 #ifdef NV_KERNEL

 #include "nv_kernel_def.h"
@ -116,5 +111,3 @@ __kernel void kernel_info(__global int *info) {
  info[13]=THREADS_PER_CHARGE;
 }

-#endif
-
--- a/lib/gpu/device.h
+++ b/lib/gpu/device.h
@ -1,26 +1,24 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                  device.h
+                             -------------------
+                            W. Michael Brown (ORNL)

-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Class for management of the device where the computations are performed

-   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________

-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/

-#ifndef PAIR_GPU_DEVICE_H
-#define PAIR_GPU_DEVICE_H
+#ifndef LAL_DEVICE_H
+#define LAL_DEVICE_H

 #include "atom.h"
-#include "ans.h"
-#include "nbor.h"
+#include "answer.h"
+#include "neighbor.h"
 #include "pppm.h"
 #include "mpi.h"
 #include <sstream>
@ -32,10 +30,10 @@ template <class numtyp, class acctyp,
          class grdtyp, class grdtyp4> class PPPMGPUMemory;

 template <class numtyp, class acctyp>
-class PairGPUDevice {
+class Device {
 public:
-  PairGPUDevice();
-  ~PairGPUDevice(); 
+  Device();
+  ~Device(); 
 
  /// Initialize the device for use by this process
  /** Sets up a per-device MPI communicator for load balancing and initializes
@ -68,9 +66,9 @@ class PairGPUDevice {
    * - -3 if there is an out of memory error
    * - -4 if the GPU library was not compiled for GPU
    * - -5 Double precision is not supported on card **/
-  int init(PairGPUAns<numtyp,acctyp> &a, const bool charge, const bool rot,
+  int init(Answer<numtyp,acctyp> &a, const bool charge, const bool rot,
           const int nlocal, const int host_nlocal, const int nall,
-           PairGPUNbor *nbor, const int maxspecial, const int gpu_host,
+           Neighbor *nbor, const int maxspecial, const int gpu_host,
           const int max_nbors, const double cell_size, const bool pre_cut);

  /// Initialize the device for Atom storage only
@ -83,7 +81,7 @@ class PairGPUDevice {
    * - -3 if there is an out of memory error
    * - -4 if the GPU library was not compiled for GPU
    * - -5 Double precision is not supported on card **/
-  int init(PairGPUAns<numtyp,acctyp> &ans, const int nlocal, const int nall);
+  int init(Answer<numtyp,acctyp> &ans, const int nlocal, const int nall);

  /// Output a message for pair_style acceleration with device stats
  void init_message(FILE *screen, const char *name,
@ -109,8 +107,8 @@ class PairGPUDevice {
  inline bool double_precision() { return gpu->double_precision(); }
  
  /// Output a message with timing information
-  void output_times(UCL_Timer &time_pair, PairGPUAns<numtyp,acctyp> &ans, 
-                    PairGPUNbor &nbor, const double avg_split, 
+  void output_times(UCL_Timer &time_pair, Answer<numtyp,acctyp> &ans, 
+                    Neighbor &nbor, const double avg_split, 
                    const double max_bytes, const double gpu_overhead,
                    const double driver_overhead, 
                    const int threads_per_atom, FILE *screen);
@ -119,7 +117,7 @@ class PairGPUDevice {
  void output_kspace_times(UCL_Timer &time_in, UCL_Timer &time_out,
                           UCL_Timer & time_map, UCL_Timer & time_rho,
                           UCL_Timer &time_interp, 
-                           PairGPUAns<numtyp,acctyp> &ans, 
+                           Answer<numtyp,acctyp> &ans, 
                           const double max_bytes, const double cpu_time,
                           const double cpu_idle_time, FILE *screen);

@ -130,7 +128,7 @@ class PairGPUDevice {
  void clear_device();

  /// Add an answer object for putting forces, energies, etc from GPU to LAMMPS
-  inline void add_ans_object(PairGPUAns<numtyp,acctyp> *ans)
+  inline void add_ans_object(Answer<numtyp,acctyp> *ans)
    { ans_queue.push(ans); }

  /// Add "answers" (force,energies,etc.) into LAMMPS structures
@ -248,12 +246,12 @@ class PairGPUDevice {
  // --------------------------- ATOM DATA -------------------------- 

  /// Atom Data
-  PairGPUAtom<numtyp,acctyp> atom;
+  Atom<numtyp,acctyp> atom;

  // --------------------------- NBOR DATA ----------------------------
  
  /// Neighbor Data
-  PairGPUNborShared _nbor_shared;
+  NeighborShared _neighbor_shared;

  // ------------------------ LONG RANGE DATA -------------------------
  
@ -274,7 +272,7 @@ class PairGPUDevice {
  }

 private:
-  std::queue<PairGPUAns<numtyp,acctyp> *> ans_queue;
+  std::queue<Answer<numtyp,acctyp> *> ans_queue;
  int _init_count;
  bool _device_init, _host_timer_started, _time_device;
  MPI_Comm _comm_world, _comm_replica, _comm_gpu;
--- a/lib/gpu/ellipsoid_extra.h
+++ b/lib/gpu/ellipsoid_extra.h
@ -1,7 +1,7 @@
 // **************************************************************************
 //                              ellipsoid_extra.h
 //                             -------------------
-//                               W. Michael Brown
+//                           W. Michael Brown (ORNL)
 //
 //  Device code for Ellipsoid math routines
 //
@ -13,8 +13,8 @@
 //    email                : brownw@ornl.gov
 // ***************************************************************************/

-#ifndef ELLIPSOID_EXTRA_H
-#define ELLIPSOID_EXTRA_H
+#ifndef LAL_ELLIPSOID_EXTRA_H
+#define LAL_ELLIPSOID_EXTRA_H

 enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE};

@ -409,16 +409,4 @@ __inline void gpu_times_column3(const numtyp m[9], const numtyp v[3],
  ans[2] = m[6]*v[0] + m[7]*v[1] + m[8]*v[2];
 }

-
-
-
-
-
-
-
-
-
-
-
-
 #endif
--- a/lib/gpu/ellipsoid_nbor.cu
+++ b/lib/gpu/ellipsoid_nbor.cu
@ -1,7 +1,7 @@
 // **************************************************************************
 //                              ellipsoid_nbor.cu
 //                             -------------------
-//                               W. Michael Brown
+//                           W. Michael Brown (ORNL)
 //
 //  Device code for Ellipsoid neighbor routines
 //
@ -13,9 +13,6 @@
 //    email                : brownw@ornl.gov
 // ***************************************************************************/

-#ifndef ELLIPSOID_NBOR_H
-#define ELLIPSOID_NBOR_H
-
 #ifdef NV_KERNEL

 #include "nv_kernel_def.h"
@ -161,5 +158,3 @@ __kernel void kernel_nbor_fast(__global numtyp4 *x_, __global numtyp2 *cut_form,
    dev_nbor[ii+nbor_pitch]=newj;
  }
 }
-
-#endif
--- a/lib/gpu/gayberne.cpp
+++ b/lib/gpu/gayberne.cpp
@ -24,7 +24,7 @@
 using namespace LAMMPS_AL;

 #define GayBerneT GayBerne<numtyp, acctyp>
-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;

 template <class numtyp, class acctyp>
 GayBerneT::GayBerne() : BaseEllipsoid<numtyp,acctyp>(),
--- a/lib/gpu/lj.cpp
+++ b/lib/gpu/lj.cpp
@ -25,10 +25,10 @@
 #include <cassert>
 #define LJL_GPU_MemoryT LJL_GPU_Memory<numtyp, acctyp>

-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;

 template <class numtyp, class acctyp>
-LJL_GPU_MemoryT::LJL_GPU_Memory() : AtomicGPUMemory<numtyp,acctyp>(), _allocated(false) {
+LJL_GPU_MemoryT::LJL_GPU_Memory() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
 }

 template <class numtyp, class acctyp>
--- a/lib/gpu/lj.h
+++ b/lib/gpu/lj.h
@ -21,7 +21,7 @@
 #include "base_atomic.h"

 template <class numtyp, class acctyp>
-class LJL_GPU_Memory : public AtomicGPUMemory<numtyp, acctyp> {
+class LJL_GPU_Memory : public BaseAtomic<numtyp, acctyp> {
 public:
  LJL_GPU_Memory();
  ~LJL_GPU_Memory(); 
--- a/lib/gpu/lj96.cpp
+++ b/lib/gpu/lj96.cpp
@ -25,10 +25,10 @@
 #include <cassert>
 #define LJ96_GPU_MemoryT LJ96_GPU_Memory<numtyp, acctyp>

-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;

 template <class numtyp, class acctyp>
-LJ96_GPU_MemoryT::LJ96_GPU_Memory() : AtomicGPUMemory<numtyp,acctyp>(), _allocated(false) {
+LJ96_GPU_MemoryT::LJ96_GPU_Memory() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
 }

 template <class numtyp, class acctyp>
--- a/lib/gpu/lj96.h
+++ b/lib/gpu/lj96.h
@ -21,7 +21,7 @@
 #include "base_atomic.h"

 template <class numtyp, class acctyp>
-class LJ96_GPU_Memory : public AtomicGPUMemory<numtyp, acctyp> {
+class LJ96_GPU_Memory : public BaseAtomic<numtyp, acctyp> {
 public:
  LJ96_GPU_Memory();
  ~LJ96_GPU_Memory();
--- a/lib/gpu/lj_class2_long.cpp
+++ b/lib/gpu/lj_class2_long.cpp
@ -25,10 +25,10 @@ using namespace LAMMPS_AL;

 #define LJClass2LongT LJClass2Long<numtyp, acctyp>

-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;

 template <class numtyp, class acctyp>
-LJClass2LongT::LJClass2Long() : ChargeGPUMemory<numtyp,acctyp>(),
+LJClass2LongT::LJClass2Long() : BaseCharge<numtyp,acctyp>(),
                                _allocated(false) {
 }

--- a/lib/gpu/lj_class2_long.h
+++ b/lib/gpu/lj_class2_long.h
@ -21,7 +21,7 @@
 namespace LAMMPS_AL {

 template <class numtyp, class acctyp>
-class LJClass2Long : public ChargeGPUMemory<numtyp, acctyp> {
+class LJClass2Long : public BaseCharge<numtyp, acctyp> {
 public:
  LJClass2Long();
  ~LJClass2Long();
--- a/lib/gpu/lj_coul.cpp
+++ b/lib/gpu/lj_coul.cpp
@ -25,10 +25,10 @@
 #include <cassert>
 #define LJC_GPU_MemoryT LJC_GPU_Memory<numtyp, acctyp>

-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;

 template <class numtyp, class acctyp>
-LJC_GPU_MemoryT::LJC_GPU_Memory() : ChargeGPUMemory<numtyp,acctyp>(),
+LJC_GPU_MemoryT::LJC_GPU_Memory() : BaseCharge<numtyp,acctyp>(),
                                    _allocated(false) {
 }

--- a/lib/gpu/lj_coul.h
+++ b/lib/gpu/lj_coul.h
@ -21,7 +21,7 @@
 #include "base_charge.h"

 template <class numtyp, class acctyp>
-class LJC_GPU_Memory : public ChargeGPUMemory<numtyp, acctyp> {
+class LJC_GPU_Memory : public BaseCharge<numtyp, acctyp> {
 public:
  LJC_GPU_Memory();
  ~LJC_GPU_Memory();
--- a/lib/gpu/lj_coul_long.cpp
+++ b/lib/gpu/lj_coul_long.cpp
@ -25,10 +25,10 @@
 #include <cassert>
 #define LJCL_GPU_MemoryT LJCL_GPU_Memory<numtyp, acctyp>

-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;

 template <class numtyp, class acctyp>
-LJCL_GPU_MemoryT::LJCL_GPU_Memory() : ChargeGPUMemory<numtyp,acctyp>(),
+LJCL_GPU_MemoryT::LJCL_GPU_Memory() : BaseCharge<numtyp,acctyp>(),
                                    _allocated(false) {
 }

--- a/lib/gpu/lj_coul_long.h
+++ b/lib/gpu/lj_coul_long.h
@ -21,7 +21,7 @@
 #include "base_charge.h"

 template <class numtyp, class acctyp>
-class LJCL_GPU_Memory : public ChargeGPUMemory<numtyp, acctyp> {
+class LJCL_GPU_Memory : public BaseCharge<numtyp, acctyp> {
 public:
  LJCL_GPU_Memory();
  ~LJCL_GPU_Memory();
--- a/lib/gpu/lj_expand.cpp
+++ b/lib/gpu/lj_expand.cpp
@ -25,10 +25,10 @@
 #include <cassert>
 #define LJE_GPU_MemoryT LJE_GPU_Memory<numtyp, acctyp>

-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;

 template <class numtyp, class acctyp>
-LJE_GPU_MemoryT::LJE_GPU_Memory() : AtomicGPUMemory<numtyp,acctyp>(), _allocated(false) {
+LJE_GPU_MemoryT::LJE_GPU_Memory() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
 }

 template <class numtyp, class acctyp>
--- a/lib/gpu/lj_expand.h
+++ b/lib/gpu/lj_expand.h
@ -21,7 +21,7 @@
 #include "base_atomic.h"

 template <class numtyp, class acctyp>
-class LJE_GPU_Memory : public AtomicGPUMemory<numtyp, acctyp> {
+class LJE_GPU_Memory : public BaseAtomic<numtyp, acctyp> {
 public:
  LJE_GPU_Memory();
  ~LJE_GPU_Memory();
--- a/lib/gpu/morse.cpp
+++ b/lib/gpu/morse.cpp
@ -25,10 +25,10 @@
 #include <cassert>
 #define MOR_GPU_MemoryT MOR_GPU_Memory<numtyp, acctyp>

-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;

 template <class numtyp, class acctyp>
-MOR_GPU_MemoryT::MOR_GPU_Memory() : AtomicGPUMemory<numtyp,acctyp>(), _allocated(false) {
+MOR_GPU_MemoryT::MOR_GPU_Memory() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
 }

 template <class numtyp, class acctyp>
--- a/lib/gpu/morse.h
+++ b/lib/gpu/morse.h
@ -21,7 +21,7 @@
 #include "base_atomic.h"

 template <class numtyp, class acctyp>
-class MOR_GPU_Memory : public AtomicGPUMemory<numtyp, acctyp> {
+class MOR_GPU_Memory : public BaseAtomic<numtyp, acctyp> {
 public:
  MOR_GPU_Memory();
  ~MOR_GPU_Memory(); 
--- a/lib/gpu/nbor_cpu.cu
+++ b/lib/gpu/nbor_cpu.cu
@ -1,46 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
-
-#ifdef NV_KERNEL
-
-#include "geryon/ucl_nv_kernel.h"
-
-#else
-
-#define GLOBAL_ID_X get_global_id(0)
-
-#endif
-
-__kernel void kernel_unpack(__global int *dev_nbor, __global int *dev_ij,
-                            const int inum) {
-  // ii indexes the two interacting particles in gi
-  int ii=GLOBAL_ID_X;
-
-  if (ii<inum) {
-    __global int *nbor=dev_nbor+ii+inum;
-    int numj=*nbor;
-    nbor+=inum;
-    __global int *list=dev_ij+*nbor;
-    __global int *list_end=list+numj;
-  
-    for ( ; list<list_end; list++) {
-      *nbor=*list;
-      nbor+=inum;
-    }
-  } // if ii
-}
-
--- a/lib/gpu/nbor_shared.h
+++ b/lib/gpu/nbor_shared.h
@ -1,58 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
-
-#ifndef PAIR_GPU_NBOR_SHARED_H
-#define PAIR_GPU_NBOR_SHARED_H
-
-#ifdef USE_OPENCL
-
-#include "geryon/ocl_kernel.h"
-#include "geryon/ocl_texture.h"
-using namespace ucl_opencl;
-
-#else
-
-#include "geryon/nvd_kernel.h"
-#include "geryon/nvd_texture.h"
-using namespace ucl_cudadr;
-
-#endif
-
-class PairGPUNborShared {
- public:
-  PairGPUNborShared() : _compiled(false) {}
-  ~PairGPUNborShared() { clear(); }
- 
-  /// Free all memory on host and device
-  void clear();
-
-  /// Texture for cached position/type access with CUDA
-  UCL_Texture neigh_tex;
-
-  /// Compile kernels for neighbor lists
-  void compile_kernels(UCL_Device &dev, const bool gpu_nbor);
-
-  // ----------------------------- Kernels
-  UCL_Program *nbor_program, *build_program;
-  UCL_Kernel k_nbor, k_cell_id, k_cell_counts, k_build_nbor;
-  UCL_Kernel k_transpose, k_special;
-
- private:
-  bool _compiled, _gpu_nbor;
-};
-
-#endif
--- a/lib/gpu/neighbor.cpp
+++ b/lib/gpu/neighbor.cpp
@ -1,27 +1,25 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                 neighbor.cpp
+                             -------------------
+                            W. Michael Brown (ORNL)
+                              Peng Wang (Nvidia)

-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Class for handling neighbor lists

-   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
- 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
-                         Peng Wang (Nvidia), penwang@nvidia.com
------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : 
+    email                : brownw@ornl.gov, penwang@nvidia.com
+ ***************************************************************************/

 #include "precision.h"
-#include "nbor.h"
+#include "neighbor.h"
 #include "device.h"
 #include "math.h"

-int PairGPUNbor::bytes_per_atom(const int max_nbors) const {
+int Neighbor::bytes_per_atom(const int max_nbors) const {
  if (_gpu_nbor)
    return (max_nbors+2)*sizeof(int);
  else if (_use_packing)
@ -30,7 +28,7 @@ int PairGPUNbor::bytes_per_atom(const int max_nbors) const {
    return (max_nbors+3)*sizeof(int);
 }

-bool PairGPUNbor::init(PairGPUNborShared *shared, const int inum,
+bool Neighbor::init(NeighborShared *shared, const int inum,
                       const int host_inum, const int max_nbors, 
                       const int maxspecial, UCL_Device &devi, 
                       const bool gpu_nbor, const int gpu_host, 
@ -89,7 +87,7 @@ bool PairGPUNbor::init(PairGPUNborShared *shared, const int inum,
  return success;
 }

-void PairGPUNbor::alloc(bool &success) { 
+void Neighbor::alloc(bool &success) { 
  dev_nbor.clear();
  host_acc.clear();
  int nt=_max_atoms+_max_host;
@ -156,7 +154,7 @@ void PairGPUNbor::alloc(bool &success) {
  _allocated=true;
 }
  
-void PairGPUNbor::clear() {
+void Neighbor::clear() {
  _gpu_bytes=0.0;
  _cell_bytes=0.0;
  _c_bytes=0.0;
@ -181,7 +179,7 @@ void PairGPUNbor::clear() {
  }
 }

-double PairGPUNbor::host_memory_usage() const {
+double Neighbor::host_memory_usage() const {
  if (_gpu_nbor) {
    if (_gpu_host)
      return host_nbor.row_bytes()*host_nbor.rows()+host_ilist.row_bytes()+
@ -190,10 +188,10 @@ double PairGPUNbor::host_memory_usage() const {
      return 0;
  } else 
    return host_packed.row_bytes()*host_packed.rows()+host_acc.row_bytes()+
-           sizeof(PairGPUNbor);
+           sizeof(Neighbor);
 }

-void PairGPUNbor::get_host(const int inum, int *ilist, int *numj,
+void Neighbor::get_host(const int inum, int *ilist, int *numj,
                           int **firstneigh, const int block_size) {  
  time_nbor.start();

@ -258,9 +256,9 @@ void PairGPUNbor::get_host(const int inum, int *ilist, int *numj,
 }

 template <class numtyp, class acctyp>
-void PairGPUNbor::build_nbor_list(const int inum, const int host_inum,
+void Neighbor::build_nbor_list(const int inum, const int host_inum,
                                  const int nall, 
-                                  PairGPUAtom<numtyp,acctyp> &atom, 
+                                  Atom<numtyp,acctyp> &atom, 
                                  double *sublo, double *subhi, int *tag, 
                                  int **nspecial, int **special, bool &success,
                                  int &mn) {
@ -399,8 +397,8 @@ void PairGPUNbor::build_nbor_list(const int inum, const int host_inum,
  time_nbor.stop();
 }

-template void PairGPUNbor::build_nbor_list<PRECISION,ACC_PRECISION>
+template void Neighbor::build_nbor_list<PRECISION,ACC_PRECISION>
     (const int inum, const int host_inum, const int nall,
-      PairGPUAtom<PRECISION,ACC_PRECISION> &atom, double *sublo, double *subhi,
+      Atom<PRECISION,ACC_PRECISION> &atom, double *sublo, double *subhi,
      int *, int **, int **, bool &success, int &mn);

--- a/lib/gpu/neighbor.h
+++ b/lib/gpu/neighbor.h
@ -1,25 +1,24 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                                  neighbor.h
+                             -------------------
+                            W. Michael Brown (ORNL)
+                              Peng Wang (Nvidia)

-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Class for handling neighbor lists

-   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________

-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
+    begin                : 
+    email                : brownw@ornl.gov, penwang@nvidia.com
+ ***************************************************************************/

-#ifndef PAIR_GPU_NBOR_H
-#define PAIR_GPU_NBOR_H
+#ifndef LAL_NEIGHBOR_H
+#define LAL_NEIGHBOR_H

 #include "atom.h"
-#include "nbor_shared.h"
+#include "neighbor_shared.h"

 #define IJ_SIZE 131072

@ -37,10 +36,10 @@ using namespace ucl_cudadr;

 #endif

-class PairGPUNbor {
+class Neighbor {
 public:
-  PairGPUNbor() : _allocated(false), _use_packing(false) {}
-  ~PairGPUNbor() { clear(); }
+  Neighbor() : _allocated(false), _use_packing(false) {}
+  ~Neighbor() { clear(); }
 
  /// Determine whether neighbor unpacking should be used
  /** If false, twice as much memory is reserved to allow unpacking neighbors by 
@ -57,7 +56,7 @@ class PairGPUNbor {
    *                 2 if gpu_nbor is true, and host needs a full nbor list
    * \param pre_cut True if cutoff test will be performed in separate kernel
    *                than the force kernel **/
-  bool init(PairGPUNborShared *shared, const int inum, const int host_inum,
+  bool init(NeighborShared *shared, const int inum, const int host_inum,
            const int max_nbors, const int maxspecial, UCL_Device &dev,
            const bool gpu_nbor, const int gpu_host, const bool pre_cut,
            const int block_cell_2d, const int block_cell_id, 
@ -138,7 +137,7 @@ class PairGPUNbor {
  /// Build nbor list on the device
  template <class numtyp, class acctyp>
  void build_nbor_list(const int inum, const int host_inum, const int nall,
-                       PairGPUAtom<numtyp,acctyp> &atom, double *sublo,
+                       Atom<numtyp,acctyp> &atom, double *sublo,
                       double *subhi, int *tag, int **nspecial, int **special, 
                       bool &success, int &max_nbors);

@ -187,7 +186,7 @@ class PairGPUNbor {
  UCL_Timer time_nbor, time_kernel;
  
 private:
-  PairGPUNborShared *_shared;
+  NeighborShared *_shared;
  UCL_Device *dev;
  bool _allocated, _use_packing;
  int _max_atoms, _max_nbors, _max_host, _nbor_pitch, _maxspecial;
--- a/lib/gpu/neighbor_cpu.cu
+++ b/lib/gpu/neighbor_cpu.cu
@ -0,0 +1,44 @@
+// **************************************************************************
+//                                  atom.cu
+//                             -------------------
+//                           W. Michael Brown (ORNL)
+//
+//  Device code for handling CPU generated neighbor lists
+//
+// __________________________________________________________________________
+//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+//    begin                : 
+//    email                : brownw@ornl.gov
+// ***************************************************************************/
+
+#ifdef NV_KERNEL
+
+#include "geryon/ucl_nv_kernel.h"
+
+#else
+
+#define GLOBAL_ID_X get_global_id(0)
+
+#endif
+
+__kernel void kernel_unpack(__global int *dev_nbor, __global int *dev_ij,
+                            const int inum) {
+  // ii indexes the two interacting particles in gi
+  int ii=GLOBAL_ID_X;
+
+  if (ii<inum) {
+    __global int *nbor=dev_nbor+ii+inum;
+    int numj=*nbor;
+    nbor+=inum;
+    __global int *list=dev_ij+*nbor;
+    __global int *list_end=list+numj;
+  
+    for ( ; list<list_end; list++) {
+      *nbor=*list;
+      nbor+=inum;
+    }
+  } // if ii
+}
+
--- a/lib/gpu/neighbor_gpu.cu
+++ b/lib/gpu/neighbor_gpu.cu
@ -1,20 +1,18 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing authors: Peng Wang (Nvidia), penwang@nvidia.com
-                         Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
+// **************************************************************************
+//                                  atom.cu
+//                             -------------------
+//                              Peng Wang (Nvidia)
+//                           W. Michael Brown (ORNL)
+//
+//  Device code for handling GPU generated neighbor lists
+//
+// __________________________________________________________________________
+//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+//    begin                : 
+//    email                : penwang@nvidia.com, brownw@ornl.gov
+// ***************************************************************************/

 #ifdef NV_KERNEL

--- a/lib/gpu/neighbor_shared.cpp
+++ b/lib/gpu/neighbor_shared.cpp
@ -1,21 +1,19 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
+/***************************************************************************
+                             neighbor_shared.cpp
+                             -------------------
+                            W. Michael Brown (ORNL)

-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
+  Class for management of data shared by all neighbor lists

-   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
- 
-/* ----------------------------------------------------------------------
-   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________

-#include "nbor_shared.h"
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/
+
+#include "neighbor_shared.h"

 #ifdef USE_OPENCL
 #include "nbor_cl.h"
@ -24,7 +22,7 @@
 #include "pair_gpu_build_ptx.h"
 #endif
  
-void PairGPUNborShared::clear() {
+void NeighborShared::clear() {
  if (_compiled) {
    if (_gpu_nbor) {
      k_cell_id.clear();
@ -41,7 +39,7 @@ void PairGPUNborShared::clear() {
  }
 }

-void PairGPUNborShared::compile_kernels(UCL_Device &dev, const bool gpu_nbor) {
+void NeighborShared::compile_kernels(UCL_Device &dev, const bool gpu_nbor) {
  if (_compiled)
  	return;
  	
@ -50,7 +48,7 @@ void PairGPUNborShared::compile_kernels(UCL_Device &dev, const bool gpu_nbor) {

  if (gpu_nbor==false) {
    nbor_program=new UCL_Program(dev);
-    nbor_program->load_string(nbor_cpu,flags.c_str());
+    nbor_program->load_string(neighbor_cpu,flags.c_str());
    k_nbor.set_function(*nbor_program,"kernel_unpack");
  } else {
    build_program=new UCL_Program(dev);
@ -58,7 +56,7 @@ void PairGPUNborShared::compile_kernels(UCL_Device &dev, const bool gpu_nbor) {
    std::cerr << "CANNOT CURRENTLY USE GPU NEIGHBORING WITH OPENCL\n";
    exit(1);
    #else
-    build_program->load_string(nbor_gpu,flags.c_str());
+    build_program->load_string(neighbor_gpu,flags.c_str());
    #endif
    k_cell_id.set_function(*build_program,"calc_cell_id");
    k_cell_counts.set_function(*build_program,"kernel_calc_cell_counts");
--- a/lib/gpu/neighbor_shared.h
+++ b/lib/gpu/neighbor_shared.h
@ -0,0 +1,56 @@
+/***************************************************************************
+                              neighbor_shared.h
+                             -------------------
+                            W. Michael Brown (ORNL)
+
+  Class for management of data shared by all neighbor lists
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : 
+    email                : brownw@ornl.gov
+ ***************************************************************************/
+
+#ifndef LAL_NEIGHBOR_SHARED_H
+#define LAL_NEIGHBOR_SHARED_H
+
+#ifdef USE_OPENCL
+
+#include "geryon/ocl_kernel.h"
+#include "geryon/ocl_texture.h"
+using namespace ucl_opencl;
+
+#else
+
+#include "geryon/nvd_kernel.h"
+#include "geryon/nvd_texture.h"
+using namespace ucl_cudadr;
+
+#endif
+
+class NeighborShared {
+ public:
+  NeighborShared() : _compiled(false) {}
+  ~NeighborShared() { clear(); }
+ 
+  /// Free all memory on host and device
+  void clear();
+
+  /// Texture for cached position/type access with CUDA
+  UCL_Texture neigh_tex;
+
+  /// Compile kernels for neighbor lists
+  void compile_kernels(UCL_Device &dev, const bool gpu_nbor);
+
+  // ----------------------------- Kernels
+  UCL_Program *nbor_program, *build_program;
+  UCL_Kernel k_nbor, k_cell_id, k_cell_counts, k_build_nbor;
+  UCL_Kernel k_transpose, k_special;
+
+ private:
+  bool _compiled, _gpu_nbor;
+};
+
+#endif
--- a/lib/gpu/pppm.cpp
+++ b/lib/gpu/pppm.cpp
@ -26,13 +26,13 @@

 #define PPPMGPUMemoryT PPPMGPUMemory<numtyp, acctyp, grdtyp, grdtyp4>

-extern PairGPUDevice<PRECISION,ACC_PRECISION> pair_gpu_device;
+extern Device<PRECISION,ACC_PRECISION> global_device;

 template <class numtyp, class acctyp, class grdtyp, class grdtyp4>
 PPPMGPUMemoryT::PPPMGPUMemory() : _allocated(false), _compiled(false),
                                  _max_bytes(0) {
-  device=&pair_gpu_device;
-  ans=new PairGPUAns<numtyp,acctyp>();
+  device=&global_device;
+  ans=new Answer<numtyp,acctyp>();
 }

 template <class numtyp, class acctyp, class grdtyp, class grdtyp4>
--- a/lib/gpu/pppm.h
+++ b/lib/gpu/pppm.h
@ -27,7 +27,7 @@
 #include "geryon/nvd_texture.h"
 #endif

-template <class numtyp, class acctyp> class PairGPUDevice;
+template <class numtyp, class acctyp> class Device;

 template <class numtyp, class acctyp, class grdtyp, class grdtyp4>
 class PPPMGPUMemory {
@ -118,7 +118,7 @@ class PPPMGPUMemory {
  // -------------------------- DEVICE DATA ------------------------- 

  /// Device Properties and Atom and Neighbor storage
-  PairGPUDevice<numtyp,acctyp> *device;
+  Device<numtyp,acctyp> *device;

  /// Geryon device
  UCL_Device *ucl_device;
@ -132,7 +132,7 @@ class PPPMGPUMemory {
  // --------------------------- ATOM DATA --------------------------

  /// Atom Data
-  PairGPUAtom<numtyp,acctyp> *atom;
+  Atom<numtyp,acctyp> *atom;


  // --------------------------- GRID DATA --------------------------
@ -162,7 +162,7 @@ class PPPMGPUMemory {

  // ------------------------ FORCE/ENERGY DATA -----------------------

-  PairGPUAns<numtyp,acctyp> *ans;
+  Answer<numtyp,acctyp> *ans;

  // ------------------------- DEVICE KERNELS -------------------------
  UCL_Program *pppm_program;
--- a/lib/gpu/re_squared.cpp
+++ b/lib/gpu/re_squared.cpp
@ -24,7 +24,7 @@
 using namespace LAMMPS_AL;

 #define RESquaredT RESquared<numtyp, acctyp>
-extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
+extern Device<PRECISION,ACC_PRECISION> device;

 template <class numtyp, class acctyp>
 RESquaredT::RESquared() : BaseEllipsoid<numtyp,acctyp>(),