diff --git a/lib/gpu/Nvidia.makefile b/lib/gpu/Nvidia.makefile index 99f466965b..3587e4ebd4 100644 --- a/lib/gpu/Nvidia.makefile +++ b/lib/gpu/Nvidia.makefile @@ -31,8 +31,8 @@ UCL_H = $(wildcard ./geryon/ucl*.h) NVC_H = $(wildcard ./geryon/nvc*.h) $(UCL_H) NVD_H = $(wildcard ./geryon/nvd*.h) $(UCL_H) nv_kernel_def.h # Headers for Pair Stuff -PAIR_H = atom.h ans.h nbor_shared.h \ - nbor.h precision.h device.h \ +PAIR_H = atom.h answer.h neighbor_shared.h \ + neighbor.h precision.h device.h \ balance.h pppm.h ALL_H = $(NVD_H) $(PAIR_H) @@ -42,7 +42,7 @@ CUDPP = $(OBJ_DIR)/cudpp.o $(OBJ_DIR)/cudpp_plan.o \ $(OBJ_DIR)/cudpp_maximal_launch.o $(OBJ_DIR)/cudpp_plan_manager.o \ $(OBJ_DIR)/radixsort_app.cu_o $(OBJ_DIR)/scan_app.cu_o OBJS = $(OBJ_DIR)/atom.o $(OBJ_DIR)/ans.o \ - $(OBJ_DIR)/nbor.o $(OBJ_DIR)/nbor_shared.o \ + $(OBJ_DIR)/nbor.o $(OBJ_DIR)/neighbor_shared.o \ $(OBJ_DIR)/device.o $(OBJ_DIR)/base_atomic.o \ $(OBJ_DIR)/base_charge.o $(OBJ_DIR)/base_ellipsoid.o \ $(OBJ_DIR)/pppm.o $(OBJ_DIR)/pppm_ext.o \ @@ -62,8 +62,8 @@ OBJS = $(OBJ_DIR)/atom.o $(OBJ_DIR)/ans.o \ $(CUDPP) PTXS = $(OBJ_DIR)/device.ptx \ $(OBJ_DIR)/atom.ptx $(OBJ_DIR)/atom_ptx.h \ - $(OBJ_DIR)/nbor_cpu.ptx $(OBJ_DIR)/nbor_ptx.h \ - $(OBJ_DIR)/nbor_gpu.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h \ + $(OBJ_DIR)/neighbor_cpu.ptx $(OBJ_DIR)/nbor_ptx.h \ + $(OBJ_DIR)/neighbor_gpu.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h \ $(OBJ_DIR)/pppm_f_gpu_kernel.ptx $(OBJ_DIR)/pppm_f_gpu_ptx.h \ $(OBJ_DIR)/pppm_d_gpu_kernel.ptx $(OBJ_DIR)/pppm_d_gpu_ptx.h \ $(OBJ_DIR)/ellipsoid_nbor.ptx $(OBJ_DIR)/ellipsoid_nbor_ptx.h \ @@ -111,26 +111,26 @@ $(OBJ_DIR)/atom_ptx.h: $(OBJ_DIR)/atom.ptx $(OBJ_DIR)/atom.o: atom.cpp atom.h $(NVD_H) $(OBJ_DIR)/atom_ptx.h $(CUDR) -o $@ -c atom.cpp -I$(OBJ_DIR) -$(OBJ_DIR)/ans.o: ans.cpp ans.h $(NVD_H) - $(CUDR) -o $@ -c ans.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/ans.o: answer.cpp answer.h $(NVD_H) + $(CUDR) -o $@ -c answer.cpp -I$(OBJ_DIR) -$(OBJ_DIR)/nbor_cpu.ptx: nbor_cpu.cu - $(CUDA) --ptx -DNV_KERNEL -o $@ nbor_cpu.cu +$(OBJ_DIR)/neighbor_cpu.ptx: neighbor_cpu.cu + $(CUDA) --ptx -DNV_KERNEL -o $@ neighbor_cpu.cu -$(OBJ_DIR)/nbor_ptx.h: $(OBJ_DIR)/nbor_cpu.ptx - $(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/nbor_cpu.ptx $(OBJ_DIR)/nbor_ptx.h +$(OBJ_DIR)/nbor_ptx.h: $(OBJ_DIR)/neighbor_cpu.ptx + $(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/neighbor_cpu.ptx $(OBJ_DIR)/nbor_ptx.h -$(OBJ_DIR)/nbor_gpu.ptx: nbor_gpu.cu - $(CUDA) --ptx -DNV_KERNEL -o $@ nbor_gpu.cu +$(OBJ_DIR)/neighbor_gpu.ptx: neighbor_gpu.cu + $(CUDA) --ptx -DNV_KERNEL -o $@ neighbor_gpu.cu -$(OBJ_DIR)/pair_gpu_build_ptx.h: $(OBJ_DIR)/nbor_gpu.ptx - $(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/nbor_gpu.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h +$(OBJ_DIR)/pair_gpu_build_ptx.h: $(OBJ_DIR)/neighbor_gpu.ptx + $(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/neighbor_gpu.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h -$(OBJ_DIR)/nbor_shared.o: nbor_shared.cpp nbor_shared.h $(OBJ_DIR)/nbor_ptx.h $(OBJ_DIR)/pair_gpu_build_ptx.h $(NVD_H) - $(CUDR) -o $@ -c nbor_shared.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/neighbor_shared.o: neighbor_shared.cpp neighbor_shared.h $(OBJ_DIR)/nbor_ptx.h $(OBJ_DIR)/pair_gpu_build_ptx.h $(NVD_H) + $(CUDR) -o $@ -c neighbor_shared.cpp -I$(OBJ_DIR) -$(OBJ_DIR)/nbor.o: nbor.cpp nbor.h nbor_shared.h $(NVD_H) - $(CUDR) -o $@ -c nbor.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/nbor.o: neighbor.cpp neighbor.h neighbor_shared.h $(NVD_H) + $(CUDR) -o $@ -c neighbor.cpp -I$(OBJ_DIR) $(OBJ_DIR)/device.ptx: device.cu $(CUDA) --ptx -DNV_KERNEL -o $@ device.cu diff --git a/lib/gpu/Opencl.makefile b/lib/gpu/Opencl.makefile index 6f91dfe9c4..280baf8980 100644 --- a/lib/gpu/Opencl.makefile +++ b/lib/gpu/Opencl.makefile @@ -24,15 +24,15 @@ OCL_LIB = $(LIB_DIR)/libgpu.a UCL_H = $(wildcard ./geryon/ucl*.h) OCL_H = $(wildcard ./geryon/ocl*.h) $(UCL_H) # Headers for Pair Stuff -PAIR_H = atom.h ans.h nbor_shared.h \ - nbor.h precision.h device.h \ +PAIR_H = atom.h answer.h neighbor_shared.h \ + neighbor.h precision.h device.h \ balance.h pppm.h ALL_H = $(OCL_H) $(PAIR_H) EXECS = $(BIN_DIR)/ocl_get_devices OBJS = $(OBJ_DIR)/atom.o $(OBJ_DIR)/ans.o \ - $(OBJ_DIR)/nbor_shared.o $(OBJ_DIR)/nbor.o \ + $(OBJ_DIR)/neighbor_shared.o $(OBJ_DIR)/nbor.o \ $(OBJ_DIR)/device.o $(OBJ_DIR)/base_atomic.o \ $(OBJ_DIR)/base_charge.o $(OBJ_DIR)/base_ellipsoid.o \ $(OBJ_DIR)/pppm.o $(OBJ_DIR)/pppm_ext.o \ @@ -69,17 +69,17 @@ $(OBJ_DIR)/atom_cl.h: atom.cu $(OBJ_DIR)/atom.o: atom.cpp atom.h $(OCL_H) $(OBJ_DIR)/atom_cl.h $(OCL) -o $@ -c atom.cpp -I$(OBJ_DIR) -$(OBJ_DIR)/ans.o: ans.cpp ans.h $(OCL_H) - $(OCL) -o $@ -c ans.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/ans.o: answer.cpp answer.h $(OCL_H) + $(OCL) -o $@ -c answer.cpp -I$(OBJ_DIR) -$(OBJ_DIR)/nbor_cl.h: nbor_cpu.cu - $(BSH) ./geryon/file_to_cstr.sh nbor_cpu.cu $(OBJ_DIR)/nbor_cl.h +$(OBJ_DIR)/nbor_cl.h: neighbor_cpu.cu + $(BSH) ./geryon/file_to_cstr.sh neighbor_cpu.cu $(OBJ_DIR)/nbor_cl.h -$(OBJ_DIR)/nbor_shared.o: nbor_shared.cpp nbor_shared.h $(OCL_H) $(OBJ_DIR)/nbor_cl.h - $(OCL) -o $@ -c nbor_shared.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/neighbor_shared.o: neighbor_shared.cpp neighbor_shared.h $(OCL_H) $(OBJ_DIR)/nbor_cl.h + $(OCL) -o $@ -c neighbor_shared.cpp -I$(OBJ_DIR) -$(OBJ_DIR)/nbor.o: nbor.cpp nbor.h $(OCL_H) nbor_shared.h - $(OCL) -o $@ -c nbor.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/nbor.o: neighbor.cpp neighbor.h $(OCL_H) neighbor_shared.h + $(OCL) -o $@ -c neighbor.cpp -I$(OBJ_DIR) $(OBJ_DIR)/pair_gpu_dev_cl.h: device.cu $(BSH) ./geryon/file_to_cstr.sh device.cu $(OBJ_DIR)/pair_gpu_dev_cl.h diff --git a/lib/gpu/ans.cpp b/lib/gpu/answer.cpp similarity index 82% rename from lib/gpu/ans.cpp rename to lib/gpu/answer.cpp index f159b73efe..5dc818bb48 100644 --- a/lib/gpu/ans.cpp +++ b/lib/gpu/answer.cpp @@ -1,31 +1,29 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov +/*************************************************************************** + answer.cpp + ------------------- + W. Michael Brown (ORNL) - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. + Class for data management of forces, torques, energies, and virials - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Mike Brown (ORNL), brownw@ornl.gov -------------------------------------------------------------------------- */ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ -#include "ans.h" + begin : + email : brownw@ornl.gov + ***************************************************************************/ -#define PairGPUAnsT PairGPUAns +#include "answer.h" + +#define AnswerT Answer template -PairGPUAnsT::PairGPUAns() : _allocated(false),_eflag(false),_vflag(false), +AnswerT::Answer() : _allocated(false),_eflag(false),_vflag(false), _inum(0),_ilist(NULL),_newton(false) { } template -int PairGPUAnsT::bytes_per_atom() const { +int AnswerT::bytes_per_atom() const { int bytes=11*sizeof(acctyp); if (_rot) bytes+=4*sizeof(acctyp); @@ -35,7 +33,7 @@ int PairGPUAnsT::bytes_per_atom() const { } template -bool PairGPUAnsT::alloc(const int inum) { +bool AnswerT::alloc(const int inum) { _max_local=static_cast(static_cast(inum)*1.10); bool success=true; @@ -70,7 +68,7 @@ bool PairGPUAnsT::alloc(const int inum) { } template -bool PairGPUAnsT::init(const int inum, const bool charge, const bool rot, +bool AnswerT::init(const int inum, const bool charge, const bool rot, UCL_Device &devi) { clear(); @@ -100,7 +98,7 @@ bool PairGPUAnsT::init(const int inum, const bool charge, const bool rot, } template -bool PairGPUAnsT::add_fields(const bool charge, const bool rot) { +bool AnswerT::add_fields(const bool charge, const bool rot) { bool realloc=false; if (charge && _charge==false) { _charge=true; @@ -122,7 +120,7 @@ bool PairGPUAnsT::add_fields(const bool charge, const bool rot) { } template -void PairGPUAnsT::clear_resize() { +void AnswerT::clear_resize() { if (!_allocated) return; _allocated=false; @@ -134,7 +132,7 @@ void PairGPUAnsT::clear_resize() { } template -void PairGPUAnsT::clear() { +void AnswerT::clear() { _gpu_bytes=0; if (!_allocated) return; @@ -148,7 +146,7 @@ void PairGPUAnsT::clear() { } template -double PairGPUAnsT::host_memory_usage() const { +double AnswerT::host_memory_usage() const { int atom_bytes=4; if (_charge) atom_bytes+=1; @@ -156,11 +154,11 @@ double PairGPUAnsT::host_memory_usage() const { atom_bytes+=4; int ans_bytes=atom_bytes+_ev_fields; return ans_bytes*(_max_local)*sizeof(acctyp)+ - sizeof(PairGPUAns); + sizeof(Answer); } template -void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag, +void AnswerT::copy_answers(const bool eflag, const bool vflag, const bool ef_atom, const bool vf_atom) { time_answer.start(); _eflag=eflag; @@ -184,7 +182,7 @@ void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag, } template -void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag, +void AnswerT::copy_answers(const bool eflag, const bool vflag, const bool ef_atom, const bool vf_atom, int *ilist) { _ilist=ilist; @@ -192,7 +190,7 @@ void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag, } template -double PairGPUAnsT::energy_virial(double *eatom, double **vatom, +double AnswerT::energy_virial(double *eatom, double **vatom, double *virial) { if (_eflag==false && _vflag==false) return 0.0; @@ -268,7 +266,7 @@ double PairGPUAnsT::energy_virial(double *eatom, double **vatom, } template -double PairGPUAnsT::energy_virial(double *eatom, double **vatom, +double AnswerT::energy_virial(double *eatom, double **vatom, double *virial, double &ecoul) { if (_eflag==false && _vflag==false) return 0.0; @@ -359,7 +357,7 @@ double PairGPUAnsT::energy_virial(double *eatom, double **vatom, } template -void PairGPUAnsT::get_answers(double **f, double **tor) { +void AnswerT::get_answers(double **f, double **tor) { acctyp *ap=host_ans.begin(); if (_ilist==NULL) { for (int i=0; i<_inum; i++) { @@ -404,4 +402,4 @@ void PairGPUAnsT::get_answers(double **f, double **tor) { } } -template class PairGPUAns; +template class Answer; diff --git a/lib/gpu/ans.h b/lib/gpu/answer.h similarity index 81% rename from lib/gpu/ans.h rename to lib/gpu/answer.h index 61e97c6fe3..715b0d0d54 100644 --- a/lib/gpu/ans.h +++ b/lib/gpu/answer.h @@ -1,22 +1,20 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov +/*************************************************************************** + answer.h + ------------------- + W. Michael Brown (ORNL) - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. + Class for data management of forces, torques, energies, and virials - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ -/* ---------------------------------------------------------------------- - Contributing authors: Mike Brown (ORNL), brownw@ornl.gov -------------------------------------------------------------------------- */ + begin : + email : brownw@ornl.gov + ***************************************************************************/ -#ifndef PAIR_GPU_ANS_H -#define PAIR_GPU_ANS_H +#ifndef LAL_ANSWER_H +#define LAL_ANSWER_H #include #include "mpi.h" @@ -38,10 +36,10 @@ using namespace ucl_cudadr; #include "precision.h" template -class PairGPUAns { +class Answer { public: - PairGPUAns(); - ~PairGPUAns() { clear(); } + Answer(); + ~Answer() { clear(); } /// Current number of local atoms stored inline int inum() const { return _inum; } diff --git a/lib/gpu/atom.cpp b/lib/gpu/atom.cpp index aa7257443d..788522cfd7 100644 --- a/lib/gpu/atom.cpp +++ b/lib/gpu/atom.cpp @@ -1,26 +1,24 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov +/*************************************************************************** + atom.cpp + ------------------- + W. Michael Brown (ORNL) - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. + Class for particle data management - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Mike Brown (ORNL), brownw@ornl.gov -------------------------------------------------------------------------- */ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : + email : brownw@ornl.gov + ***************************************************************************/ #include "atom.h" -#define PairGPUAtomT PairGPUAtom +#define AtomT Atom template -PairGPUAtomT::PairGPUAtom() : _compiled(false),_allocated(false), +AtomT::Atom() : _compiled(false),_allocated(false), _max_gpu_bytes(0) { #ifndef USE_OPENCL sort_config.op = CUDPP_ADD; @@ -31,7 +29,7 @@ PairGPUAtomT::PairGPUAtom() : _compiled(false),_allocated(false), } template -int PairGPUAtomT::bytes_per_atom() const { +int AtomT::bytes_per_atom() const { int id_space=0; if (_gpu_nbor) id_space=2; @@ -44,7 +42,7 @@ int PairGPUAtomT::bytes_per_atom() const { } template -bool PairGPUAtomT::alloc(const int nall) { +bool AtomT::alloc(const int nall) { _max_atoms=static_cast(static_cast(nall)*1.10); bool success=true; @@ -138,7 +136,7 @@ bool PairGPUAtomT::alloc(const int nall) { } template -bool PairGPUAtomT::add_fields(const bool charge, const bool rot, +bool AtomT::add_fields(const bool charge, const bool rot, const bool gpu_nbor, const bool bonds) { bool realloc=false; if (charge && _charge==false) { @@ -167,7 +165,7 @@ bool PairGPUAtomT::add_fields(const bool charge, const bool rot, } template -bool PairGPUAtomT::init(const int nall, const bool charge, const bool rot, +bool AtomT::init(const int nall, const bool charge, const bool rot, UCL_Device &devi, const bool gpu_nbor, const bool bonds) { clear(); @@ -206,7 +204,7 @@ bool PairGPUAtomT::init(const int nall, const bool charge, const bool rot, } template -void PairGPUAtomT::clear_resize() { +void AtomT::clear_resize() { if (!_allocated) return; _allocated=false; @@ -240,7 +238,7 @@ void PairGPUAtomT::clear_resize() { } template -void PairGPUAtomT::clear() { +void AtomT::clear() { _max_gpu_bytes=0; if (!_allocated) return; @@ -260,19 +258,19 @@ void PairGPUAtomT::clear() { } template -double PairGPUAtomT::host_memory_usage() const { +double AtomT::host_memory_usage() const { int atom_bytes=4; if (_charge) atom_bytes+=1; if (_rot) atom_bytes+=4; return _max_atoms*atom_bytes*sizeof(numtyp)+ - sizeof(PairGPUAtom); + sizeof(Atom); } // Sort arrays for neighbor list calculation template -void PairGPUAtomT::sort_neighbor(const int num_atoms) { +void AtomT::sort_neighbor(const int num_atoms) { #ifndef USE_OPENCL CUDPPResult result = cudppSort(sort_plan, (unsigned *)dev_cell_id.begin(), (int *)dev_particle_id.begin(), @@ -292,7 +290,7 @@ void PairGPUAtomT::sort_neighbor(const int num_atoms) { #endif template -void PairGPUAtomT::compile_kernels(UCL_Device &dev) { +void AtomT::compile_kernels(UCL_Device &dev) { atom_program=new UCL_Program(dev); atom_program->load_string(atom,""); k_cast_x.set_function(*atom_program,"kernel_cast_x"); @@ -301,4 +299,4 @@ void PairGPUAtomT::compile_kernels(UCL_Device &dev) { #endif -template class PairGPUAtom; +template class Atom; diff --git a/lib/gpu/atom.cu b/lib/gpu/atom.cu index ab79ac6e9c..06c194e5de 100644 --- a/lib/gpu/atom.cu +++ b/lib/gpu/atom.cu @@ -1,19 +1,17 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Mike Brown (ORNL), brownw@ornl.gov -------------------------------------------------------------------------- */ +// ************************************************************************** +// atom.cu +// ------------------- +// W. Michael Brown (ORNL) +// +// Device code for atom data casting +// +// __________________________________________________________________________ +// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) +// __________________________________________________________________________ +// +// begin : +// email : brownw@ornl.gov +// ***************************************************************************/ #ifdef NV_KERNEL #include "geryon/ucl_nv_kernel.h" diff --git a/lib/gpu/atom.h b/lib/gpu/atom.h index 25fbf0899c..4b4f72095f 100644 --- a/lib/gpu/atom.h +++ b/lib/gpu/atom.h @@ -1,19 +1,17 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov +/*************************************************************************** + atom.h + ------------------- + W. Michael Brown (ORNL) - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. + Class for particle data management - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ -/* ---------------------------------------------------------------------- - Contributing authors: Mike Brown (ORNL), brownw@ornl.gov -------------------------------------------------------------------------- */ + begin : + email : brownw@ornl.gov + ***************************************************************************/ #ifndef PAIR_GPU_ATOM_H #define PAIR_GPU_ATOM_H @@ -41,10 +39,10 @@ using namespace ucl_cudadr; #include "precision.h" template -class PairGPUAtom { +class Atom { public: - PairGPUAtom(); - ~PairGPUAtom() { clear(); } + Atom(); + ~Atom() { clear(); } /// Maximum number of atoms that can be stored with current allocation inline int max_atoms() const { return _max_atoms; } diff --git a/lib/gpu/balance.h b/lib/gpu/balance.h index 465dde2c1c..aa23b9af3b 100644 --- a/lib/gpu/balance.h +++ b/lib/gpu/balance.h @@ -1,22 +1,20 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov +/*************************************************************************** + balance.h + ------------------- + W. Michael Brown (ORNL) - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. + Class for host-device load balancing - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Mike Brown (ORNL), brownw@ornl.gov -------------------------------------------------------------------------- */ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ -#ifndef PAIR_GPU_BALANCE_H -#define PAIR_GPU_BALANCE_H + begin : + email : brownw@ornl.gov + ***************************************************************************/ + +#ifndef LAL_BALANCE_H +#define LAL_BALANCE_H #include "device.h" #include @@ -27,13 +25,13 @@ /// Host/device load balancer template -class PairGPUBalance { +class Balance { public: - inline PairGPUBalance() : _init_done(false), _measure_this_step(false) {} - inline ~PairGPUBalance() { clear(); } + inline Balance() : _init_done(false), _measure_this_step(false) {} + inline ~Balance() { clear(); } /// Clear any old data and setup for new LAMMPS run - inline void init(PairGPUDevice *gpu, const bool gpu_nbor, + inline void init(Device *gpu, const bool gpu_nbor, const double split); /// Clear all host and device data @@ -107,7 +105,7 @@ class PairGPUBalance { } private: - PairGPUDevice *_device; + Device *_device; UCL_Timer _device_time; bool _init_done, _gpu_nbor; @@ -119,10 +117,10 @@ class PairGPUBalance { int _inum, _inum_full, _timestep; }; -#define PairGPUBalanceT PairGPUBalance +#define BalanceT Balance template -void PairGPUBalanceT::init(PairGPUDevice *gpu, +void BalanceT::init(Device *gpu, const bool gpu_nbor, const double split) { clear(); _gpu_nbor=gpu_nbor; @@ -145,7 +143,7 @@ void PairGPUBalanceT::init(PairGPUDevice *gpu, } template -int PairGPUBalanceT::get_gpu_count(const int ago, const int inum_full) { +int BalanceT::get_gpu_count(const int ago, const int inum_full) { _measure_this_step=false; if (_load_balance) { if (_avg_count<11 || _timestep%_HD_BALANCE_EVERY==0) { @@ -164,7 +162,7 @@ int PairGPUBalanceT::get_gpu_count(const int ago, const int inum_full) { } template -void PairGPUBalanceT::balance(const double cpu_time) { +void BalanceT::balance(const double cpu_time) { if (_measure_this_step) { _measure_this_step=false; double gpu_time=_device_time.seconds(); diff --git a/lib/gpu/base_atomic.cpp b/lib/gpu/base_atomic.cpp index dec46e9ae1..4386b3e36e 100644 --- a/lib/gpu/base_atomic.cpp +++ b/lib/gpu/base_atomic.cpp @@ -1,46 +1,44 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov +/*************************************************************************** + base_atomic.h + ------------------- + W. Michael Brown (ORNL) - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. + Base class for pair styles with per-particle data for position and type - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Mike Brown (ORNL), brownw@ornl.gov -------------------------------------------------------------------------- */ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : + email : brownw@ornl.gov + ***************************************************************************/ #include "base_atomic.h" -#define AtomicGPUMemoryT AtomicGPUMemory +#define BaseAtomicT BaseAtomic -extern PairGPUDevice pair_gpu_device; +extern Device global_device; template -AtomicGPUMemoryT::AtomicGPUMemory() : _compiled(false), _max_bytes(0) { - device=&pair_gpu_device; - ans=new PairGPUAns(); - nbor=new PairGPUNbor(); +BaseAtomicT::BaseAtomic() : _compiled(false), _max_bytes(0) { + device=&global_device; + ans=new Answer(); + nbor=new Neighbor(); } template -AtomicGPUMemoryT::~AtomicGPUMemory() { +BaseAtomicT::~BaseAtomic() { delete ans; delete nbor; } template -int AtomicGPUMemoryT::bytes_per_atom_atomic(const int max_nbors) const { +int BaseAtomicT::bytes_per_atom_atomic(const int max_nbors) const { return device->atom.bytes_per_atom()+ans->bytes_per_atom()+ nbor->bytes_per_atom(max_nbors); } template -int AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall, +int BaseAtomicT::init_atomic(const int nlocal, const int nall, const int max_nbors, const int maxspecial, const double cell_size, const double gpu_split, FILE *_screen, @@ -49,7 +47,7 @@ int AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall, screen=_screen; bool gpu_nbor=false; - if (device->gpu_mode()==PairGPUDevice::GPU_NEIGH) + if (device->gpu_mode()==Device::GPU_NEIGH) gpu_nbor=true; int _gpu_host=0; @@ -90,12 +88,12 @@ int AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall, } template -void AtomicGPUMemoryT::estimate_gpu_overhead() { +void BaseAtomicT::estimate_gpu_overhead() { device->estimate_gpu_overhead(1,_gpu_overhead,_driver_overhead); } template -void AtomicGPUMemoryT::clear_atomic() { +void BaseAtomicT::clear_atomic() { // Output any timing information acc_timers(); double avg_split=hd_balancer.all_avg_split(); @@ -123,7 +121,7 @@ void AtomicGPUMemoryT::clear_atomic() { // Copy neighbor list from host // --------------------------------------------------------------------------- template -int * AtomicGPUMemoryT::reset_nbors(const int nall, const int inum, int *ilist, +int * BaseAtomicT::reset_nbors(const int nall, const int inum, int *ilist, int *numj, int **firstneigh, bool &success) { success=true; @@ -147,7 +145,7 @@ int * AtomicGPUMemoryT::reset_nbors(const int nall, const int inum, int *ilist, // Build neighbor list on device // --------------------------------------------------------------------------- template -inline void AtomicGPUMemoryT::build_nbor_list(const int inum, +inline void BaseAtomicT::build_nbor_list(const int inum, const int host_inum, const int nall, double **host_x, int *host_type, double *sublo, @@ -176,7 +174,7 @@ inline void AtomicGPUMemoryT::build_nbor_list(const int inum, // Copy nbor list from host if necessary and then calculate forces, virials,.. // --------------------------------------------------------------------------- template -void AtomicGPUMemoryT::compute(const int f_ago, const int inum_full, +void BaseAtomicT::compute(const int f_ago, const int inum_full, const int nall, double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag, @@ -217,7 +215,7 @@ void AtomicGPUMemoryT::compute(const int f_ago, const int inum_full, // Reneighbor on GPU if necessary and then compute forces, virials, energies // --------------------------------------------------------------------------- template -int ** AtomicGPUMemoryT::compute(const int ago, const int inum_full, +int ** BaseAtomicT::compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, double *subhi, int *tag, int **nspecial, int **special, const bool eflag, @@ -263,13 +261,13 @@ int ** AtomicGPUMemoryT::compute(const int ago, const int inum_full, } template -double AtomicGPUMemoryT::host_memory_usage_atomic() const { +double BaseAtomicT::host_memory_usage_atomic() const { return device->atom.host_memory_usage()+nbor->host_memory_usage()+ - 4*sizeof(numtyp)+sizeof(AtomicGPUMemory); + 4*sizeof(numtyp)+sizeof(BaseAtomic); } template -void AtomicGPUMemoryT::compile_kernels(UCL_Device &dev, const char *pair_str) { +void BaseAtomicT::compile_kernels(UCL_Device &dev, const char *pair_str) { if (_compiled) return; @@ -285,5 +283,5 @@ void AtomicGPUMemoryT::compile_kernels(UCL_Device &dev, const char *pair_str) { _compiled=true; } -template class AtomicGPUMemory; +template class BaseAtomic; diff --git a/lib/gpu/base_atomic.h b/lib/gpu/base_atomic.h index dfde62f96d..d8d30856ae 100644 --- a/lib/gpu/base_atomic.h +++ b/lib/gpu/base_atomic.h @@ -1,22 +1,20 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov +/*************************************************************************** + base_atomic.h + ------------------- + W. Michael Brown (ORNL) - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. + Base class for pair styles with per-particle data for position and type - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Mike Brown (ORNL), brownw@ornl.gov -------------------------------------------------------------------------- */ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ -#ifndef ATOMIC_GPU_MEMORY_H -#define ATOMIC_GPU_MEMORY_H + begin : + email : brownw@ornl.gov + ***************************************************************************/ + +#ifndef LAL_BASE_ATOMIC_H +#define LAL_BASE_ATOMIC_H #include "device.h" #include "balance.h" @@ -29,10 +27,10 @@ #endif template -class AtomicGPUMemory { +class BaseAtomic { public: - AtomicGPUMemory(); - virtual ~AtomicGPUMemory(); + BaseAtomic(); + virtual ~BaseAtomic(); /// Clear any previous data and set up for a new LAMMPS run /** \param max_nbors initial number of rows in the neighbor matrix @@ -150,7 +148,7 @@ class AtomicGPUMemory { // -------------------------- DEVICE DATA ------------------------- /// Device Properties and Atom and Neighbor storage - PairGPUDevice *device; + Device *device; /// Geryon device UCL_Device *ucl_device; @@ -159,7 +157,7 @@ class AtomicGPUMemory { UCL_Timer time_pair; /// Host device load balancer - PairGPUBalance hd_balancer; + Balance hd_balancer; /// LAMMPS pointer for screen output FILE *screen; @@ -167,16 +165,16 @@ class AtomicGPUMemory { // --------------------------- ATOM DATA -------------------------- /// Atom Data - PairGPUAtom *atom; + Atom *atom; // ------------------------ FORCE/ENERGY DATA ----------------------- - PairGPUAns *ans; + Answer *ans; // --------------------------- NBOR DATA ---------------------------- /// Neighbor data - PairGPUNbor *nbor; + Neighbor *nbor; /// True if we need to accumulate time for neighboring bool nbor_time_avail; diff --git a/lib/gpu/base_charge.cpp b/lib/gpu/base_charge.cpp index ee63138c29..46ff3bf9d2 100644 --- a/lib/gpu/base_charge.cpp +++ b/lib/gpu/base_charge.cpp @@ -1,46 +1,45 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Charge/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov +/*************************************************************************** + base_charge.cpp + ------------------- + W. Michael Brown (ORNL) - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. + Base class for pair styles needing per-particle data for position, + charge, and type. - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Mike Brown (ORNL), brownw@ornl.gov -------------------------------------------------------------------------- */ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : + email : brownw@ornl.gov + ***************************************************************************/ #include "base_charge.h" -#define ChargeGPUMemoryT ChargeGPUMemory +#define BaseChargeT BaseCharge -extern PairGPUDevice pair_gpu_device; +extern Device global_device; template -ChargeGPUMemoryT::ChargeGPUMemory() : _compiled(false), _max_bytes(0) { - device=&pair_gpu_device; - ans=new PairGPUAns(); - nbor=new PairGPUNbor(); +BaseChargeT::BaseCharge() : _compiled(false), _max_bytes(0) { + device=&global_device; + ans=new Answer(); + nbor=new Neighbor(); } template -ChargeGPUMemoryT::~ChargeGPUMemory() { +BaseChargeT::~BaseCharge() { delete ans; delete nbor; } template -int ChargeGPUMemoryT::bytes_per_atom_atomic(const int max_nbors) const { +int BaseChargeT::bytes_per_atom_atomic(const int max_nbors) const { return device->atom.bytes_per_atom()+ans->bytes_per_atom()+ nbor->bytes_per_atom(max_nbors); } template -int ChargeGPUMemoryT::init_atomic(const int nlocal, const int nall, +int BaseChargeT::init_atomic(const int nlocal, const int nall, const int max_nbors, const int maxspecial, const double cell_size, const double gpu_split, FILE *_screen, @@ -49,7 +48,7 @@ int ChargeGPUMemoryT::init_atomic(const int nlocal, const int nall, screen=_screen; bool gpu_nbor=false; - if (device->gpu_mode()==PairGPUDevice::GPU_NEIGH) + if (device->gpu_mode()==Device::GPU_NEIGH) gpu_nbor=true; int _gpu_host=0; @@ -92,12 +91,12 @@ int ChargeGPUMemoryT::init_atomic(const int nlocal, const int nall, } template -void ChargeGPUMemoryT::estimate_gpu_overhead() { +void BaseChargeT::estimate_gpu_overhead() { device->estimate_gpu_overhead(1,_gpu_overhead,_driver_overhead); } template -void ChargeGPUMemoryT::clear_atomic() { +void BaseChargeT::clear_atomic() { // Output any timing information acc_timers(); double avg_split=hd_balancer.all_avg_split(); @@ -125,7 +124,7 @@ void ChargeGPUMemoryT::clear_atomic() { // Copy neighbor list from host // --------------------------------------------------------------------------- template -int * ChargeGPUMemoryT::reset_nbors(const int nall, const int inum, int *ilist, +int * BaseChargeT::reset_nbors(const int nall, const int inum, int *ilist, int *numj, int **firstneigh, bool &success) { success=true; @@ -150,7 +149,7 @@ int * ChargeGPUMemoryT::reset_nbors(const int nall, const int inum, int *ilist, // Build neighbor list on device // --------------------------------------------------------------------------- template -inline void ChargeGPUMemoryT::build_nbor_list(const int inum, +inline void BaseChargeT::build_nbor_list(const int inum, const int host_inum, const int nall, double **host_x, int *host_type, double *sublo, @@ -179,7 +178,7 @@ inline void ChargeGPUMemoryT::build_nbor_list(const int inum, // Copy nbor list from host if necessary and then calculate forces, virials,.. // --------------------------------------------------------------------------- template -void ChargeGPUMemoryT::compute(const int f_ago, const int inum_full, +void BaseChargeT::compute(const int f_ago, const int inum_full, const int nall, double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag, @@ -226,7 +225,7 @@ void ChargeGPUMemoryT::compute(const int f_ago, const int inum_full, // Reneighbor on GPU if necessary and then compute forces, virials, energies // --------------------------------------------------------------------------- template -int** ChargeGPUMemoryT::compute(const int ago, const int inum_full, +int** BaseChargeT::compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, double *subhi, int *tag, int **nspecial, int **special, const bool eflag, @@ -279,13 +278,13 @@ int** ChargeGPUMemoryT::compute(const int ago, const int inum_full, } template -double ChargeGPUMemoryT::host_memory_usage_atomic() const { +double BaseChargeT::host_memory_usage_atomic() const { return device->atom.host_memory_usage()+nbor->host_memory_usage()+ - 4*sizeof(numtyp)+sizeof(ChargeGPUMemory); + 4*sizeof(numtyp)+sizeof(BaseCharge); } template -void ChargeGPUMemoryT::compile_kernels(UCL_Device &dev, const char *pair_str) { +void BaseChargeT::compile_kernels(UCL_Device &dev, const char *pair_str) { if (_compiled) return; @@ -302,5 +301,5 @@ void ChargeGPUMemoryT::compile_kernels(UCL_Device &dev, const char *pair_str) { _compiled=true; } -template class ChargeGPUMemory; +template class BaseCharge; diff --git a/lib/gpu/base_charge.h b/lib/gpu/base_charge.h index e2dcea36e7..844a427e01 100644 --- a/lib/gpu/base_charge.h +++ b/lib/gpu/base_charge.h @@ -1,22 +1,21 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Charge/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov +/*************************************************************************** + base_charge.h + ------------------- + W. Michael Brown (ORNL) - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. + Base class for pair styles needing per-particle data for position, + charge, and type. - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Mike Brown (ORNL), brownw@ornl.gov -------------------------------------------------------------------------- */ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ -#ifndef CHARGE_GPU_MEMORY_H -#define CHARGE_GPU_MEMORY_H + begin : + email : brownw@ornl.gov + ***************************************************************************/ + +#ifndef LAL_BASE_CHARGE_H +#define LAL_BASE_CHARGE_H #include "device.h" #include "balance.h" @@ -29,10 +28,10 @@ #endif template -class ChargeGPUMemory { +class BaseCharge { public: - ChargeGPUMemory(); - virtual ~ChargeGPUMemory(); + BaseCharge(); + virtual ~BaseCharge(); /// Clear any previous data and set up for a new LAMMPS run /** \param max_nbors initial number of rows in the neighbor matrix @@ -146,7 +145,7 @@ class ChargeGPUMemory { // -------------------------- DEVICE DATA ------------------------- /// Device Properties and Atom and Neighbor storage - PairGPUDevice *device; + Device *device; /// Geryon device UCL_Device *ucl_device; @@ -155,7 +154,7 @@ class ChargeGPUMemory { UCL_Timer time_pair; /// Host device load balancer - PairGPUBalance hd_balancer; + Balance hd_balancer; /// LAMMPS pointer for screen output FILE *screen; @@ -163,17 +162,17 @@ class ChargeGPUMemory { // --------------------------- ATOM DATA -------------------------- /// Atom Data - PairGPUAtom *atom; + Atom *atom; // ------------------------ FORCE/ENERGY DATA ----------------------- - PairGPUAns *ans; + Answer *ans; // --------------------------- NBOR DATA ---------------------------- /// Neighbor data - PairGPUNbor *nbor; + Neighbor *nbor; /// True if we need to accumulate time for neighboring bool nbor_time_avail; diff --git a/lib/gpu/base_ellipsoid.cpp b/lib/gpu/base_ellipsoid.cpp index b2223db59d..a3518a0427 100644 --- a/lib/gpu/base_ellipsoid.cpp +++ b/lib/gpu/base_ellipsoid.cpp @@ -1,7 +1,7 @@ /*************************************************************************** base_ellipsoid.cpp ------------------- - W. Michael Brown + W. Michael Brown (ORNL) Base class for acceleration of ellipsoid potentials @@ -23,13 +23,13 @@ using namespace LAMMPS_AL; #endif #define BaseEllipsoidT BaseEllipsoid -extern PairGPUDevice pair_gpu_device; +extern Device global_device; template BaseEllipsoidT::BaseEllipsoid() : _compiled(false), _max_bytes(0) { - device=&pair_gpu_device; - ans=new PairGPUAns(); - nbor=new PairGPUNbor(); + device=&global_device; + ans=new Answer(); + nbor=new Neighbor(); } template @@ -56,7 +56,7 @@ int BaseEllipsoidT::init_base(const int nlocal, const int nall, _ellipsoid_sphere=ellip_sphere; bool gpu_nbor=false; - if (device->gpu_mode()==PairGPUDevice::GPU_NEIGH) + if (device->gpu_mode()==Device::GPU_NEIGH) gpu_nbor=true; int _gpu_host=0; diff --git a/lib/gpu/base_ellipsoid.h b/lib/gpu/base_ellipsoid.h index bcec50437b..163892db54 100644 --- a/lib/gpu/base_ellipsoid.h +++ b/lib/gpu/base_ellipsoid.h @@ -1,7 +1,7 @@ /*************************************************************************** base_ellipsoid.h ------------------- - W. Michael Brown + W. Michael Brown (ORNL) Base class for acceleration of ellipsoid potentials @@ -13,8 +13,8 @@ email : brownw@ornl.gov ***************************************************************************/ -#ifndef BASE_ELLIPSOID_H -#define BASE_ELLIPSOID_H +#ifndef LAL_BASE_ELLIPSOID_H +#define LAL_BASE_ELLIPSOID_H #include "device.h" #include "balance.h" @@ -182,7 +182,7 @@ class BaseEllipsoid { // -------------------------- DEVICE DATA ------------------------- /// Device Properties and Atom and Neighbor storage - PairGPUDevice *device; + Device *device; /// Geryon device UCL_Device *ucl_device; @@ -192,7 +192,7 @@ class BaseEllipsoid { UCL_Timer time_nbor3, time_ellipsoid3; /// Host device load balancer - PairGPUBalance hd_balancer; + Balance hd_balancer; /// LAMMPS pointer for screen output FILE *screen; @@ -200,7 +200,7 @@ class BaseEllipsoid { // --------------------------- ATOM DATA -------------------------- /// Atom Data - PairGPUAtom *atom; + Atom *atom; // --------------------------- TYPE DATA -------------------------- @@ -209,12 +209,12 @@ class BaseEllipsoid { // ------------------------ FORCE/ENERGY DATA ----------------------- - PairGPUAns *ans; + Answer *ans; // --------------------------- NBOR DATA ---------------------------- /// Neighbor data - PairGPUNbor *nbor; + Neighbor *nbor; /// ilist with particles sorted by type UCL_H_Vec host_olist; /// True if we need to accumulate time for neighboring diff --git a/lib/gpu/cg_cmm.cpp b/lib/gpu/cg_cmm.cpp index 4b83843abb..417479ea43 100644 --- a/lib/gpu/cg_cmm.cpp +++ b/lib/gpu/cg_cmm.cpp @@ -25,10 +25,10 @@ #include #define CMM_GPU_MemoryT CMM_GPU_Memory -extern PairGPUDevice device; +extern Device device; template -CMM_GPU_MemoryT::CMM_GPU_Memory() : AtomicGPUMemory(), _allocated(false) { +CMM_GPU_MemoryT::CMM_GPU_Memory() : BaseAtomic(), _allocated(false) { } template diff --git a/lib/gpu/cg_cmm.h b/lib/gpu/cg_cmm.h index b43d4180fd..5173114760 100644 --- a/lib/gpu/cg_cmm.h +++ b/lib/gpu/cg_cmm.h @@ -21,7 +21,7 @@ #include "base_atomic.h" template -class CMM_GPU_Memory : public AtomicGPUMemory { +class CMM_GPU_Memory : public BaseAtomic { public: CMM_GPU_Memory(); ~CMM_GPU_Memory(); diff --git a/lib/gpu/cg_cmm_long.cpp b/lib/gpu/cg_cmm_long.cpp index 551ae867b0..8a584a2c32 100644 --- a/lib/gpu/cg_cmm_long.cpp +++ b/lib/gpu/cg_cmm_long.cpp @@ -25,10 +25,10 @@ #include #define CMML_GPU_MemoryT CMML_GPU_Memory -extern PairGPUDevice device; +extern Device device; template -CMML_GPU_MemoryT::CMML_GPU_Memory() : ChargeGPUMemory(), +CMML_GPU_MemoryT::CMML_GPU_Memory() : BaseCharge(), _allocated(false) { } diff --git a/lib/gpu/cg_cmm_long.h b/lib/gpu/cg_cmm_long.h index a3324c5040..079472be70 100644 --- a/lib/gpu/cg_cmm_long.h +++ b/lib/gpu/cg_cmm_long.h @@ -21,7 +21,7 @@ #include "base_charge.h" template -class CMML_GPU_Memory : public ChargeGPUMemory { +class CMML_GPU_Memory : public BaseCharge { public: CMML_GPU_Memory(); ~CMML_GPU_Memory(); diff --git a/lib/gpu/cg_cmm_msm.cpp b/lib/gpu/cg_cmm_msm.cpp index 1912de1ba2..c0d09aa30c 100644 --- a/lib/gpu/cg_cmm_msm.cpp +++ b/lib/gpu/cg_cmm_msm.cpp @@ -25,10 +25,10 @@ #include #define CMMM_GPU_MemoryT CMMM_GPU_Memory -extern PairGPUDevice device; +extern Device device; template -CMMM_GPU_MemoryT::CMMM_GPU_Memory() : ChargeGPUMemory(), +CMMM_GPU_MemoryT::CMMM_GPU_Memory() : BaseCharge(), _allocated(false) { } diff --git a/lib/gpu/cg_cmm_msm.h b/lib/gpu/cg_cmm_msm.h index 9be97de50d..afc054aadf 100644 --- a/lib/gpu/cg_cmm_msm.h +++ b/lib/gpu/cg_cmm_msm.h @@ -21,7 +21,7 @@ #include "base_charge.h" template -class CMMM_GPU_Memory : public ChargeGPUMemory { +class CMMM_GPU_Memory : public BaseCharge { public: CMMM_GPU_Memory(); ~CMMM_GPU_Memory(); diff --git a/lib/gpu/charmm_long.cpp b/lib/gpu/charmm_long.cpp index 87e1e8be8c..d949e3456d 100644 --- a/lib/gpu/charmm_long.cpp +++ b/lib/gpu/charmm_long.cpp @@ -25,10 +25,10 @@ #include #define CRML_GPU_MemoryT CRML_GPU_Memory -extern PairGPUDevice device; +extern Device device; template -CRML_GPU_MemoryT::CRML_GPU_Memory() : ChargeGPUMemory(), +CRML_GPU_MemoryT::CRML_GPU_Memory() : BaseCharge(), _allocated(false) { } diff --git a/lib/gpu/charmm_long.h b/lib/gpu/charmm_long.h index 634a528a3a..6de168e0aa 100644 --- a/lib/gpu/charmm_long.h +++ b/lib/gpu/charmm_long.h @@ -21,7 +21,7 @@ #include "base_charge.h" template -class CRML_GPU_Memory : public ChargeGPUMemory { +class CRML_GPU_Memory : public BaseCharge { public: CRML_GPU_Memory(); ~CRML_GPU_Memory(); diff --git a/lib/gpu/device.cpp b/lib/gpu/device.cpp index eccba0786e..e6cb60b2a6 100644 --- a/lib/gpu/device.cpp +++ b/lib/gpu/device.cpp @@ -1,19 +1,17 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov +/*************************************************************************** + device.cpp + ------------------- + W. Michael Brown (ORNL) - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. + Class for management of the device where the computations are performed - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ -/* ---------------------------------------------------------------------- - Contributing authors: Mike Brown (ORNL), brownw@ornl.gov -------------------------------------------------------------------------- */ + begin : + email : brownw@ornl.gov + ***************************************************************************/ #include "device.h" #include "precision.h" @@ -29,21 +27,21 @@ #include "pair_gpu_dev_ptx.h" #endif -#define PairGPUDeviceT PairGPUDevice +#define DeviceT Device template -PairGPUDeviceT::PairGPUDevice() : _init_count(0), _device_init(false), +DeviceT::Device() : _init_count(0), _device_init(false), _gpu_mode(GPU_FORCE), _first_device(0), _last_device(0), _compiled(false) { } template -PairGPUDeviceT::~PairGPUDevice() { +DeviceT::~Device() { clear_device(); } template -int PairGPUDeviceT::init_device(MPI_Comm world, MPI_Comm replica, +int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int first_gpu, const int last_gpu, const int gpu_mode, const double p_split, const int nthreads, const int t_per_atom) { @@ -135,10 +133,10 @@ int PairGPUDeviceT::init_device(MPI_Comm world, MPI_Comm replica, } template -int PairGPUDeviceT::init(PairGPUAns &ans, const bool charge, +int DeviceT::init(Answer &ans, const bool charge, const bool rot, const int nlocal, const int host_nlocal, const int nall, - PairGPUNbor *nbor, const int maxspecial, + Neighbor *nbor, const int maxspecial, const int gpu_host, const int max_nbors, const double cell_size, const bool pre_cut) { if (!_device_init) @@ -181,7 +179,7 @@ int PairGPUDeviceT::init(PairGPUAns &ans, const bool charge, if (!ans.init(ef_nlocal,charge,rot,*gpu)) return -3; - if (!nbor->init(&_nbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial, + if (!nbor->init(&_neighbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial, *gpu,gpu_nbor,gpu_host,pre_cut, _block_cell_2d, _block_cell_id, _block_nbor_build)) return -3; @@ -192,7 +190,7 @@ int PairGPUDeviceT::init(PairGPUAns &ans, const bool charge, } template -int PairGPUDeviceT::init(PairGPUAns &ans, const int nlocal, +int DeviceT::init(Answer &ans, const int nlocal, const int nall) { if (!_device_init) return -1; @@ -215,21 +213,21 @@ int PairGPUDeviceT::init(PairGPUAns &ans, const int nlocal, } template -void PairGPUDeviceT::set_single_precompute +void DeviceT::set_single_precompute (PPPMGPUMemory *pppm) { _long_range_precompute=1; pppm_single=pppm; } template -void PairGPUDeviceT::set_double_precompute +void DeviceT::set_double_precompute (PPPMGPUMemory *pppm) { _long_range_precompute=2; pppm_double=pppm; } template -void PairGPUDeviceT::init_message(FILE *screen, const char *name, +void DeviceT::init_message(FILE *screen, const char *name, const int first_gpu, const int last_gpu) { #ifdef USE_OPENCL std::string fs=""; @@ -272,7 +270,7 @@ void PairGPUDeviceT::init_message(FILE *screen, const char *name, } template -void PairGPUDeviceT::estimate_gpu_overhead(const int kernel_calls, +void DeviceT::estimate_gpu_overhead(const int kernel_calls, double &gpu_overhead, double &gpu_driver_overhead) { UCL_H_Vec *host_data_in=NULL, *host_data_out=NULL; @@ -384,9 +382,9 @@ void PairGPUDeviceT::estimate_gpu_overhead(const int kernel_calls, } template -void PairGPUDeviceT::output_times(UCL_Timer &time_pair, - PairGPUAns &ans, - PairGPUNbor &nbor, const double avg_split, +void DeviceT::output_times(UCL_Timer &time_pair, + Answer &ans, + Neighbor &nbor, const double avg_split, const double max_bytes, const double gpu_overhead, const double driver_overhead, @@ -440,12 +438,12 @@ void PairGPUDeviceT::output_times(UCL_Timer &time_pair, } template -void PairGPUDeviceT::output_kspace_times(UCL_Timer &time_in, +void DeviceT::output_kspace_times(UCL_Timer &time_in, UCL_Timer &time_out, UCL_Timer &time_map, UCL_Timer &time_rho, UCL_Timer &time_interp, - PairGPUAns &ans, + Answer &ans, const double max_bytes, const double cpu_time, const double idle_time, FILE *screen) { @@ -500,13 +498,13 @@ void PairGPUDeviceT::output_kspace_times(UCL_Timer &time_in, } template -void PairGPUDeviceT::clear() { +void DeviceT::clear() { if (_init_count>0) { _long_range_precompute=0; _init_count--; if (_init_count==0) { atom.clear(); - _nbor_shared.clear(); + _neighbor_shared.clear(); if (_compiled) { k_zero.clear(); k_info.clear(); @@ -518,7 +516,7 @@ void PairGPUDeviceT::clear() { } template -void PairGPUDeviceT::clear_device() { +void DeviceT::clear_device() { while (_init_count>0) clear(); if (_device_init) { @@ -528,7 +526,7 @@ void PairGPUDeviceT::clear_device() { } template -int PairGPUDeviceT::compile_kernels() { +int DeviceT::compile_kernels() { int flag=0; if (_compiled) @@ -588,27 +586,27 @@ int PairGPUDeviceT::compile_kernels() { } template -double PairGPUDeviceT::host_memory_usage() const { +double DeviceT::host_memory_usage() const { return atom.host_memory_usage()+4*sizeof(numtyp)+ - sizeof(PairGPUDevice); + sizeof(Device); } -template class PairGPUDevice; -PairGPUDevice pair_gpu_device; +template class Device; +Device global_device; int lmp_init_device(MPI_Comm world, MPI_Comm replica, const int first_gpu, const int last_gpu, const int gpu_mode, const double particle_split, const int nthreads, const int t_per_atom) { - return pair_gpu_device.init_device(world,replica,first_gpu,last_gpu,gpu_mode, + return global_device.init_device(world,replica,first_gpu,last_gpu,gpu_mode, particle_split,nthreads,t_per_atom); } void lmp_clear_device() { - pair_gpu_device.clear_device(); + global_device.clear_device(); } double lmp_gpu_forces(double **f, double **tor, double *eatom, double **vatom, double *virial, double &ecoul) { - return pair_gpu_device.fix_gpu(f,tor,eatom,vatom,virial,ecoul); + return global_device.fix_gpu(f,tor,eatom,vatom,virial,ecoul); } diff --git a/lib/gpu/device.cu b/lib/gpu/device.cu index 73411c99ff..6c2f3606c0 100644 --- a/lib/gpu/device.cu +++ b/lib/gpu/device.cu @@ -1,19 +1,17 @@ -/* ---------------------------------------------------------------------- - LAMMPS-Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Mike Brown (ORNL), brownw@ornl.gov -------------------------------------------------------------------------- */ +// ************************************************************************** +// device.cu +// ------------------- +// W. Michael Brown (ORNL) +// +// Device code for device information +// +// __________________________________________________________________________ +// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) +// __________________________________________________________________________ +// +// begin : +// email : brownw@ornl.gov +// ***************************************************************************/ /************************************************************************* Preprocessor Definitions @@ -63,9 +61,6 @@ *************************************************************************/ -#ifndef PAIR_GPU_DEV_KERNEL -#define PAIR_GPU_DEV_KERNEL - #ifdef NV_KERNEL #include "nv_kernel_def.h" @@ -116,5 +111,3 @@ __kernel void kernel_info(__global int *info) { info[13]=THREADS_PER_CHARGE; } -#endif - diff --git a/lib/gpu/device.h b/lib/gpu/device.h index 5b5a5c7912..7aab94f243 100644 --- a/lib/gpu/device.h +++ b/lib/gpu/device.h @@ -1,26 +1,24 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov +/*************************************************************************** + device.h + ------------------- + W. Michael Brown (ORNL) - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. + Class for management of the device where the computations are performed - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ -/* ---------------------------------------------------------------------- - Contributing authors: Mike Brown (ORNL), brownw@ornl.gov -------------------------------------------------------------------------- */ + begin : + email : brownw@ornl.gov + ***************************************************************************/ -#ifndef PAIR_GPU_DEVICE_H -#define PAIR_GPU_DEVICE_H +#ifndef LAL_DEVICE_H +#define LAL_DEVICE_H #include "atom.h" -#include "ans.h" -#include "nbor.h" +#include "answer.h" +#include "neighbor.h" #include "pppm.h" #include "mpi.h" #include @@ -32,10 +30,10 @@ template class PPPMGPUMemory; template -class PairGPUDevice { +class Device { public: - PairGPUDevice(); - ~PairGPUDevice(); + Device(); + ~Device(); /// Initialize the device for use by this process /** Sets up a per-device MPI communicator for load balancing and initializes @@ -68,9 +66,9 @@ class PairGPUDevice { * - -3 if there is an out of memory error * - -4 if the GPU library was not compiled for GPU * - -5 Double precision is not supported on card **/ - int init(PairGPUAns &a, const bool charge, const bool rot, + int init(Answer &a, const bool charge, const bool rot, const int nlocal, const int host_nlocal, const int nall, - PairGPUNbor *nbor, const int maxspecial, const int gpu_host, + Neighbor *nbor, const int maxspecial, const int gpu_host, const int max_nbors, const double cell_size, const bool pre_cut); /// Initialize the device for Atom storage only @@ -83,7 +81,7 @@ class PairGPUDevice { * - -3 if there is an out of memory error * - -4 if the GPU library was not compiled for GPU * - -5 Double precision is not supported on card **/ - int init(PairGPUAns &ans, const int nlocal, const int nall); + int init(Answer &ans, const int nlocal, const int nall); /// Output a message for pair_style acceleration with device stats void init_message(FILE *screen, const char *name, @@ -109,8 +107,8 @@ class PairGPUDevice { inline bool double_precision() { return gpu->double_precision(); } /// Output a message with timing information - void output_times(UCL_Timer &time_pair, PairGPUAns &ans, - PairGPUNbor &nbor, const double avg_split, + void output_times(UCL_Timer &time_pair, Answer &ans, + Neighbor &nbor, const double avg_split, const double max_bytes, const double gpu_overhead, const double driver_overhead, const int threads_per_atom, FILE *screen); @@ -119,7 +117,7 @@ class PairGPUDevice { void output_kspace_times(UCL_Timer &time_in, UCL_Timer &time_out, UCL_Timer & time_map, UCL_Timer & time_rho, UCL_Timer &time_interp, - PairGPUAns &ans, + Answer &ans, const double max_bytes, const double cpu_time, const double cpu_idle_time, FILE *screen); @@ -130,7 +128,7 @@ class PairGPUDevice { void clear_device(); /// Add an answer object for putting forces, energies, etc from GPU to LAMMPS - inline void add_ans_object(PairGPUAns *ans) + inline void add_ans_object(Answer *ans) { ans_queue.push(ans); } /// Add "answers" (force,energies,etc.) into LAMMPS structures @@ -248,12 +246,12 @@ class PairGPUDevice { // --------------------------- ATOM DATA -------------------------- /// Atom Data - PairGPUAtom atom; + Atom atom; // --------------------------- NBOR DATA ---------------------------- /// Neighbor Data - PairGPUNborShared _nbor_shared; + NeighborShared _neighbor_shared; // ------------------------ LONG RANGE DATA ------------------------- @@ -274,7 +272,7 @@ class PairGPUDevice { } private: - std::queue *> ans_queue; + std::queue *> ans_queue; int _init_count; bool _device_init, _host_timer_started, _time_device; MPI_Comm _comm_world, _comm_replica, _comm_gpu; diff --git a/lib/gpu/ellipsoid_extra.h b/lib/gpu/ellipsoid_extra.h index 9d54efdeb9..62efadf181 100644 --- a/lib/gpu/ellipsoid_extra.h +++ b/lib/gpu/ellipsoid_extra.h @@ -1,7 +1,7 @@ // ************************************************************************** // ellipsoid_extra.h // ------------------- -// W. Michael Brown +// W. Michael Brown (ORNL) // // Device code for Ellipsoid math routines // @@ -13,8 +13,8 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifndef ELLIPSOID_EXTRA_H -#define ELLIPSOID_EXTRA_H +#ifndef LAL_ELLIPSOID_EXTRA_H +#define LAL_ELLIPSOID_EXTRA_H enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE}; @@ -409,16 +409,4 @@ __inline void gpu_times_column3(const numtyp m[9], const numtyp v[3], ans[2] = m[6]*v[0] + m[7]*v[1] + m[8]*v[2]; } - - - - - - - - - - - - #endif diff --git a/lib/gpu/ellipsoid_nbor.cu b/lib/gpu/ellipsoid_nbor.cu index e91d356b4e..74b6be2cfa 100644 --- a/lib/gpu/ellipsoid_nbor.cu +++ b/lib/gpu/ellipsoid_nbor.cu @@ -1,7 +1,7 @@ // ************************************************************************** // ellipsoid_nbor.cu // ------------------- -// W. Michael Brown +// W. Michael Brown (ORNL) // // Device code for Ellipsoid neighbor routines // @@ -13,9 +13,6 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifndef ELLIPSOID_NBOR_H -#define ELLIPSOID_NBOR_H - #ifdef NV_KERNEL #include "nv_kernel_def.h" @@ -161,5 +158,3 @@ __kernel void kernel_nbor_fast(__global numtyp4 *x_, __global numtyp2 *cut_form, dev_nbor[ii+nbor_pitch]=newj; } } - -#endif diff --git a/lib/gpu/gayberne.cpp b/lib/gpu/gayberne.cpp index 54ddc7f9e6..b1433ee7fb 100644 --- a/lib/gpu/gayberne.cpp +++ b/lib/gpu/gayberne.cpp @@ -24,7 +24,7 @@ using namespace LAMMPS_AL; #define GayBerneT GayBerne -extern PairGPUDevice device; +extern Device device; template GayBerneT::GayBerne() : BaseEllipsoid(), diff --git a/lib/gpu/lj.cpp b/lib/gpu/lj.cpp index 16bf7ae92b..58ff72ffe0 100644 --- a/lib/gpu/lj.cpp +++ b/lib/gpu/lj.cpp @@ -25,10 +25,10 @@ #include #define LJL_GPU_MemoryT LJL_GPU_Memory -extern PairGPUDevice device; +extern Device device; template -LJL_GPU_MemoryT::LJL_GPU_Memory() : AtomicGPUMemory(), _allocated(false) { +LJL_GPU_MemoryT::LJL_GPU_Memory() : BaseAtomic(), _allocated(false) { } template diff --git a/lib/gpu/lj.h b/lib/gpu/lj.h index f90f372349..8e9989beb9 100644 --- a/lib/gpu/lj.h +++ b/lib/gpu/lj.h @@ -21,7 +21,7 @@ #include "base_atomic.h" template -class LJL_GPU_Memory : public AtomicGPUMemory { +class LJL_GPU_Memory : public BaseAtomic { public: LJL_GPU_Memory(); ~LJL_GPU_Memory(); diff --git a/lib/gpu/lj96.cpp b/lib/gpu/lj96.cpp index 4fb04716c1..d6477d6c3b 100644 --- a/lib/gpu/lj96.cpp +++ b/lib/gpu/lj96.cpp @@ -25,10 +25,10 @@ #include #define LJ96_GPU_MemoryT LJ96_GPU_Memory -extern PairGPUDevice device; +extern Device device; template -LJ96_GPU_MemoryT::LJ96_GPU_Memory() : AtomicGPUMemory(), _allocated(false) { +LJ96_GPU_MemoryT::LJ96_GPU_Memory() : BaseAtomic(), _allocated(false) { } template diff --git a/lib/gpu/lj96.h b/lib/gpu/lj96.h index 187283c206..e68e8e4fad 100644 --- a/lib/gpu/lj96.h +++ b/lib/gpu/lj96.h @@ -21,7 +21,7 @@ #include "base_atomic.h" template -class LJ96_GPU_Memory : public AtomicGPUMemory { +class LJ96_GPU_Memory : public BaseAtomic { public: LJ96_GPU_Memory(); ~LJ96_GPU_Memory(); diff --git a/lib/gpu/lj_class2_long.cpp b/lib/gpu/lj_class2_long.cpp index 5b56bf0b3e..5c47df6b31 100644 --- a/lib/gpu/lj_class2_long.cpp +++ b/lib/gpu/lj_class2_long.cpp @@ -25,10 +25,10 @@ using namespace LAMMPS_AL; #define LJClass2LongT LJClass2Long -extern PairGPUDevice device; +extern Device device; template -LJClass2LongT::LJClass2Long() : ChargeGPUMemory(), +LJClass2LongT::LJClass2Long() : BaseCharge(), _allocated(false) { } diff --git a/lib/gpu/lj_class2_long.h b/lib/gpu/lj_class2_long.h index fff2f68994..c263c01e57 100644 --- a/lib/gpu/lj_class2_long.h +++ b/lib/gpu/lj_class2_long.h @@ -21,7 +21,7 @@ namespace LAMMPS_AL { template -class LJClass2Long : public ChargeGPUMemory { +class LJClass2Long : public BaseCharge { public: LJClass2Long(); ~LJClass2Long(); diff --git a/lib/gpu/lj_coul.cpp b/lib/gpu/lj_coul.cpp index c6eaa74080..1304195b4b 100644 --- a/lib/gpu/lj_coul.cpp +++ b/lib/gpu/lj_coul.cpp @@ -25,10 +25,10 @@ #include #define LJC_GPU_MemoryT LJC_GPU_Memory -extern PairGPUDevice device; +extern Device device; template -LJC_GPU_MemoryT::LJC_GPU_Memory() : ChargeGPUMemory(), +LJC_GPU_MemoryT::LJC_GPU_Memory() : BaseCharge(), _allocated(false) { } diff --git a/lib/gpu/lj_coul.h b/lib/gpu/lj_coul.h index 5d1d325a56..628af7b430 100644 --- a/lib/gpu/lj_coul.h +++ b/lib/gpu/lj_coul.h @@ -21,7 +21,7 @@ #include "base_charge.h" template -class LJC_GPU_Memory : public ChargeGPUMemory { +class LJC_GPU_Memory : public BaseCharge { public: LJC_GPU_Memory(); ~LJC_GPU_Memory(); diff --git a/lib/gpu/lj_coul_long.cpp b/lib/gpu/lj_coul_long.cpp index 926906bb62..00a666d502 100644 --- a/lib/gpu/lj_coul_long.cpp +++ b/lib/gpu/lj_coul_long.cpp @@ -25,10 +25,10 @@ #include #define LJCL_GPU_MemoryT LJCL_GPU_Memory -extern PairGPUDevice device; +extern Device device; template -LJCL_GPU_MemoryT::LJCL_GPU_Memory() : ChargeGPUMemory(), +LJCL_GPU_MemoryT::LJCL_GPU_Memory() : BaseCharge(), _allocated(false) { } diff --git a/lib/gpu/lj_coul_long.h b/lib/gpu/lj_coul_long.h index 9f10c9a888..3e9502ed41 100644 --- a/lib/gpu/lj_coul_long.h +++ b/lib/gpu/lj_coul_long.h @@ -21,7 +21,7 @@ #include "base_charge.h" template -class LJCL_GPU_Memory : public ChargeGPUMemory { +class LJCL_GPU_Memory : public BaseCharge { public: LJCL_GPU_Memory(); ~LJCL_GPU_Memory(); diff --git a/lib/gpu/lj_expand.cpp b/lib/gpu/lj_expand.cpp index bb3fccad8e..67860980d3 100644 --- a/lib/gpu/lj_expand.cpp +++ b/lib/gpu/lj_expand.cpp @@ -25,10 +25,10 @@ #include #define LJE_GPU_MemoryT LJE_GPU_Memory -extern PairGPUDevice device; +extern Device device; template -LJE_GPU_MemoryT::LJE_GPU_Memory() : AtomicGPUMemory(), _allocated(false) { +LJE_GPU_MemoryT::LJE_GPU_Memory() : BaseAtomic(), _allocated(false) { } template diff --git a/lib/gpu/lj_expand.h b/lib/gpu/lj_expand.h index 7e6bdd92fb..22ea145218 100644 --- a/lib/gpu/lj_expand.h +++ b/lib/gpu/lj_expand.h @@ -21,7 +21,7 @@ #include "base_atomic.h" template -class LJE_GPU_Memory : public AtomicGPUMemory { +class LJE_GPU_Memory : public BaseAtomic { public: LJE_GPU_Memory(); ~LJE_GPU_Memory(); diff --git a/lib/gpu/morse.cpp b/lib/gpu/morse.cpp index c23ea587bf..a9298333fc 100644 --- a/lib/gpu/morse.cpp +++ b/lib/gpu/morse.cpp @@ -25,10 +25,10 @@ #include #define MOR_GPU_MemoryT MOR_GPU_Memory -extern PairGPUDevice device; +extern Device device; template -MOR_GPU_MemoryT::MOR_GPU_Memory() : AtomicGPUMemory(), _allocated(false) { +MOR_GPU_MemoryT::MOR_GPU_Memory() : BaseAtomic(), _allocated(false) { } template diff --git a/lib/gpu/morse.h b/lib/gpu/morse.h index 9b718d354e..84f298a702 100644 --- a/lib/gpu/morse.h +++ b/lib/gpu/morse.h @@ -21,7 +21,7 @@ #include "base_atomic.h" template -class MOR_GPU_Memory : public AtomicGPUMemory { +class MOR_GPU_Memory : public BaseAtomic { public: MOR_GPU_Memory(); ~MOR_GPU_Memory(); diff --git a/lib/gpu/nbor_cpu.cu b/lib/gpu/nbor_cpu.cu deleted file mode 100644 index 238023b429..0000000000 --- a/lib/gpu/nbor_cpu.cu +++ /dev/null @@ -1,46 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Mike Brown (ORNL), brownw@ornl.gov -------------------------------------------------------------------------- */ - -#ifdef NV_KERNEL - -#include "geryon/ucl_nv_kernel.h" - -#else - -#define GLOBAL_ID_X get_global_id(0) - -#endif - -__kernel void kernel_unpack(__global int *dev_nbor, __global int *dev_ij, - const int inum) { - // ii indexes the two interacting particles in gi - int ii=GLOBAL_ID_X; - - if (ii -void PairGPUNbor::build_nbor_list(const int inum, const int host_inum, +void Neighbor::build_nbor_list(const int inum, const int host_inum, const int nall, - PairGPUAtom &atom, + Atom &atom, double *sublo, double *subhi, int *tag, int **nspecial, int **special, bool &success, int &mn) { @@ -399,8 +397,8 @@ void PairGPUNbor::build_nbor_list(const int inum, const int host_inum, time_nbor.stop(); } -template void PairGPUNbor::build_nbor_list +template void Neighbor::build_nbor_list (const int inum, const int host_inum, const int nall, - PairGPUAtom &atom, double *sublo, double *subhi, + Atom &atom, double *sublo, double *subhi, int *, int **, int **, bool &success, int &mn); diff --git a/lib/gpu/nbor.h b/lib/gpu/neighbor.h similarity index 84% rename from lib/gpu/nbor.h rename to lib/gpu/neighbor.h index 0bb2ceaa0c..5242b154b9 100644 --- a/lib/gpu/nbor.h +++ b/lib/gpu/neighbor.h @@ -1,25 +1,24 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov +/*************************************************************************** + neighbor.h + ------------------- + W. Michael Brown (ORNL) + Peng Wang (Nvidia) - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. + Class for handling neighbor lists - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ -/* ---------------------------------------------------------------------- - Contributing authors: Mike Brown (ORNL), brownw@ornl.gov -------------------------------------------------------------------------- */ + begin : + email : brownw@ornl.gov, penwang@nvidia.com + ***************************************************************************/ -#ifndef PAIR_GPU_NBOR_H -#define PAIR_GPU_NBOR_H +#ifndef LAL_NEIGHBOR_H +#define LAL_NEIGHBOR_H #include "atom.h" -#include "nbor_shared.h" +#include "neighbor_shared.h" #define IJ_SIZE 131072 @@ -37,10 +36,10 @@ using namespace ucl_cudadr; #endif -class PairGPUNbor { +class Neighbor { public: - PairGPUNbor() : _allocated(false), _use_packing(false) {} - ~PairGPUNbor() { clear(); } + Neighbor() : _allocated(false), _use_packing(false) {} + ~Neighbor() { clear(); } /// Determine whether neighbor unpacking should be used /** If false, twice as much memory is reserved to allow unpacking neighbors by @@ -57,7 +56,7 @@ class PairGPUNbor { * 2 if gpu_nbor is true, and host needs a full nbor list * \param pre_cut True if cutoff test will be performed in separate kernel * than the force kernel **/ - bool init(PairGPUNborShared *shared, const int inum, const int host_inum, + bool init(NeighborShared *shared, const int inum, const int host_inum, const int max_nbors, const int maxspecial, UCL_Device &dev, const bool gpu_nbor, const int gpu_host, const bool pre_cut, const int block_cell_2d, const int block_cell_id, @@ -138,7 +137,7 @@ class PairGPUNbor { /// Build nbor list on the device template void build_nbor_list(const int inum, const int host_inum, const int nall, - PairGPUAtom &atom, double *sublo, + Atom &atom, double *sublo, double *subhi, int *tag, int **nspecial, int **special, bool &success, int &max_nbors); @@ -187,7 +186,7 @@ class PairGPUNbor { UCL_Timer time_nbor, time_kernel; private: - PairGPUNborShared *_shared; + NeighborShared *_shared; UCL_Device *dev; bool _allocated, _use_packing; int _max_atoms, _max_nbors, _max_host, _nbor_pitch, _maxspecial; diff --git a/lib/gpu/neighbor_cpu.cu b/lib/gpu/neighbor_cpu.cu new file mode 100644 index 0000000000..014561e829 --- /dev/null +++ b/lib/gpu/neighbor_cpu.cu @@ -0,0 +1,44 @@ +// ************************************************************************** +// atom.cu +// ------------------- +// W. Michael Brown (ORNL) +// +// Device code for handling CPU generated neighbor lists +// +// __________________________________________________________________________ +// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) +// __________________________________________________________________________ +// +// begin : +// email : brownw@ornl.gov +// ***************************************************************************/ + +#ifdef NV_KERNEL + +#include "geryon/ucl_nv_kernel.h" + +#else + +#define GLOBAL_ID_X get_global_id(0) + +#endif + +__kernel void kernel_unpack(__global int *dev_nbor, __global int *dev_ij, + const int inum) { + // ii indexes the two interacting particles in gi + int ii=GLOBAL_ID_X; + + if (iiload_string(nbor_cpu,flags.c_str()); + nbor_program->load_string(neighbor_cpu,flags.c_str()); k_nbor.set_function(*nbor_program,"kernel_unpack"); } else { build_program=new UCL_Program(dev); @@ -58,7 +56,7 @@ void PairGPUNborShared::compile_kernels(UCL_Device &dev, const bool gpu_nbor) { std::cerr << "CANNOT CURRENTLY USE GPU NEIGHBORING WITH OPENCL\n"; exit(1); #else - build_program->load_string(nbor_gpu,flags.c_str()); + build_program->load_string(neighbor_gpu,flags.c_str()); #endif k_cell_id.set_function(*build_program,"calc_cell_id"); k_cell_counts.set_function(*build_program,"kernel_calc_cell_counts"); diff --git a/lib/gpu/neighbor_shared.h b/lib/gpu/neighbor_shared.h new file mode 100644 index 0000000000..c2ea9b4ac7 --- /dev/null +++ b/lib/gpu/neighbor_shared.h @@ -0,0 +1,56 @@ +/*************************************************************************** + neighbor_shared.h + ------------------- + W. Michael Brown (ORNL) + + Class for management of data shared by all neighbor lists + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : + email : brownw@ornl.gov + ***************************************************************************/ + +#ifndef LAL_NEIGHBOR_SHARED_H +#define LAL_NEIGHBOR_SHARED_H + +#ifdef USE_OPENCL + +#include "geryon/ocl_kernel.h" +#include "geryon/ocl_texture.h" +using namespace ucl_opencl; + +#else + +#include "geryon/nvd_kernel.h" +#include "geryon/nvd_texture.h" +using namespace ucl_cudadr; + +#endif + +class NeighborShared { + public: + NeighborShared() : _compiled(false) {} + ~NeighborShared() { clear(); } + + /// Free all memory on host and device + void clear(); + + /// Texture for cached position/type access with CUDA + UCL_Texture neigh_tex; + + /// Compile kernels for neighbor lists + void compile_kernels(UCL_Device &dev, const bool gpu_nbor); + + // ----------------------------- Kernels + UCL_Program *nbor_program, *build_program; + UCL_Kernel k_nbor, k_cell_id, k_cell_counts, k_build_nbor; + UCL_Kernel k_transpose, k_special; + + private: + bool _compiled, _gpu_nbor; +}; + +#endif diff --git a/lib/gpu/pppm.cpp b/lib/gpu/pppm.cpp index 2bd394e9f6..8cf9754a59 100644 --- a/lib/gpu/pppm.cpp +++ b/lib/gpu/pppm.cpp @@ -26,13 +26,13 @@ #define PPPMGPUMemoryT PPPMGPUMemory -extern PairGPUDevice pair_gpu_device; +extern Device global_device; template PPPMGPUMemoryT::PPPMGPUMemory() : _allocated(false), _compiled(false), _max_bytes(0) { - device=&pair_gpu_device; - ans=new PairGPUAns(); + device=&global_device; + ans=new Answer(); } template diff --git a/lib/gpu/pppm.h b/lib/gpu/pppm.h index 8ebc428942..da9a3758a3 100644 --- a/lib/gpu/pppm.h +++ b/lib/gpu/pppm.h @@ -27,7 +27,7 @@ #include "geryon/nvd_texture.h" #endif -template class PairGPUDevice; +template class Device; template class PPPMGPUMemory { @@ -118,7 +118,7 @@ class PPPMGPUMemory { // -------------------------- DEVICE DATA ------------------------- /// Device Properties and Atom and Neighbor storage - PairGPUDevice *device; + Device *device; /// Geryon device UCL_Device *ucl_device; @@ -132,7 +132,7 @@ class PPPMGPUMemory { // --------------------------- ATOM DATA -------------------------- /// Atom Data - PairGPUAtom *atom; + Atom *atom; // --------------------------- GRID DATA -------------------------- @@ -162,7 +162,7 @@ class PPPMGPUMemory { // ------------------------ FORCE/ENERGY DATA ----------------------- - PairGPUAns *ans; + Answer *ans; // ------------------------- DEVICE KERNELS ------------------------- UCL_Program *pppm_program; diff --git a/lib/gpu/re_squared.cpp b/lib/gpu/re_squared.cpp index adc4acfb56..851a0c2039 100644 --- a/lib/gpu/re_squared.cpp +++ b/lib/gpu/re_squared.cpp @@ -24,7 +24,7 @@ using namespace LAMMPS_AL; #define RESquaredT RESquared -extern PairGPUDevice device; +extern Device device; template RESquaredT::RESquared() : BaseEllipsoid(),