More renaming...

This commit is contained in:
W. Michael Brown
2011-06-10 17:31:48 -04:00
parent 7e0f127008
commit 9ee36a3c22
53 changed files with 580 additions and 637 deletions

View File

@ -31,8 +31,8 @@ UCL_H = $(wildcard ./geryon/ucl*.h)
NVC_H = $(wildcard ./geryon/nvc*.h) $(UCL_H)
NVD_H = $(wildcard ./geryon/nvd*.h) $(UCL_H) nv_kernel_def.h
# Headers for Pair Stuff
PAIR_H = atom.h ans.h nbor_shared.h \
nbor.h precision.h device.h \
PAIR_H = atom.h answer.h neighbor_shared.h \
neighbor.h precision.h device.h \
balance.h pppm.h
ALL_H = $(NVD_H) $(PAIR_H)
@ -42,7 +42,7 @@ CUDPP = $(OBJ_DIR)/cudpp.o $(OBJ_DIR)/cudpp_plan.o \
$(OBJ_DIR)/cudpp_maximal_launch.o $(OBJ_DIR)/cudpp_plan_manager.o \
$(OBJ_DIR)/radixsort_app.cu_o $(OBJ_DIR)/scan_app.cu_o
OBJS = $(OBJ_DIR)/atom.o $(OBJ_DIR)/ans.o \
$(OBJ_DIR)/nbor.o $(OBJ_DIR)/nbor_shared.o \
$(OBJ_DIR)/nbor.o $(OBJ_DIR)/neighbor_shared.o \
$(OBJ_DIR)/device.o $(OBJ_DIR)/base_atomic.o \
$(OBJ_DIR)/base_charge.o $(OBJ_DIR)/base_ellipsoid.o \
$(OBJ_DIR)/pppm.o $(OBJ_DIR)/pppm_ext.o \
@ -62,8 +62,8 @@ OBJS = $(OBJ_DIR)/atom.o $(OBJ_DIR)/ans.o \
$(CUDPP)
PTXS = $(OBJ_DIR)/device.ptx \
$(OBJ_DIR)/atom.ptx $(OBJ_DIR)/atom_ptx.h \
$(OBJ_DIR)/nbor_cpu.ptx $(OBJ_DIR)/nbor_ptx.h \
$(OBJ_DIR)/nbor_gpu.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h \
$(OBJ_DIR)/neighbor_cpu.ptx $(OBJ_DIR)/nbor_ptx.h \
$(OBJ_DIR)/neighbor_gpu.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h \
$(OBJ_DIR)/pppm_f_gpu_kernel.ptx $(OBJ_DIR)/pppm_f_gpu_ptx.h \
$(OBJ_DIR)/pppm_d_gpu_kernel.ptx $(OBJ_DIR)/pppm_d_gpu_ptx.h \
$(OBJ_DIR)/ellipsoid_nbor.ptx $(OBJ_DIR)/ellipsoid_nbor_ptx.h \
@ -111,26 +111,26 @@ $(OBJ_DIR)/atom_ptx.h: $(OBJ_DIR)/atom.ptx
$(OBJ_DIR)/atom.o: atom.cpp atom.h $(NVD_H) $(OBJ_DIR)/atom_ptx.h
$(CUDR) -o $@ -c atom.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/ans.o: ans.cpp ans.h $(NVD_H)
$(CUDR) -o $@ -c ans.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/ans.o: answer.cpp answer.h $(NVD_H)
$(CUDR) -o $@ -c answer.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/nbor_cpu.ptx: nbor_cpu.cu
$(CUDA) --ptx -DNV_KERNEL -o $@ nbor_cpu.cu
$(OBJ_DIR)/neighbor_cpu.ptx: neighbor_cpu.cu
$(CUDA) --ptx -DNV_KERNEL -o $@ neighbor_cpu.cu
$(OBJ_DIR)/nbor_ptx.h: $(OBJ_DIR)/nbor_cpu.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/nbor_cpu.ptx $(OBJ_DIR)/nbor_ptx.h
$(OBJ_DIR)/nbor_ptx.h: $(OBJ_DIR)/neighbor_cpu.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/neighbor_cpu.ptx $(OBJ_DIR)/nbor_ptx.h
$(OBJ_DIR)/nbor_gpu.ptx: nbor_gpu.cu
$(CUDA) --ptx -DNV_KERNEL -o $@ nbor_gpu.cu
$(OBJ_DIR)/neighbor_gpu.ptx: neighbor_gpu.cu
$(CUDA) --ptx -DNV_KERNEL -o $@ neighbor_gpu.cu
$(OBJ_DIR)/pair_gpu_build_ptx.h: $(OBJ_DIR)/nbor_gpu.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/nbor_gpu.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h
$(OBJ_DIR)/pair_gpu_build_ptx.h: $(OBJ_DIR)/neighbor_gpu.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/neighbor_gpu.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h
$(OBJ_DIR)/nbor_shared.o: nbor_shared.cpp nbor_shared.h $(OBJ_DIR)/nbor_ptx.h $(OBJ_DIR)/pair_gpu_build_ptx.h $(NVD_H)
$(CUDR) -o $@ -c nbor_shared.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/neighbor_shared.o: neighbor_shared.cpp neighbor_shared.h $(OBJ_DIR)/nbor_ptx.h $(OBJ_DIR)/pair_gpu_build_ptx.h $(NVD_H)
$(CUDR) -o $@ -c neighbor_shared.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/nbor.o: nbor.cpp nbor.h nbor_shared.h $(NVD_H)
$(CUDR) -o $@ -c nbor.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/nbor.o: neighbor.cpp neighbor.h neighbor_shared.h $(NVD_H)
$(CUDR) -o $@ -c neighbor.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/device.ptx: device.cu
$(CUDA) --ptx -DNV_KERNEL -o $@ device.cu

View File

@ -24,15 +24,15 @@ OCL_LIB = $(LIB_DIR)/libgpu.a
UCL_H = $(wildcard ./geryon/ucl*.h)
OCL_H = $(wildcard ./geryon/ocl*.h) $(UCL_H)
# Headers for Pair Stuff
PAIR_H = atom.h ans.h nbor_shared.h \
nbor.h precision.h device.h \
PAIR_H = atom.h answer.h neighbor_shared.h \
neighbor.h precision.h device.h \
balance.h pppm.h
ALL_H = $(OCL_H) $(PAIR_H)
EXECS = $(BIN_DIR)/ocl_get_devices
OBJS = $(OBJ_DIR)/atom.o $(OBJ_DIR)/ans.o \
$(OBJ_DIR)/nbor_shared.o $(OBJ_DIR)/nbor.o \
$(OBJ_DIR)/neighbor_shared.o $(OBJ_DIR)/nbor.o \
$(OBJ_DIR)/device.o $(OBJ_DIR)/base_atomic.o \
$(OBJ_DIR)/base_charge.o $(OBJ_DIR)/base_ellipsoid.o \
$(OBJ_DIR)/pppm.o $(OBJ_DIR)/pppm_ext.o \
@ -69,17 +69,17 @@ $(OBJ_DIR)/atom_cl.h: atom.cu
$(OBJ_DIR)/atom.o: atom.cpp atom.h $(OCL_H) $(OBJ_DIR)/atom_cl.h
$(OCL) -o $@ -c atom.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/ans.o: ans.cpp ans.h $(OCL_H)
$(OCL) -o $@ -c ans.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/ans.o: answer.cpp answer.h $(OCL_H)
$(OCL) -o $@ -c answer.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/nbor_cl.h: nbor_cpu.cu
$(BSH) ./geryon/file_to_cstr.sh nbor_cpu.cu $(OBJ_DIR)/nbor_cl.h
$(OBJ_DIR)/nbor_cl.h: neighbor_cpu.cu
$(BSH) ./geryon/file_to_cstr.sh neighbor_cpu.cu $(OBJ_DIR)/nbor_cl.h
$(OBJ_DIR)/nbor_shared.o: nbor_shared.cpp nbor_shared.h $(OCL_H) $(OBJ_DIR)/nbor_cl.h
$(OCL) -o $@ -c nbor_shared.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/neighbor_shared.o: neighbor_shared.cpp neighbor_shared.h $(OCL_H) $(OBJ_DIR)/nbor_cl.h
$(OCL) -o $@ -c neighbor_shared.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/nbor.o: nbor.cpp nbor.h $(OCL_H) nbor_shared.h
$(OCL) -o $@ -c nbor.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/nbor.o: neighbor.cpp neighbor.h $(OCL_H) neighbor_shared.h
$(OCL) -o $@ -c neighbor.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/pair_gpu_dev_cl.h: device.cu
$(BSH) ./geryon/file_to_cstr.sh device.cu $(OBJ_DIR)/pair_gpu_dev_cl.h

View File

@ -1,31 +1,29 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
/***************************************************************************
answer.cpp
-------------------
W. Michael Brown (ORNL)
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
Class for data management of forces, torques, energies, and virials
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
__________________________________________________________________________
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
__________________________________________________________________________
#include "ans.h"
begin :
email : brownw@ornl.gov
***************************************************************************/
#define PairGPUAnsT PairGPUAns<numtyp,acctyp>
#include "answer.h"
#define AnswerT Answer<numtyp,acctyp>
template <class numtyp, class acctyp>
PairGPUAnsT::PairGPUAns() : _allocated(false),_eflag(false),_vflag(false),
AnswerT::Answer() : _allocated(false),_eflag(false),_vflag(false),
_inum(0),_ilist(NULL),_newton(false) {
}
template <class numtyp, class acctyp>
int PairGPUAnsT::bytes_per_atom() const {
int AnswerT::bytes_per_atom() const {
int bytes=11*sizeof(acctyp);
if (_rot)
bytes+=4*sizeof(acctyp);
@ -35,7 +33,7 @@ int PairGPUAnsT::bytes_per_atom() const {
}
template <class numtyp, class acctyp>
bool PairGPUAnsT::alloc(const int inum) {
bool AnswerT::alloc(const int inum) {
_max_local=static_cast<int>(static_cast<double>(inum)*1.10);
bool success=true;
@ -70,7 +68,7 @@ bool PairGPUAnsT::alloc(const int inum) {
}
template <class numtyp, class acctyp>
bool PairGPUAnsT::init(const int inum, const bool charge, const bool rot,
bool AnswerT::init(const int inum, const bool charge, const bool rot,
UCL_Device &devi) {
clear();
@ -100,7 +98,7 @@ bool PairGPUAnsT::init(const int inum, const bool charge, const bool rot,
}
template <class numtyp, class acctyp>
bool PairGPUAnsT::add_fields(const bool charge, const bool rot) {
bool AnswerT::add_fields(const bool charge, const bool rot) {
bool realloc=false;
if (charge && _charge==false) {
_charge=true;
@ -122,7 +120,7 @@ bool PairGPUAnsT::add_fields(const bool charge, const bool rot) {
}
template <class numtyp, class acctyp>
void PairGPUAnsT::clear_resize() {
void AnswerT::clear_resize() {
if (!_allocated)
return;
_allocated=false;
@ -134,7 +132,7 @@ void PairGPUAnsT::clear_resize() {
}
template <class numtyp, class acctyp>
void PairGPUAnsT::clear() {
void AnswerT::clear() {
_gpu_bytes=0;
if (!_allocated)
return;
@ -148,7 +146,7 @@ void PairGPUAnsT::clear() {
}
template <class numtyp, class acctyp>
double PairGPUAnsT::host_memory_usage() const {
double AnswerT::host_memory_usage() const {
int atom_bytes=4;
if (_charge)
atom_bytes+=1;
@ -156,11 +154,11 @@ double PairGPUAnsT::host_memory_usage() const {
atom_bytes+=4;
int ans_bytes=atom_bytes+_ev_fields;
return ans_bytes*(_max_local)*sizeof(acctyp)+
sizeof(PairGPUAns<numtyp,acctyp>);
sizeof(Answer<numtyp,acctyp>);
}
template <class numtyp, class acctyp>
void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag,
void AnswerT::copy_answers(const bool eflag, const bool vflag,
const bool ef_atom, const bool vf_atom) {
time_answer.start();
_eflag=eflag;
@ -184,7 +182,7 @@ void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag,
}
template <class numtyp, class acctyp>
void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag,
void AnswerT::copy_answers(const bool eflag, const bool vflag,
const bool ef_atom, const bool vf_atom,
int *ilist) {
_ilist=ilist;
@ -192,7 +190,7 @@ void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag,
}
template <class numtyp, class acctyp>
double PairGPUAnsT::energy_virial(double *eatom, double **vatom,
double AnswerT::energy_virial(double *eatom, double **vatom,
double *virial) {
if (_eflag==false && _vflag==false)
return 0.0;
@ -268,7 +266,7 @@ double PairGPUAnsT::energy_virial(double *eatom, double **vatom,
}
template <class numtyp, class acctyp>
double PairGPUAnsT::energy_virial(double *eatom, double **vatom,
double AnswerT::energy_virial(double *eatom, double **vatom,
double *virial, double &ecoul) {
if (_eflag==false && _vflag==false)
return 0.0;
@ -359,7 +357,7 @@ double PairGPUAnsT::energy_virial(double *eatom, double **vatom,
}
template <class numtyp, class acctyp>
void PairGPUAnsT::get_answers(double **f, double **tor) {
void AnswerT::get_answers(double **f, double **tor) {
acctyp *ap=host_ans.begin();
if (_ilist==NULL) {
for (int i=0; i<_inum; i++) {
@ -404,4 +402,4 @@ void PairGPUAnsT::get_answers(double **f, double **tor) {
}
}
template class PairGPUAns<PRECISION,ACC_PRECISION>;
template class Answer<PRECISION,ACC_PRECISION>;

View File

@ -1,22 +1,20 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
/***************************************************************************
answer.h
-------------------
W. Michael Brown (ORNL)
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
Class for data management of forces, torques, energies, and virials
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
__________________________________________________________________________
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
__________________________________________________________________________
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
begin :
email : brownw@ornl.gov
***************************************************************************/
#ifndef PAIR_GPU_ANS_H
#define PAIR_GPU_ANS_H
#ifndef LAL_ANSWER_H
#define LAL_ANSWER_H
#include <math.h>
#include "mpi.h"
@ -38,10 +36,10 @@ using namespace ucl_cudadr;
#include "precision.h"
template <class numtyp, class acctyp>
class PairGPUAns {
class Answer {
public:
PairGPUAns();
~PairGPUAns() { clear(); }
Answer();
~Answer() { clear(); }
/// Current number of local atoms stored
inline int inum() const { return _inum; }

View File

@ -1,26 +1,24 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
/***************************************************************************
atom.cpp
-------------------
W. Michael Brown (ORNL)
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
Class for particle data management
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
__________________________________________________________________________
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
__________________________________________________________________________
begin :
email : brownw@ornl.gov
***************************************************************************/
#include "atom.h"
#define PairGPUAtomT PairGPUAtom<numtyp,acctyp>
#define AtomT Atom<numtyp,acctyp>
template <class numtyp, class acctyp>
PairGPUAtomT::PairGPUAtom() : _compiled(false),_allocated(false),
AtomT::Atom() : _compiled(false),_allocated(false),
_max_gpu_bytes(0) {
#ifndef USE_OPENCL
sort_config.op = CUDPP_ADD;
@ -31,7 +29,7 @@ PairGPUAtomT::PairGPUAtom() : _compiled(false),_allocated(false),
}
template <class numtyp, class acctyp>
int PairGPUAtomT::bytes_per_atom() const {
int AtomT::bytes_per_atom() const {
int id_space=0;
if (_gpu_nbor)
id_space=2;
@ -44,7 +42,7 @@ int PairGPUAtomT::bytes_per_atom() const {
}
template <class numtyp, class acctyp>
bool PairGPUAtomT::alloc(const int nall) {
bool AtomT::alloc(const int nall) {
_max_atoms=static_cast<int>(static_cast<double>(nall)*1.10);
bool success=true;
@ -138,7 +136,7 @@ bool PairGPUAtomT::alloc(const int nall) {
}
template <class numtyp, class acctyp>
bool PairGPUAtomT::add_fields(const bool charge, const bool rot,
bool AtomT::add_fields(const bool charge, const bool rot,
const bool gpu_nbor, const bool bonds) {
bool realloc=false;
if (charge && _charge==false) {
@ -167,7 +165,7 @@ bool PairGPUAtomT::add_fields(const bool charge, const bool rot,
}
template <class numtyp, class acctyp>
bool PairGPUAtomT::init(const int nall, const bool charge, const bool rot,
bool AtomT::init(const int nall, const bool charge, const bool rot,
UCL_Device &devi, const bool gpu_nbor,
const bool bonds) {
clear();
@ -206,7 +204,7 @@ bool PairGPUAtomT::init(const int nall, const bool charge, const bool rot,
}
template <class numtyp, class acctyp>
void PairGPUAtomT::clear_resize() {
void AtomT::clear_resize() {
if (!_allocated)
return;
_allocated=false;
@ -240,7 +238,7 @@ void PairGPUAtomT::clear_resize() {
}
template <class numtyp, class acctyp>
void PairGPUAtomT::clear() {
void AtomT::clear() {
_max_gpu_bytes=0;
if (!_allocated)
return;
@ -260,19 +258,19 @@ void PairGPUAtomT::clear() {
}
template <class numtyp, class acctyp>
double PairGPUAtomT::host_memory_usage() const {
double AtomT::host_memory_usage() const {
int atom_bytes=4;
if (_charge)
atom_bytes+=1;
if (_rot)
atom_bytes+=4;
return _max_atoms*atom_bytes*sizeof(numtyp)+
sizeof(PairGPUAtom<numtyp,acctyp>);
sizeof(Atom<numtyp,acctyp>);
}
// Sort arrays for neighbor list calculation
template <class numtyp, class acctyp>
void PairGPUAtomT::sort_neighbor(const int num_atoms) {
void AtomT::sort_neighbor(const int num_atoms) {
#ifndef USE_OPENCL
CUDPPResult result = cudppSort(sort_plan, (unsigned *)dev_cell_id.begin(),
(int *)dev_particle_id.begin(),
@ -292,7 +290,7 @@ void PairGPUAtomT::sort_neighbor(const int num_atoms) {
#endif
template <class numtyp, class acctyp>
void PairGPUAtomT::compile_kernels(UCL_Device &dev) {
void AtomT::compile_kernels(UCL_Device &dev) {
atom_program=new UCL_Program(dev);
atom_program->load_string(atom,"");
k_cast_x.set_function(*atom_program,"kernel_cast_x");
@ -301,4 +299,4 @@ void PairGPUAtomT::compile_kernels(UCL_Device &dev) {
#endif
template class PairGPUAtom<PRECISION,ACC_PRECISION>;
template class Atom<PRECISION,ACC_PRECISION>;

View File

@ -1,19 +1,17 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
// **************************************************************************
// atom.cu
// -------------------
// W. Michael Brown (ORNL)
//
// Device code for atom data casting
//
// __________________________________________________________________________
// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
// __________________________________________________________________________
//
// begin :
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#include "geryon/ucl_nv_kernel.h"

View File

@ -1,19 +1,17 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
/***************************************************************************
atom.h
-------------------
W. Michael Brown (ORNL)
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
Class for particle data management
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
__________________________________________________________________________
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
__________________________________________________________________________
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
begin :
email : brownw@ornl.gov
***************************************************************************/
#ifndef PAIR_GPU_ATOM_H
#define PAIR_GPU_ATOM_H
@ -41,10 +39,10 @@ using namespace ucl_cudadr;
#include "precision.h"
template <class numtyp, class acctyp>
class PairGPUAtom {
class Atom {
public:
PairGPUAtom();
~PairGPUAtom() { clear(); }
Atom();
~Atom() { clear(); }
/// Maximum number of atoms that can be stored with current allocation
inline int max_atoms() const { return _max_atoms; }

View File

@ -1,22 +1,20 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
/***************************************************************************
balance.h
-------------------
W. Michael Brown (ORNL)
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
Class for host-device load balancing
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
__________________________________________________________________________
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
__________________________________________________________________________
#ifndef PAIR_GPU_BALANCE_H
#define PAIR_GPU_BALANCE_H
begin :
email : brownw@ornl.gov
***************************************************************************/
#ifndef LAL_BALANCE_H
#define LAL_BALANCE_H
#include "device.h"
#include <math.h>
@ -27,13 +25,13 @@
/// Host/device load balancer
template<class numtyp, class acctyp>
class PairGPUBalance {
class Balance {
public:
inline PairGPUBalance() : _init_done(false), _measure_this_step(false) {}
inline ~PairGPUBalance() { clear(); }
inline Balance() : _init_done(false), _measure_this_step(false) {}
inline ~Balance() { clear(); }
/// Clear any old data and setup for new LAMMPS run
inline void init(PairGPUDevice<numtyp, acctyp> *gpu, const bool gpu_nbor,
inline void init(Device<numtyp, acctyp> *gpu, const bool gpu_nbor,
const double split);
/// Clear all host and device data
@ -107,7 +105,7 @@ class PairGPUBalance {
}
private:
PairGPUDevice<numtyp,acctyp> *_device;
Device<numtyp,acctyp> *_device;
UCL_Timer _device_time;
bool _init_done, _gpu_nbor;
@ -119,10 +117,10 @@ class PairGPUBalance {
int _inum, _inum_full, _timestep;
};
#define PairGPUBalanceT PairGPUBalance<numtyp,acctyp>
#define BalanceT Balance<numtyp,acctyp>
template <class numtyp, class acctyp>
void PairGPUBalanceT::init(PairGPUDevice<numtyp, acctyp> *gpu,
void BalanceT::init(Device<numtyp, acctyp> *gpu,
const bool gpu_nbor, const double split) {
clear();
_gpu_nbor=gpu_nbor;
@ -145,7 +143,7 @@ void PairGPUBalanceT::init(PairGPUDevice<numtyp, acctyp> *gpu,
}
template <class numtyp, class acctyp>
int PairGPUBalanceT::get_gpu_count(const int ago, const int inum_full) {
int BalanceT::get_gpu_count(const int ago, const int inum_full) {
_measure_this_step=false;
if (_load_balance) {
if (_avg_count<11 || _timestep%_HD_BALANCE_EVERY==0) {
@ -164,7 +162,7 @@ int PairGPUBalanceT::get_gpu_count(const int ago, const int inum_full) {
}
template <class numtyp, class acctyp>
void PairGPUBalanceT::balance(const double cpu_time) {
void BalanceT::balance(const double cpu_time) {
if (_measure_this_step) {
_measure_this_step=false;
double gpu_time=_device_time.seconds();

View File

@ -1,46 +1,44 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
/***************************************************************************
base_atomic.h
-------------------
W. Michael Brown (ORNL)
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
Base class for pair styles with per-particle data for position and type
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
__________________________________________________________________________
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
__________________________________________________________________________
begin :
email : brownw@ornl.gov
***************************************************************************/
#include "base_atomic.h"
#define AtomicGPUMemoryT AtomicGPUMemory<numtyp, acctyp>
#define BaseAtomicT BaseAtomic<numtyp, acctyp>
extern PairGPUDevice<PRECISION,ACC_PRECISION> pair_gpu_device;
extern Device<PRECISION,ACC_PRECISION> global_device;
template <class numtyp, class acctyp>
AtomicGPUMemoryT::AtomicGPUMemory() : _compiled(false), _max_bytes(0) {
device=&pair_gpu_device;
ans=new PairGPUAns<numtyp,acctyp>();
nbor=new PairGPUNbor();
BaseAtomicT::BaseAtomic() : _compiled(false), _max_bytes(0) {
device=&global_device;
ans=new Answer<numtyp,acctyp>();
nbor=new Neighbor();
}
template <class numtyp, class acctyp>
AtomicGPUMemoryT::~AtomicGPUMemory() {
BaseAtomicT::~BaseAtomic() {
delete ans;
delete nbor;
}
template <class numtyp, class acctyp>
int AtomicGPUMemoryT::bytes_per_atom_atomic(const int max_nbors) const {
int BaseAtomicT::bytes_per_atom_atomic(const int max_nbors) const {
return device->atom.bytes_per_atom()+ans->bytes_per_atom()+
nbor->bytes_per_atom(max_nbors);
}
template <class numtyp, class acctyp>
int AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall,
int BaseAtomicT::init_atomic(const int nlocal, const int nall,
const int max_nbors, const int maxspecial,
const double cell_size,
const double gpu_split, FILE *_screen,
@ -49,7 +47,7 @@ int AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall,
screen=_screen;
bool gpu_nbor=false;
if (device->gpu_mode()==PairGPUDevice<numtyp,acctyp>::GPU_NEIGH)
if (device->gpu_mode()==Device<numtyp,acctyp>::GPU_NEIGH)
gpu_nbor=true;
int _gpu_host=0;
@ -90,12 +88,12 @@ int AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall,
}
template <class numtyp, class acctyp>
void AtomicGPUMemoryT::estimate_gpu_overhead() {
void BaseAtomicT::estimate_gpu_overhead() {
device->estimate_gpu_overhead(1,_gpu_overhead,_driver_overhead);
}
template <class numtyp, class acctyp>
void AtomicGPUMemoryT::clear_atomic() {
void BaseAtomicT::clear_atomic() {
// Output any timing information
acc_timers();
double avg_split=hd_balancer.all_avg_split();
@ -123,7 +121,7 @@ void AtomicGPUMemoryT::clear_atomic() {
// Copy neighbor list from host
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
int * AtomicGPUMemoryT::reset_nbors(const int nall, const int inum, int *ilist,
int * BaseAtomicT::reset_nbors(const int nall, const int inum, int *ilist,
int *numj, int **firstneigh, bool &success) {
success=true;
@ -147,7 +145,7 @@ int * AtomicGPUMemoryT::reset_nbors(const int nall, const int inum, int *ilist,
// Build neighbor list on device
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
inline void AtomicGPUMemoryT::build_nbor_list(const int inum,
inline void BaseAtomicT::build_nbor_list(const int inum,
const int host_inum,
const int nall, double **host_x,
int *host_type, double *sublo,
@ -176,7 +174,7 @@ inline void AtomicGPUMemoryT::build_nbor_list(const int inum,
// Copy nbor list from host if necessary and then calculate forces, virials,..
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
void AtomicGPUMemoryT::compute(const int f_ago, const int inum_full,
void BaseAtomicT::compute(const int f_ago, const int inum_full,
const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag,
@ -217,7 +215,7 @@ void AtomicGPUMemoryT::compute(const int f_ago, const int inum_full,
// Reneighbor on GPU if necessary and then compute forces, virials, energies
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
int ** AtomicGPUMemoryT::compute(const int ago, const int inum_full,
int ** BaseAtomicT::compute(const int ago, const int inum_full,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, int *tag,
int **nspecial, int **special, const bool eflag,
@ -263,13 +261,13 @@ int ** AtomicGPUMemoryT::compute(const int ago, const int inum_full,
}
template <class numtyp, class acctyp>
double AtomicGPUMemoryT::host_memory_usage_atomic() const {
double BaseAtomicT::host_memory_usage_atomic() const {
return device->atom.host_memory_usage()+nbor->host_memory_usage()+
4*sizeof(numtyp)+sizeof(AtomicGPUMemory<numtyp,acctyp>);
4*sizeof(numtyp)+sizeof(BaseAtomic<numtyp,acctyp>);
}
template <class numtyp, class acctyp>
void AtomicGPUMemoryT::compile_kernels(UCL_Device &dev, const char *pair_str) {
void BaseAtomicT::compile_kernels(UCL_Device &dev, const char *pair_str) {
if (_compiled)
return;
@ -285,5 +283,5 @@ void AtomicGPUMemoryT::compile_kernels(UCL_Device &dev, const char *pair_str) {
_compiled=true;
}
template class AtomicGPUMemory<PRECISION,ACC_PRECISION>;
template class BaseAtomic<PRECISION,ACC_PRECISION>;

View File

@ -1,22 +1,20 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
/***************************************************************************
base_atomic.h
-------------------
W. Michael Brown (ORNL)
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
Base class for pair styles with per-particle data for position and type
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
__________________________________________________________________________
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
__________________________________________________________________________
#ifndef ATOMIC_GPU_MEMORY_H
#define ATOMIC_GPU_MEMORY_H
begin :
email : brownw@ornl.gov
***************************************************************************/
#ifndef LAL_BASE_ATOMIC_H
#define LAL_BASE_ATOMIC_H
#include "device.h"
#include "balance.h"
@ -29,10 +27,10 @@
#endif
template <class numtyp, class acctyp>
class AtomicGPUMemory {
class BaseAtomic {
public:
AtomicGPUMemory();
virtual ~AtomicGPUMemory();
BaseAtomic();
virtual ~BaseAtomic();
/// Clear any previous data and set up for a new LAMMPS run
/** \param max_nbors initial number of rows in the neighbor matrix
@ -150,7 +148,7 @@ class AtomicGPUMemory {
// -------------------------- DEVICE DATA -------------------------
/// Device Properties and Atom and Neighbor storage
PairGPUDevice<numtyp,acctyp> *device;
Device<numtyp,acctyp> *device;
/// Geryon device
UCL_Device *ucl_device;
@ -159,7 +157,7 @@ class AtomicGPUMemory {
UCL_Timer time_pair;
/// Host device load balancer
PairGPUBalance<numtyp,acctyp> hd_balancer;
Balance<numtyp,acctyp> hd_balancer;
/// LAMMPS pointer for screen output
FILE *screen;
@ -167,16 +165,16 @@ class AtomicGPUMemory {
// --------------------------- ATOM DATA --------------------------
/// Atom Data
PairGPUAtom<numtyp,acctyp> *atom;
Atom<numtyp,acctyp> *atom;
// ------------------------ FORCE/ENERGY DATA -----------------------
PairGPUAns<numtyp,acctyp> *ans;
Answer<numtyp,acctyp> *ans;
// --------------------------- NBOR DATA ----------------------------
/// Neighbor data
PairGPUNbor *nbor;
Neighbor *nbor;
/// True if we need to accumulate time for neighboring
bool nbor_time_avail;

View File

@ -1,46 +1,45 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Charge/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
/***************************************************************************
base_charge.cpp
-------------------
W. Michael Brown (ORNL)
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
Base class for pair styles needing per-particle data for position,
charge, and type.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
__________________________________________________________________________
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
__________________________________________________________________________
begin :
email : brownw@ornl.gov
***************************************************************************/
#include "base_charge.h"
#define ChargeGPUMemoryT ChargeGPUMemory<numtyp, acctyp>
#define BaseChargeT BaseCharge<numtyp, acctyp>
extern PairGPUDevice<PRECISION,ACC_PRECISION> pair_gpu_device;
extern Device<PRECISION,ACC_PRECISION> global_device;
template <class numtyp, class acctyp>
ChargeGPUMemoryT::ChargeGPUMemory() : _compiled(false), _max_bytes(0) {
device=&pair_gpu_device;
ans=new PairGPUAns<numtyp,acctyp>();
nbor=new PairGPUNbor();
BaseChargeT::BaseCharge() : _compiled(false), _max_bytes(0) {
device=&global_device;
ans=new Answer<numtyp,acctyp>();
nbor=new Neighbor();
}
template <class numtyp, class acctyp>
ChargeGPUMemoryT::~ChargeGPUMemory() {
BaseChargeT::~BaseCharge() {
delete ans;
delete nbor;
}
template <class numtyp, class acctyp>
int ChargeGPUMemoryT::bytes_per_atom_atomic(const int max_nbors) const {
int BaseChargeT::bytes_per_atom_atomic(const int max_nbors) const {
return device->atom.bytes_per_atom()+ans->bytes_per_atom()+
nbor->bytes_per_atom(max_nbors);
}
template <class numtyp, class acctyp>
int ChargeGPUMemoryT::init_atomic(const int nlocal, const int nall,
int BaseChargeT::init_atomic(const int nlocal, const int nall,
const int max_nbors, const int maxspecial,
const double cell_size,
const double gpu_split, FILE *_screen,
@ -49,7 +48,7 @@ int ChargeGPUMemoryT::init_atomic(const int nlocal, const int nall,
screen=_screen;
bool gpu_nbor=false;
if (device->gpu_mode()==PairGPUDevice<numtyp,acctyp>::GPU_NEIGH)
if (device->gpu_mode()==Device<numtyp,acctyp>::GPU_NEIGH)
gpu_nbor=true;
int _gpu_host=0;
@ -92,12 +91,12 @@ int ChargeGPUMemoryT::init_atomic(const int nlocal, const int nall,
}
template <class numtyp, class acctyp>
void ChargeGPUMemoryT::estimate_gpu_overhead() {
void BaseChargeT::estimate_gpu_overhead() {
device->estimate_gpu_overhead(1,_gpu_overhead,_driver_overhead);
}
template <class numtyp, class acctyp>
void ChargeGPUMemoryT::clear_atomic() {
void BaseChargeT::clear_atomic() {
// Output any timing information
acc_timers();
double avg_split=hd_balancer.all_avg_split();
@ -125,7 +124,7 @@ void ChargeGPUMemoryT::clear_atomic() {
// Copy neighbor list from host
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
int * ChargeGPUMemoryT::reset_nbors(const int nall, const int inum, int *ilist,
int * BaseChargeT::reset_nbors(const int nall, const int inum, int *ilist,
int *numj, int **firstneigh, bool &success) {
success=true;
@ -150,7 +149,7 @@ int * ChargeGPUMemoryT::reset_nbors(const int nall, const int inum, int *ilist,
// Build neighbor list on device
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
inline void ChargeGPUMemoryT::build_nbor_list(const int inum,
inline void BaseChargeT::build_nbor_list(const int inum,
const int host_inum,
const int nall, double **host_x,
int *host_type, double *sublo,
@ -179,7 +178,7 @@ inline void ChargeGPUMemoryT::build_nbor_list(const int inum,
// Copy nbor list from host if necessary and then calculate forces, virials,..
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
void ChargeGPUMemoryT::compute(const int f_ago, const int inum_full,
void BaseChargeT::compute(const int f_ago, const int inum_full,
const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag,
@ -226,7 +225,7 @@ void ChargeGPUMemoryT::compute(const int f_ago, const int inum_full,
// Reneighbor on GPU if necessary and then compute forces, virials, energies
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
int** ChargeGPUMemoryT::compute(const int ago, const int inum_full,
int** BaseChargeT::compute(const int ago, const int inum_full,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, int *tag,
int **nspecial, int **special, const bool eflag,
@ -279,13 +278,13 @@ int** ChargeGPUMemoryT::compute(const int ago, const int inum_full,
}
template <class numtyp, class acctyp>
double ChargeGPUMemoryT::host_memory_usage_atomic() const {
double BaseChargeT::host_memory_usage_atomic() const {
return device->atom.host_memory_usage()+nbor->host_memory_usage()+
4*sizeof(numtyp)+sizeof(ChargeGPUMemory<numtyp,acctyp>);
4*sizeof(numtyp)+sizeof(BaseCharge<numtyp,acctyp>);
}
template <class numtyp, class acctyp>
void ChargeGPUMemoryT::compile_kernels(UCL_Device &dev, const char *pair_str) {
void BaseChargeT::compile_kernels(UCL_Device &dev, const char *pair_str) {
if (_compiled)
return;
@ -302,5 +301,5 @@ void ChargeGPUMemoryT::compile_kernels(UCL_Device &dev, const char *pair_str) {
_compiled=true;
}
template class ChargeGPUMemory<PRECISION,ACC_PRECISION>;
template class BaseCharge<PRECISION,ACC_PRECISION>;

View File

@ -1,22 +1,21 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Charge/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
/***************************************************************************
base_charge.h
-------------------
W. Michael Brown (ORNL)
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
Base class for pair styles needing per-particle data for position,
charge, and type.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
__________________________________________________________________________
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
__________________________________________________________________________
#ifndef CHARGE_GPU_MEMORY_H
#define CHARGE_GPU_MEMORY_H
begin :
email : brownw@ornl.gov
***************************************************************************/
#ifndef LAL_BASE_CHARGE_H
#define LAL_BASE_CHARGE_H
#include "device.h"
#include "balance.h"
@ -29,10 +28,10 @@
#endif
template <class numtyp, class acctyp>
class ChargeGPUMemory {
class BaseCharge {
public:
ChargeGPUMemory();
virtual ~ChargeGPUMemory();
BaseCharge();
virtual ~BaseCharge();
/// Clear any previous data and set up for a new LAMMPS run
/** \param max_nbors initial number of rows in the neighbor matrix
@ -146,7 +145,7 @@ class ChargeGPUMemory {
// -------------------------- DEVICE DATA -------------------------
/// Device Properties and Atom and Neighbor storage
PairGPUDevice<numtyp,acctyp> *device;
Device<numtyp,acctyp> *device;
/// Geryon device
UCL_Device *ucl_device;
@ -155,7 +154,7 @@ class ChargeGPUMemory {
UCL_Timer time_pair;
/// Host device load balancer
PairGPUBalance<numtyp,acctyp> hd_balancer;
Balance<numtyp,acctyp> hd_balancer;
/// LAMMPS pointer for screen output
FILE *screen;
@ -163,17 +162,17 @@ class ChargeGPUMemory {
// --------------------------- ATOM DATA --------------------------
/// Atom Data
PairGPUAtom<numtyp,acctyp> *atom;
Atom<numtyp,acctyp> *atom;
// ------------------------ FORCE/ENERGY DATA -----------------------
PairGPUAns<numtyp,acctyp> *ans;
Answer<numtyp,acctyp> *ans;
// --------------------------- NBOR DATA ----------------------------
/// Neighbor data
PairGPUNbor *nbor;
Neighbor *nbor;
/// True if we need to accumulate time for neighboring
bool nbor_time_avail;

View File

@ -1,7 +1,7 @@
/***************************************************************************
base_ellipsoid.cpp
-------------------
W. Michael Brown
W. Michael Brown (ORNL)
Base class for acceleration of ellipsoid potentials
@ -23,13 +23,13 @@ using namespace LAMMPS_AL;
#endif
#define BaseEllipsoidT BaseEllipsoid<numtyp, acctyp>
extern PairGPUDevice<PRECISION,ACC_PRECISION> pair_gpu_device;
extern Device<PRECISION,ACC_PRECISION> global_device;
template <class numtyp, class acctyp>
BaseEllipsoidT::BaseEllipsoid() : _compiled(false), _max_bytes(0) {
device=&pair_gpu_device;
ans=new PairGPUAns<numtyp,acctyp>();
nbor=new PairGPUNbor();
device=&global_device;
ans=new Answer<numtyp,acctyp>();
nbor=new Neighbor();
}
template <class numtyp, class acctyp>
@ -56,7 +56,7 @@ int BaseEllipsoidT::init_base(const int nlocal, const int nall,
_ellipsoid_sphere=ellip_sphere;
bool gpu_nbor=false;
if (device->gpu_mode()==PairGPUDevice<numtyp,acctyp>::GPU_NEIGH)
if (device->gpu_mode()==Device<numtyp,acctyp>::GPU_NEIGH)
gpu_nbor=true;
int _gpu_host=0;

View File

@ -1,7 +1,7 @@
/***************************************************************************
base_ellipsoid.h
-------------------
W. Michael Brown
W. Michael Brown (ORNL)
Base class for acceleration of ellipsoid potentials
@ -13,8 +13,8 @@
email : brownw@ornl.gov
***************************************************************************/
#ifndef BASE_ELLIPSOID_H
#define BASE_ELLIPSOID_H
#ifndef LAL_BASE_ELLIPSOID_H
#define LAL_BASE_ELLIPSOID_H
#include "device.h"
#include "balance.h"
@ -182,7 +182,7 @@ class BaseEllipsoid {
// -------------------------- DEVICE DATA -------------------------
/// Device Properties and Atom and Neighbor storage
PairGPUDevice<numtyp,acctyp> *device;
Device<numtyp,acctyp> *device;
/// Geryon device
UCL_Device *ucl_device;
@ -192,7 +192,7 @@ class BaseEllipsoid {
UCL_Timer time_nbor3, time_ellipsoid3;
/// Host device load balancer
PairGPUBalance<numtyp,acctyp> hd_balancer;
Balance<numtyp,acctyp> hd_balancer;
/// LAMMPS pointer for screen output
FILE *screen;
@ -200,7 +200,7 @@ class BaseEllipsoid {
// --------------------------- ATOM DATA --------------------------
/// Atom Data
PairGPUAtom<numtyp,acctyp> *atom;
Atom<numtyp,acctyp> *atom;
// --------------------------- TYPE DATA --------------------------
@ -209,12 +209,12 @@ class BaseEllipsoid {
// ------------------------ FORCE/ENERGY DATA -----------------------
PairGPUAns<numtyp,acctyp> *ans;
Answer<numtyp,acctyp> *ans;
// --------------------------- NBOR DATA ----------------------------
/// Neighbor data
PairGPUNbor *nbor;
Neighbor *nbor;
/// ilist with particles sorted by type
UCL_H_Vec<int> host_olist;
/// True if we need to accumulate time for neighboring

View File

@ -25,10 +25,10 @@
#include <cassert>
#define CMM_GPU_MemoryT CMM_GPU_Memory<numtyp, acctyp>
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
extern Device<PRECISION,ACC_PRECISION> device;
template <class numtyp, class acctyp>
CMM_GPU_MemoryT::CMM_GPU_Memory() : AtomicGPUMemory<numtyp,acctyp>(), _allocated(false) {
CMM_GPU_MemoryT::CMM_GPU_Memory() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
}
template <class numtyp, class acctyp>

View File

@ -21,7 +21,7 @@
#include "base_atomic.h"
template <class numtyp, class acctyp>
class CMM_GPU_Memory : public AtomicGPUMemory<numtyp, acctyp> {
class CMM_GPU_Memory : public BaseAtomic<numtyp, acctyp> {
public:
CMM_GPU_Memory();
~CMM_GPU_Memory();

View File

@ -25,10 +25,10 @@
#include <cassert>
#define CMML_GPU_MemoryT CMML_GPU_Memory<numtyp, acctyp>
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
extern Device<PRECISION,ACC_PRECISION> device;
template <class numtyp, class acctyp>
CMML_GPU_MemoryT::CMML_GPU_Memory() : ChargeGPUMemory<numtyp,acctyp>(),
CMML_GPU_MemoryT::CMML_GPU_Memory() : BaseCharge<numtyp,acctyp>(),
_allocated(false) {
}

View File

@ -21,7 +21,7 @@
#include "base_charge.h"
template <class numtyp, class acctyp>
class CMML_GPU_Memory : public ChargeGPUMemory<numtyp, acctyp> {
class CMML_GPU_Memory : public BaseCharge<numtyp, acctyp> {
public:
CMML_GPU_Memory();
~CMML_GPU_Memory();

View File

@ -25,10 +25,10 @@
#include <cassert>
#define CMMM_GPU_MemoryT CMMM_GPU_Memory<numtyp, acctyp>
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
extern Device<PRECISION,ACC_PRECISION> device;
template <class numtyp, class acctyp>
CMMM_GPU_MemoryT::CMMM_GPU_Memory() : ChargeGPUMemory<numtyp,acctyp>(),
CMMM_GPU_MemoryT::CMMM_GPU_Memory() : BaseCharge<numtyp,acctyp>(),
_allocated(false) {
}

View File

@ -21,7 +21,7 @@
#include "base_charge.h"
template <class numtyp, class acctyp>
class CMMM_GPU_Memory : public ChargeGPUMemory<numtyp, acctyp> {
class CMMM_GPU_Memory : public BaseCharge<numtyp, acctyp> {
public:
CMMM_GPU_Memory();
~CMMM_GPU_Memory();

View File

@ -25,10 +25,10 @@
#include <cassert>
#define CRML_GPU_MemoryT CRML_GPU_Memory<numtyp, acctyp>
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
extern Device<PRECISION,ACC_PRECISION> device;
template <class numtyp, class acctyp>
CRML_GPU_MemoryT::CRML_GPU_Memory() : ChargeGPUMemory<numtyp,acctyp>(),
CRML_GPU_MemoryT::CRML_GPU_Memory() : BaseCharge<numtyp,acctyp>(),
_allocated(false) {
}

View File

@ -21,7 +21,7 @@
#include "base_charge.h"
template <class numtyp, class acctyp>
class CRML_GPU_Memory : public ChargeGPUMemory<numtyp, acctyp> {
class CRML_GPU_Memory : public BaseCharge<numtyp, acctyp> {
public:
CRML_GPU_Memory();
~CRML_GPU_Memory();

View File

@ -1,19 +1,17 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
/***************************************************************************
device.cpp
-------------------
W. Michael Brown (ORNL)
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
Class for management of the device where the computations are performed
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
__________________________________________________________________________
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
__________________________________________________________________________
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
begin :
email : brownw@ornl.gov
***************************************************************************/
#include "device.h"
#include "precision.h"
@ -29,21 +27,21 @@
#include "pair_gpu_dev_ptx.h"
#endif
#define PairGPUDeviceT PairGPUDevice<numtyp, acctyp>
#define DeviceT Device<numtyp, acctyp>
template <class numtyp, class acctyp>
PairGPUDeviceT::PairGPUDevice() : _init_count(0), _device_init(false),
DeviceT::Device() : _init_count(0), _device_init(false),
_gpu_mode(GPU_FORCE), _first_device(0),
_last_device(0), _compiled(false) {
}
template <class numtyp, class acctyp>
PairGPUDeviceT::~PairGPUDevice() {
DeviceT::~Device() {
clear_device();
}
template <class numtyp, class acctyp>
int PairGPUDeviceT::init_device(MPI_Comm world, MPI_Comm replica,
int DeviceT::init_device(MPI_Comm world, MPI_Comm replica,
const int first_gpu, const int last_gpu,
const int gpu_mode, const double p_split,
const int nthreads, const int t_per_atom) {
@ -135,10 +133,10 @@ int PairGPUDeviceT::init_device(MPI_Comm world, MPI_Comm replica,
}
template <class numtyp, class acctyp>
int PairGPUDeviceT::init(PairGPUAns<numtyp,acctyp> &ans, const bool charge,
int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
const bool rot, const int nlocal,
const int host_nlocal, const int nall,
PairGPUNbor *nbor, const int maxspecial,
Neighbor *nbor, const int maxspecial,
const int gpu_host, const int max_nbors,
const double cell_size, const bool pre_cut) {
if (!_device_init)
@ -181,7 +179,7 @@ int PairGPUDeviceT::init(PairGPUAns<numtyp,acctyp> &ans, const bool charge,
if (!ans.init(ef_nlocal,charge,rot,*gpu))
return -3;
if (!nbor->init(&_nbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial,
if (!nbor->init(&_neighbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial,
*gpu,gpu_nbor,gpu_host,pre_cut, _block_cell_2d,
_block_cell_id, _block_nbor_build))
return -3;
@ -192,7 +190,7 @@ int PairGPUDeviceT::init(PairGPUAns<numtyp,acctyp> &ans, const bool charge,
}
template <class numtyp, class acctyp>
int PairGPUDeviceT::init(PairGPUAns<numtyp,acctyp> &ans, const int nlocal,
int DeviceT::init(Answer<numtyp,acctyp> &ans, const int nlocal,
const int nall) {
if (!_device_init)
return -1;
@ -215,21 +213,21 @@ int PairGPUDeviceT::init(PairGPUAns<numtyp,acctyp> &ans, const int nlocal,
}
template <class numtyp, class acctyp>
void PairGPUDeviceT::set_single_precompute
void DeviceT::set_single_precompute
(PPPMGPUMemory<numtyp,acctyp,float,_lgpu_float4> *pppm) {
_long_range_precompute=1;
pppm_single=pppm;
}
template <class numtyp, class acctyp>
void PairGPUDeviceT::set_double_precompute
void DeviceT::set_double_precompute
(PPPMGPUMemory<numtyp,acctyp,double,_lgpu_double4> *pppm) {
_long_range_precompute=2;
pppm_double=pppm;
}
template <class numtyp, class acctyp>
void PairGPUDeviceT::init_message(FILE *screen, const char *name,
void DeviceT::init_message(FILE *screen, const char *name,
const int first_gpu, const int last_gpu) {
#ifdef USE_OPENCL
std::string fs="";
@ -272,7 +270,7 @@ void PairGPUDeviceT::init_message(FILE *screen, const char *name,
}
template <class numtyp, class acctyp>
void PairGPUDeviceT::estimate_gpu_overhead(const int kernel_calls,
void DeviceT::estimate_gpu_overhead(const int kernel_calls,
double &gpu_overhead,
double &gpu_driver_overhead) {
UCL_H_Vec<int> *host_data_in=NULL, *host_data_out=NULL;
@ -384,9 +382,9 @@ void PairGPUDeviceT::estimate_gpu_overhead(const int kernel_calls,
}
template <class numtyp, class acctyp>
void PairGPUDeviceT::output_times(UCL_Timer &time_pair,
PairGPUAns<numtyp,acctyp> &ans,
PairGPUNbor &nbor, const double avg_split,
void DeviceT::output_times(UCL_Timer &time_pair,
Answer<numtyp,acctyp> &ans,
Neighbor &nbor, const double avg_split,
const double max_bytes,
const double gpu_overhead,
const double driver_overhead,
@ -440,12 +438,12 @@ void PairGPUDeviceT::output_times(UCL_Timer &time_pair,
}
template <class numtyp, class acctyp>
void PairGPUDeviceT::output_kspace_times(UCL_Timer &time_in,
void DeviceT::output_kspace_times(UCL_Timer &time_in,
UCL_Timer &time_out,
UCL_Timer &time_map,
UCL_Timer &time_rho,
UCL_Timer &time_interp,
PairGPUAns<numtyp,acctyp> &ans,
Answer<numtyp,acctyp> &ans,
const double max_bytes,
const double cpu_time,
const double idle_time, FILE *screen) {
@ -500,13 +498,13 @@ void PairGPUDeviceT::output_kspace_times(UCL_Timer &time_in,
}
template <class numtyp, class acctyp>
void PairGPUDeviceT::clear() {
void DeviceT::clear() {
if (_init_count>0) {
_long_range_precompute=0;
_init_count--;
if (_init_count==0) {
atom.clear();
_nbor_shared.clear();
_neighbor_shared.clear();
if (_compiled) {
k_zero.clear();
k_info.clear();
@ -518,7 +516,7 @@ void PairGPUDeviceT::clear() {
}
template <class numtyp, class acctyp>
void PairGPUDeviceT::clear_device() {
void DeviceT::clear_device() {
while (_init_count>0)
clear();
if (_device_init) {
@ -528,7 +526,7 @@ void PairGPUDeviceT::clear_device() {
}
template <class numtyp, class acctyp>
int PairGPUDeviceT::compile_kernels() {
int DeviceT::compile_kernels() {
int flag=0;
if (_compiled)
@ -588,27 +586,27 @@ int PairGPUDeviceT::compile_kernels() {
}
template <class numtyp, class acctyp>
double PairGPUDeviceT::host_memory_usage() const {
double DeviceT::host_memory_usage() const {
return atom.host_memory_usage()+4*sizeof(numtyp)+
sizeof(PairGPUDevice<numtyp,acctyp>);
sizeof(Device<numtyp,acctyp>);
}
template class PairGPUDevice<PRECISION,ACC_PRECISION>;
PairGPUDevice<PRECISION,ACC_PRECISION> pair_gpu_device;
template class Device<PRECISION,ACC_PRECISION>;
Device<PRECISION,ACC_PRECISION> global_device;
int lmp_init_device(MPI_Comm world, MPI_Comm replica, const int first_gpu,
const int last_gpu, const int gpu_mode,
const double particle_split, const int nthreads,
const int t_per_atom) {
return pair_gpu_device.init_device(world,replica,first_gpu,last_gpu,gpu_mode,
return global_device.init_device(world,replica,first_gpu,last_gpu,gpu_mode,
particle_split,nthreads,t_per_atom);
}
void lmp_clear_device() {
pair_gpu_device.clear_device();
global_device.clear_device();
}
double lmp_gpu_forces(double **f, double **tor, double *eatom,
double **vatom, double *virial, double &ecoul) {
return pair_gpu_device.fix_gpu(f,tor,eatom,vatom,virial,ecoul);
return global_device.fix_gpu(f,tor,eatom,vatom,virial,ecoul);
}

View File

@ -1,19 +1,17 @@
/* ----------------------------------------------------------------------
LAMMPS-Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
// **************************************************************************
// device.cu
// -------------------
// W. Michael Brown (ORNL)
//
// Device code for device information
//
// __________________________________________________________________________
// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
// __________________________________________________________________________
//
// begin :
// email : brownw@ornl.gov
// ***************************************************************************/
/*************************************************************************
Preprocessor Definitions
@ -63,9 +61,6 @@
*************************************************************************/
#ifndef PAIR_GPU_DEV_KERNEL
#define PAIR_GPU_DEV_KERNEL
#ifdef NV_KERNEL
#include "nv_kernel_def.h"
@ -116,5 +111,3 @@ __kernel void kernel_info(__global int *info) {
info[13]=THREADS_PER_CHARGE;
}
#endif

View File

@ -1,26 +1,24 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
/***************************************************************************
device.h
-------------------
W. Michael Brown (ORNL)
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
Class for management of the device where the computations are performed
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
__________________________________________________________________________
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
__________________________________________________________________________
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
begin :
email : brownw@ornl.gov
***************************************************************************/
#ifndef PAIR_GPU_DEVICE_H
#define PAIR_GPU_DEVICE_H
#ifndef LAL_DEVICE_H
#define LAL_DEVICE_H
#include "atom.h"
#include "ans.h"
#include "nbor.h"
#include "answer.h"
#include "neighbor.h"
#include "pppm.h"
#include "mpi.h"
#include <sstream>
@ -32,10 +30,10 @@ template <class numtyp, class acctyp,
class grdtyp, class grdtyp4> class PPPMGPUMemory;
template <class numtyp, class acctyp>
class PairGPUDevice {
class Device {
public:
PairGPUDevice();
~PairGPUDevice();
Device();
~Device();
/// Initialize the device for use by this process
/** Sets up a per-device MPI communicator for load balancing and initializes
@ -68,9 +66,9 @@ class PairGPUDevice {
* - -3 if there is an out of memory error
* - -4 if the GPU library was not compiled for GPU
* - -5 Double precision is not supported on card **/
int init(PairGPUAns<numtyp,acctyp> &a, const bool charge, const bool rot,
int init(Answer<numtyp,acctyp> &a, const bool charge, const bool rot,
const int nlocal, const int host_nlocal, const int nall,
PairGPUNbor *nbor, const int maxspecial, const int gpu_host,
Neighbor *nbor, const int maxspecial, const int gpu_host,
const int max_nbors, const double cell_size, const bool pre_cut);
/// Initialize the device for Atom storage only
@ -83,7 +81,7 @@ class PairGPUDevice {
* - -3 if there is an out of memory error
* - -4 if the GPU library was not compiled for GPU
* - -5 Double precision is not supported on card **/
int init(PairGPUAns<numtyp,acctyp> &ans, const int nlocal, const int nall);
int init(Answer<numtyp,acctyp> &ans, const int nlocal, const int nall);
/// Output a message for pair_style acceleration with device stats
void init_message(FILE *screen, const char *name,
@ -109,8 +107,8 @@ class PairGPUDevice {
inline bool double_precision() { return gpu->double_precision(); }
/// Output a message with timing information
void output_times(UCL_Timer &time_pair, PairGPUAns<numtyp,acctyp> &ans,
PairGPUNbor &nbor, const double avg_split,
void output_times(UCL_Timer &time_pair, Answer<numtyp,acctyp> &ans,
Neighbor &nbor, const double avg_split,
const double max_bytes, const double gpu_overhead,
const double driver_overhead,
const int threads_per_atom, FILE *screen);
@ -119,7 +117,7 @@ class PairGPUDevice {
void output_kspace_times(UCL_Timer &time_in, UCL_Timer &time_out,
UCL_Timer & time_map, UCL_Timer & time_rho,
UCL_Timer &time_interp,
PairGPUAns<numtyp,acctyp> &ans,
Answer<numtyp,acctyp> &ans,
const double max_bytes, const double cpu_time,
const double cpu_idle_time, FILE *screen);
@ -130,7 +128,7 @@ class PairGPUDevice {
void clear_device();
/// Add an answer object for putting forces, energies, etc from GPU to LAMMPS
inline void add_ans_object(PairGPUAns<numtyp,acctyp> *ans)
inline void add_ans_object(Answer<numtyp,acctyp> *ans)
{ ans_queue.push(ans); }
/// Add "answers" (force,energies,etc.) into LAMMPS structures
@ -248,12 +246,12 @@ class PairGPUDevice {
// --------------------------- ATOM DATA --------------------------
/// Atom Data
PairGPUAtom<numtyp,acctyp> atom;
Atom<numtyp,acctyp> atom;
// --------------------------- NBOR DATA ----------------------------
/// Neighbor Data
PairGPUNborShared _nbor_shared;
NeighborShared _neighbor_shared;
// ------------------------ LONG RANGE DATA -------------------------
@ -274,7 +272,7 @@ class PairGPUDevice {
}
private:
std::queue<PairGPUAns<numtyp,acctyp> *> ans_queue;
std::queue<Answer<numtyp,acctyp> *> ans_queue;
int _init_count;
bool _device_init, _host_timer_started, _time_device;
MPI_Comm _comm_world, _comm_replica, _comm_gpu;

View File

@ -1,7 +1,7 @@
// **************************************************************************
// ellipsoid_extra.h
// -------------------
// W. Michael Brown
// W. Michael Brown (ORNL)
//
// Device code for Ellipsoid math routines
//
@ -13,8 +13,8 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifndef ELLIPSOID_EXTRA_H
#define ELLIPSOID_EXTRA_H
#ifndef LAL_ELLIPSOID_EXTRA_H
#define LAL_ELLIPSOID_EXTRA_H
enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE};
@ -409,16 +409,4 @@ __inline void gpu_times_column3(const numtyp m[9], const numtyp v[3],
ans[2] = m[6]*v[0] + m[7]*v[1] + m[8]*v[2];
}
#endif

View File

@ -1,7 +1,7 @@
// **************************************************************************
// ellipsoid_nbor.cu
// -------------------
// W. Michael Brown
// W. Michael Brown (ORNL)
//
// Device code for Ellipsoid neighbor routines
//
@ -13,9 +13,6 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifndef ELLIPSOID_NBOR_H
#define ELLIPSOID_NBOR_H
#ifdef NV_KERNEL
#include "nv_kernel_def.h"
@ -161,5 +158,3 @@ __kernel void kernel_nbor_fast(__global numtyp4 *x_, __global numtyp2 *cut_form,
dev_nbor[ii+nbor_pitch]=newj;
}
}
#endif

View File

@ -24,7 +24,7 @@
using namespace LAMMPS_AL;
#define GayBerneT GayBerne<numtyp, acctyp>
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
extern Device<PRECISION,ACC_PRECISION> device;
template <class numtyp, class acctyp>
GayBerneT::GayBerne() : BaseEllipsoid<numtyp,acctyp>(),

View File

@ -25,10 +25,10 @@
#include <cassert>
#define LJL_GPU_MemoryT LJL_GPU_Memory<numtyp, acctyp>
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
extern Device<PRECISION,ACC_PRECISION> device;
template <class numtyp, class acctyp>
LJL_GPU_MemoryT::LJL_GPU_Memory() : AtomicGPUMemory<numtyp,acctyp>(), _allocated(false) {
LJL_GPU_MemoryT::LJL_GPU_Memory() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
}
template <class numtyp, class acctyp>

View File

@ -21,7 +21,7 @@
#include "base_atomic.h"
template <class numtyp, class acctyp>
class LJL_GPU_Memory : public AtomicGPUMemory<numtyp, acctyp> {
class LJL_GPU_Memory : public BaseAtomic<numtyp, acctyp> {
public:
LJL_GPU_Memory();
~LJL_GPU_Memory();

View File

@ -25,10 +25,10 @@
#include <cassert>
#define LJ96_GPU_MemoryT LJ96_GPU_Memory<numtyp, acctyp>
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
extern Device<PRECISION,ACC_PRECISION> device;
template <class numtyp, class acctyp>
LJ96_GPU_MemoryT::LJ96_GPU_Memory() : AtomicGPUMemory<numtyp,acctyp>(), _allocated(false) {
LJ96_GPU_MemoryT::LJ96_GPU_Memory() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
}
template <class numtyp, class acctyp>

View File

@ -21,7 +21,7 @@
#include "base_atomic.h"
template <class numtyp, class acctyp>
class LJ96_GPU_Memory : public AtomicGPUMemory<numtyp, acctyp> {
class LJ96_GPU_Memory : public BaseAtomic<numtyp, acctyp> {
public:
LJ96_GPU_Memory();
~LJ96_GPU_Memory();

View File

@ -25,10 +25,10 @@ using namespace LAMMPS_AL;
#define LJClass2LongT LJClass2Long<numtyp, acctyp>
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
extern Device<PRECISION,ACC_PRECISION> device;
template <class numtyp, class acctyp>
LJClass2LongT::LJClass2Long() : ChargeGPUMemory<numtyp,acctyp>(),
LJClass2LongT::LJClass2Long() : BaseCharge<numtyp,acctyp>(),
_allocated(false) {
}

View File

@ -21,7 +21,7 @@
namespace LAMMPS_AL {
template <class numtyp, class acctyp>
class LJClass2Long : public ChargeGPUMemory<numtyp, acctyp> {
class LJClass2Long : public BaseCharge<numtyp, acctyp> {
public:
LJClass2Long();
~LJClass2Long();

View File

@ -25,10 +25,10 @@
#include <cassert>
#define LJC_GPU_MemoryT LJC_GPU_Memory<numtyp, acctyp>
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
extern Device<PRECISION,ACC_PRECISION> device;
template <class numtyp, class acctyp>
LJC_GPU_MemoryT::LJC_GPU_Memory() : ChargeGPUMemory<numtyp,acctyp>(),
LJC_GPU_MemoryT::LJC_GPU_Memory() : BaseCharge<numtyp,acctyp>(),
_allocated(false) {
}

View File

@ -21,7 +21,7 @@
#include "base_charge.h"
template <class numtyp, class acctyp>
class LJC_GPU_Memory : public ChargeGPUMemory<numtyp, acctyp> {
class LJC_GPU_Memory : public BaseCharge<numtyp, acctyp> {
public:
LJC_GPU_Memory();
~LJC_GPU_Memory();

View File

@ -25,10 +25,10 @@
#include <cassert>
#define LJCL_GPU_MemoryT LJCL_GPU_Memory<numtyp, acctyp>
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
extern Device<PRECISION,ACC_PRECISION> device;
template <class numtyp, class acctyp>
LJCL_GPU_MemoryT::LJCL_GPU_Memory() : ChargeGPUMemory<numtyp,acctyp>(),
LJCL_GPU_MemoryT::LJCL_GPU_Memory() : BaseCharge<numtyp,acctyp>(),
_allocated(false) {
}

View File

@ -21,7 +21,7 @@
#include "base_charge.h"
template <class numtyp, class acctyp>
class LJCL_GPU_Memory : public ChargeGPUMemory<numtyp, acctyp> {
class LJCL_GPU_Memory : public BaseCharge<numtyp, acctyp> {
public:
LJCL_GPU_Memory();
~LJCL_GPU_Memory();

View File

@ -25,10 +25,10 @@
#include <cassert>
#define LJE_GPU_MemoryT LJE_GPU_Memory<numtyp, acctyp>
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
extern Device<PRECISION,ACC_PRECISION> device;
template <class numtyp, class acctyp>
LJE_GPU_MemoryT::LJE_GPU_Memory() : AtomicGPUMemory<numtyp,acctyp>(), _allocated(false) {
LJE_GPU_MemoryT::LJE_GPU_Memory() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
}
template <class numtyp, class acctyp>

View File

@ -21,7 +21,7 @@
#include "base_atomic.h"
template <class numtyp, class acctyp>
class LJE_GPU_Memory : public AtomicGPUMemory<numtyp, acctyp> {
class LJE_GPU_Memory : public BaseAtomic<numtyp, acctyp> {
public:
LJE_GPU_Memory();
~LJE_GPU_Memory();

View File

@ -25,10 +25,10 @@
#include <cassert>
#define MOR_GPU_MemoryT MOR_GPU_Memory<numtyp, acctyp>
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
extern Device<PRECISION,ACC_PRECISION> device;
template <class numtyp, class acctyp>
MOR_GPU_MemoryT::MOR_GPU_Memory() : AtomicGPUMemory<numtyp,acctyp>(), _allocated(false) {
MOR_GPU_MemoryT::MOR_GPU_Memory() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
}
template <class numtyp, class acctyp>

View File

@ -21,7 +21,7 @@
#include "base_atomic.h"
template <class numtyp, class acctyp>
class MOR_GPU_Memory : public AtomicGPUMemory<numtyp, acctyp> {
class MOR_GPU_Memory : public BaseAtomic<numtyp, acctyp> {
public:
MOR_GPU_Memory();
~MOR_GPU_Memory();

View File

@ -1,46 +0,0 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
#ifdef NV_KERNEL
#include "geryon/ucl_nv_kernel.h"
#else
#define GLOBAL_ID_X get_global_id(0)
#endif
__kernel void kernel_unpack(__global int *dev_nbor, __global int *dev_ij,
const int inum) {
// ii indexes the two interacting particles in gi
int ii=GLOBAL_ID_X;
if (ii<inum) {
__global int *nbor=dev_nbor+ii+inum;
int numj=*nbor;
nbor+=inum;
__global int *list=dev_ij+*nbor;
__global int *list_end=list+numj;
for ( ; list<list_end; list++) {
*nbor=*list;
nbor+=inum;
}
} // if ii
}

View File

@ -1,58 +0,0 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
#ifndef PAIR_GPU_NBOR_SHARED_H
#define PAIR_GPU_NBOR_SHARED_H
#ifdef USE_OPENCL
#include "geryon/ocl_kernel.h"
#include "geryon/ocl_texture.h"
using namespace ucl_opencl;
#else
#include "geryon/nvd_kernel.h"
#include "geryon/nvd_texture.h"
using namespace ucl_cudadr;
#endif
class PairGPUNborShared {
public:
PairGPUNborShared() : _compiled(false) {}
~PairGPUNborShared() { clear(); }
/// Free all memory on host and device
void clear();
/// Texture for cached position/type access with CUDA
UCL_Texture neigh_tex;
/// Compile kernels for neighbor lists
void compile_kernels(UCL_Device &dev, const bool gpu_nbor);
// ----------------------------- Kernels
UCL_Program *nbor_program, *build_program;
UCL_Kernel k_nbor, k_cell_id, k_cell_counts, k_build_nbor;
UCL_Kernel k_transpose, k_special;
private:
bool _compiled, _gpu_nbor;
};
#endif

View File

@ -1,27 +1,25 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
/***************************************************************************
neighbor.cpp
-------------------
W. Michael Brown (ORNL)
Peng Wang (Nvidia)
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
Class for handling neighbor lists
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
Peng Wang (Nvidia), penwang@nvidia.com
------------------------------------------------------------------------- */
__________________________________________________________________________
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
__________________________________________________________________________
begin :
email : brownw@ornl.gov, penwang@nvidia.com
***************************************************************************/
#include "precision.h"
#include "nbor.h"
#include "neighbor.h"
#include "device.h"
#include "math.h"
int PairGPUNbor::bytes_per_atom(const int max_nbors) const {
int Neighbor::bytes_per_atom(const int max_nbors) const {
if (_gpu_nbor)
return (max_nbors+2)*sizeof(int);
else if (_use_packing)
@ -30,7 +28,7 @@ int PairGPUNbor::bytes_per_atom(const int max_nbors) const {
return (max_nbors+3)*sizeof(int);
}
bool PairGPUNbor::init(PairGPUNborShared *shared, const int inum,
bool Neighbor::init(NeighborShared *shared, const int inum,
const int host_inum, const int max_nbors,
const int maxspecial, UCL_Device &devi,
const bool gpu_nbor, const int gpu_host,
@ -89,7 +87,7 @@ bool PairGPUNbor::init(PairGPUNborShared *shared, const int inum,
return success;
}
void PairGPUNbor::alloc(bool &success) {
void Neighbor::alloc(bool &success) {
dev_nbor.clear();
host_acc.clear();
int nt=_max_atoms+_max_host;
@ -156,7 +154,7 @@ void PairGPUNbor::alloc(bool &success) {
_allocated=true;
}
void PairGPUNbor::clear() {
void Neighbor::clear() {
_gpu_bytes=0.0;
_cell_bytes=0.0;
_c_bytes=0.0;
@ -181,7 +179,7 @@ void PairGPUNbor::clear() {
}
}
double PairGPUNbor::host_memory_usage() const {
double Neighbor::host_memory_usage() const {
if (_gpu_nbor) {
if (_gpu_host)
return host_nbor.row_bytes()*host_nbor.rows()+host_ilist.row_bytes()+
@ -190,10 +188,10 @@ double PairGPUNbor::host_memory_usage() const {
return 0;
} else
return host_packed.row_bytes()*host_packed.rows()+host_acc.row_bytes()+
sizeof(PairGPUNbor);
sizeof(Neighbor);
}
void PairGPUNbor::get_host(const int inum, int *ilist, int *numj,
void Neighbor::get_host(const int inum, int *ilist, int *numj,
int **firstneigh, const int block_size) {
time_nbor.start();
@ -258,9 +256,9 @@ void PairGPUNbor::get_host(const int inum, int *ilist, int *numj,
}
template <class numtyp, class acctyp>
void PairGPUNbor::build_nbor_list(const int inum, const int host_inum,
void Neighbor::build_nbor_list(const int inum, const int host_inum,
const int nall,
PairGPUAtom<numtyp,acctyp> &atom,
Atom<numtyp,acctyp> &atom,
double *sublo, double *subhi, int *tag,
int **nspecial, int **special, bool &success,
int &mn) {
@ -399,8 +397,8 @@ void PairGPUNbor::build_nbor_list(const int inum, const int host_inum,
time_nbor.stop();
}
template void PairGPUNbor::build_nbor_list<PRECISION,ACC_PRECISION>
template void Neighbor::build_nbor_list<PRECISION,ACC_PRECISION>
(const int inum, const int host_inum, const int nall,
PairGPUAtom<PRECISION,ACC_PRECISION> &atom, double *sublo, double *subhi,
Atom<PRECISION,ACC_PRECISION> &atom, double *sublo, double *subhi,
int *, int **, int **, bool &success, int &mn);

View File

@ -1,25 +1,24 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
/***************************************************************************
neighbor.h
-------------------
W. Michael Brown (ORNL)
Peng Wang (Nvidia)
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
Class for handling neighbor lists
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
__________________________________________________________________________
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
__________________________________________________________________________
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
begin :
email : brownw@ornl.gov, penwang@nvidia.com
***************************************************************************/
#ifndef PAIR_GPU_NBOR_H
#define PAIR_GPU_NBOR_H
#ifndef LAL_NEIGHBOR_H
#define LAL_NEIGHBOR_H
#include "atom.h"
#include "nbor_shared.h"
#include "neighbor_shared.h"
#define IJ_SIZE 131072
@ -37,10 +36,10 @@ using namespace ucl_cudadr;
#endif
class PairGPUNbor {
class Neighbor {
public:
PairGPUNbor() : _allocated(false), _use_packing(false) {}
~PairGPUNbor() { clear(); }
Neighbor() : _allocated(false), _use_packing(false) {}
~Neighbor() { clear(); }
/// Determine whether neighbor unpacking should be used
/** If false, twice as much memory is reserved to allow unpacking neighbors by
@ -57,7 +56,7 @@ class PairGPUNbor {
* 2 if gpu_nbor is true, and host needs a full nbor list
* \param pre_cut True if cutoff test will be performed in separate kernel
* than the force kernel **/
bool init(PairGPUNborShared *shared, const int inum, const int host_inum,
bool init(NeighborShared *shared, const int inum, const int host_inum,
const int max_nbors, const int maxspecial, UCL_Device &dev,
const bool gpu_nbor, const int gpu_host, const bool pre_cut,
const int block_cell_2d, const int block_cell_id,
@ -138,7 +137,7 @@ class PairGPUNbor {
/// Build nbor list on the device
template <class numtyp, class acctyp>
void build_nbor_list(const int inum, const int host_inum, const int nall,
PairGPUAtom<numtyp,acctyp> &atom, double *sublo,
Atom<numtyp,acctyp> &atom, double *sublo,
double *subhi, int *tag, int **nspecial, int **special,
bool &success, int &max_nbors);
@ -187,7 +186,7 @@ class PairGPUNbor {
UCL_Timer time_nbor, time_kernel;
private:
PairGPUNborShared *_shared;
NeighborShared *_shared;
UCL_Device *dev;
bool _allocated, _use_packing;
int _max_atoms, _max_nbors, _max_host, _nbor_pitch, _maxspecial;

44
lib/gpu/neighbor_cpu.cu Normal file
View File

@ -0,0 +1,44 @@
// **************************************************************************
// atom.cu
// -------------------
// W. Michael Brown (ORNL)
//
// Device code for handling CPU generated neighbor lists
//
// __________________________________________________________________________
// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
// __________________________________________________________________________
//
// begin :
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#include "geryon/ucl_nv_kernel.h"
#else
#define GLOBAL_ID_X get_global_id(0)
#endif
__kernel void kernel_unpack(__global int *dev_nbor, __global int *dev_ij,
const int inum) {
// ii indexes the two interacting particles in gi
int ii=GLOBAL_ID_X;
if (ii<inum) {
__global int *nbor=dev_nbor+ii+inum;
int numj=*nbor;
nbor+=inum;
__global int *list=dev_ij+*nbor;
__global int *list_end=list+numj;
for ( ; list<list_end; list++) {
*nbor=*list;
nbor+=inum;
}
} // if ii
}

View File

@ -1,20 +1,18 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Peng Wang (Nvidia), penwang@nvidia.com
Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
// **************************************************************************
// atom.cu
// -------------------
// Peng Wang (Nvidia)
// W. Michael Brown (ORNL)
//
// Device code for handling GPU generated neighbor lists
//
// __________________________________________________________________________
// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
// __________________________________________________________________________
//
// begin :
// email : penwang@nvidia.com, brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL

View File

@ -1,21 +1,19 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
/***************************************************************************
neighbor_shared.cpp
-------------------
W. Michael Brown (ORNL)
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
Class for management of data shared by all neighbor lists
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
__________________________________________________________________________
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
__________________________________________________________________________
#include "nbor_shared.h"
begin :
email : brownw@ornl.gov
***************************************************************************/
#include "neighbor_shared.h"
#ifdef USE_OPENCL
#include "nbor_cl.h"
@ -24,7 +22,7 @@
#include "pair_gpu_build_ptx.h"
#endif
void PairGPUNborShared::clear() {
void NeighborShared::clear() {
if (_compiled) {
if (_gpu_nbor) {
k_cell_id.clear();
@ -41,7 +39,7 @@ void PairGPUNborShared::clear() {
}
}
void PairGPUNborShared::compile_kernels(UCL_Device &dev, const bool gpu_nbor) {
void NeighborShared::compile_kernels(UCL_Device &dev, const bool gpu_nbor) {
if (_compiled)
return;
@ -50,7 +48,7 @@ void PairGPUNborShared::compile_kernels(UCL_Device &dev, const bool gpu_nbor) {
if (gpu_nbor==false) {
nbor_program=new UCL_Program(dev);
nbor_program->load_string(nbor_cpu,flags.c_str());
nbor_program->load_string(neighbor_cpu,flags.c_str());
k_nbor.set_function(*nbor_program,"kernel_unpack");
} else {
build_program=new UCL_Program(dev);
@ -58,7 +56,7 @@ void PairGPUNborShared::compile_kernels(UCL_Device &dev, const bool gpu_nbor) {
std::cerr << "CANNOT CURRENTLY USE GPU NEIGHBORING WITH OPENCL\n";
exit(1);
#else
build_program->load_string(nbor_gpu,flags.c_str());
build_program->load_string(neighbor_gpu,flags.c_str());
#endif
k_cell_id.set_function(*build_program,"calc_cell_id");
k_cell_counts.set_function(*build_program,"kernel_calc_cell_counts");

56
lib/gpu/neighbor_shared.h Normal file
View File

@ -0,0 +1,56 @@
/***************************************************************************
neighbor_shared.h
-------------------
W. Michael Brown (ORNL)
Class for management of data shared by all neighbor lists
__________________________________________________________________________
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
__________________________________________________________________________
begin :
email : brownw@ornl.gov
***************************************************************************/
#ifndef LAL_NEIGHBOR_SHARED_H
#define LAL_NEIGHBOR_SHARED_H
#ifdef USE_OPENCL
#include "geryon/ocl_kernel.h"
#include "geryon/ocl_texture.h"
using namespace ucl_opencl;
#else
#include "geryon/nvd_kernel.h"
#include "geryon/nvd_texture.h"
using namespace ucl_cudadr;
#endif
class NeighborShared {
public:
NeighborShared() : _compiled(false) {}
~NeighborShared() { clear(); }
/// Free all memory on host and device
void clear();
/// Texture for cached position/type access with CUDA
UCL_Texture neigh_tex;
/// Compile kernels for neighbor lists
void compile_kernels(UCL_Device &dev, const bool gpu_nbor);
// ----------------------------- Kernels
UCL_Program *nbor_program, *build_program;
UCL_Kernel k_nbor, k_cell_id, k_cell_counts, k_build_nbor;
UCL_Kernel k_transpose, k_special;
private:
bool _compiled, _gpu_nbor;
};
#endif

View File

@ -26,13 +26,13 @@
#define PPPMGPUMemoryT PPPMGPUMemory<numtyp, acctyp, grdtyp, grdtyp4>
extern PairGPUDevice<PRECISION,ACC_PRECISION> pair_gpu_device;
extern Device<PRECISION,ACC_PRECISION> global_device;
template <class numtyp, class acctyp, class grdtyp, class grdtyp4>
PPPMGPUMemoryT::PPPMGPUMemory() : _allocated(false), _compiled(false),
_max_bytes(0) {
device=&pair_gpu_device;
ans=new PairGPUAns<numtyp,acctyp>();
device=&global_device;
ans=new Answer<numtyp,acctyp>();
}
template <class numtyp, class acctyp, class grdtyp, class grdtyp4>

View File

@ -27,7 +27,7 @@
#include "geryon/nvd_texture.h"
#endif
template <class numtyp, class acctyp> class PairGPUDevice;
template <class numtyp, class acctyp> class Device;
template <class numtyp, class acctyp, class grdtyp, class grdtyp4>
class PPPMGPUMemory {
@ -118,7 +118,7 @@ class PPPMGPUMemory {
// -------------------------- DEVICE DATA -------------------------
/// Device Properties and Atom and Neighbor storage
PairGPUDevice<numtyp,acctyp> *device;
Device<numtyp,acctyp> *device;
/// Geryon device
UCL_Device *ucl_device;
@ -132,7 +132,7 @@ class PPPMGPUMemory {
// --------------------------- ATOM DATA --------------------------
/// Atom Data
PairGPUAtom<numtyp,acctyp> *atom;
Atom<numtyp,acctyp> *atom;
// --------------------------- GRID DATA --------------------------
@ -162,7 +162,7 @@ class PPPMGPUMemory {
// ------------------------ FORCE/ENERGY DATA -----------------------
PairGPUAns<numtyp,acctyp> *ans;
Answer<numtyp,acctyp> *ans;
// ------------------------- DEVICE KERNELS -------------------------
UCL_Program *pppm_program;

View File

@ -24,7 +24,7 @@
using namespace LAMMPS_AL;
#define RESquaredT RESquared<numtyp, acctyp>
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
extern Device<PRECISION,ACC_PRECISION> device;
template <class numtyp, class acctyp>
RESquaredT::RESquared() : BaseEllipsoid<numtyp,acctyp>(),