More renaming...
This commit is contained in:
@ -31,8 +31,8 @@ UCL_H = $(wildcard ./geryon/ucl*.h)
|
||||
NVC_H = $(wildcard ./geryon/nvc*.h) $(UCL_H)
|
||||
NVD_H = $(wildcard ./geryon/nvd*.h) $(UCL_H) nv_kernel_def.h
|
||||
# Headers for Pair Stuff
|
||||
PAIR_H = atom.h ans.h nbor_shared.h \
|
||||
nbor.h precision.h device.h \
|
||||
PAIR_H = atom.h answer.h neighbor_shared.h \
|
||||
neighbor.h precision.h device.h \
|
||||
balance.h pppm.h
|
||||
|
||||
ALL_H = $(NVD_H) $(PAIR_H)
|
||||
@ -42,7 +42,7 @@ CUDPP = $(OBJ_DIR)/cudpp.o $(OBJ_DIR)/cudpp_plan.o \
|
||||
$(OBJ_DIR)/cudpp_maximal_launch.o $(OBJ_DIR)/cudpp_plan_manager.o \
|
||||
$(OBJ_DIR)/radixsort_app.cu_o $(OBJ_DIR)/scan_app.cu_o
|
||||
OBJS = $(OBJ_DIR)/atom.o $(OBJ_DIR)/ans.o \
|
||||
$(OBJ_DIR)/nbor.o $(OBJ_DIR)/nbor_shared.o \
|
||||
$(OBJ_DIR)/nbor.o $(OBJ_DIR)/neighbor_shared.o \
|
||||
$(OBJ_DIR)/device.o $(OBJ_DIR)/base_atomic.o \
|
||||
$(OBJ_DIR)/base_charge.o $(OBJ_DIR)/base_ellipsoid.o \
|
||||
$(OBJ_DIR)/pppm.o $(OBJ_DIR)/pppm_ext.o \
|
||||
@ -62,8 +62,8 @@ OBJS = $(OBJ_DIR)/atom.o $(OBJ_DIR)/ans.o \
|
||||
$(CUDPP)
|
||||
PTXS = $(OBJ_DIR)/device.ptx \
|
||||
$(OBJ_DIR)/atom.ptx $(OBJ_DIR)/atom_ptx.h \
|
||||
$(OBJ_DIR)/nbor_cpu.ptx $(OBJ_DIR)/nbor_ptx.h \
|
||||
$(OBJ_DIR)/nbor_gpu.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h \
|
||||
$(OBJ_DIR)/neighbor_cpu.ptx $(OBJ_DIR)/nbor_ptx.h \
|
||||
$(OBJ_DIR)/neighbor_gpu.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h \
|
||||
$(OBJ_DIR)/pppm_f_gpu_kernel.ptx $(OBJ_DIR)/pppm_f_gpu_ptx.h \
|
||||
$(OBJ_DIR)/pppm_d_gpu_kernel.ptx $(OBJ_DIR)/pppm_d_gpu_ptx.h \
|
||||
$(OBJ_DIR)/ellipsoid_nbor.ptx $(OBJ_DIR)/ellipsoid_nbor_ptx.h \
|
||||
@ -111,26 +111,26 @@ $(OBJ_DIR)/atom_ptx.h: $(OBJ_DIR)/atom.ptx
|
||||
$(OBJ_DIR)/atom.o: atom.cpp atom.h $(NVD_H) $(OBJ_DIR)/atom_ptx.h
|
||||
$(CUDR) -o $@ -c atom.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/ans.o: ans.cpp ans.h $(NVD_H)
|
||||
$(CUDR) -o $@ -c ans.cpp -I$(OBJ_DIR)
|
||||
$(OBJ_DIR)/ans.o: answer.cpp answer.h $(NVD_H)
|
||||
$(CUDR) -o $@ -c answer.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/nbor_cpu.ptx: nbor_cpu.cu
|
||||
$(CUDA) --ptx -DNV_KERNEL -o $@ nbor_cpu.cu
|
||||
$(OBJ_DIR)/neighbor_cpu.ptx: neighbor_cpu.cu
|
||||
$(CUDA) --ptx -DNV_KERNEL -o $@ neighbor_cpu.cu
|
||||
|
||||
$(OBJ_DIR)/nbor_ptx.h: $(OBJ_DIR)/nbor_cpu.ptx
|
||||
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/nbor_cpu.ptx $(OBJ_DIR)/nbor_ptx.h
|
||||
$(OBJ_DIR)/nbor_ptx.h: $(OBJ_DIR)/neighbor_cpu.ptx
|
||||
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/neighbor_cpu.ptx $(OBJ_DIR)/nbor_ptx.h
|
||||
|
||||
$(OBJ_DIR)/nbor_gpu.ptx: nbor_gpu.cu
|
||||
$(CUDA) --ptx -DNV_KERNEL -o $@ nbor_gpu.cu
|
||||
$(OBJ_DIR)/neighbor_gpu.ptx: neighbor_gpu.cu
|
||||
$(CUDA) --ptx -DNV_KERNEL -o $@ neighbor_gpu.cu
|
||||
|
||||
$(OBJ_DIR)/pair_gpu_build_ptx.h: $(OBJ_DIR)/nbor_gpu.ptx
|
||||
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/nbor_gpu.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h
|
||||
$(OBJ_DIR)/pair_gpu_build_ptx.h: $(OBJ_DIR)/neighbor_gpu.ptx
|
||||
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/neighbor_gpu.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h
|
||||
|
||||
$(OBJ_DIR)/nbor_shared.o: nbor_shared.cpp nbor_shared.h $(OBJ_DIR)/nbor_ptx.h $(OBJ_DIR)/pair_gpu_build_ptx.h $(NVD_H)
|
||||
$(CUDR) -o $@ -c nbor_shared.cpp -I$(OBJ_DIR)
|
||||
$(OBJ_DIR)/neighbor_shared.o: neighbor_shared.cpp neighbor_shared.h $(OBJ_DIR)/nbor_ptx.h $(OBJ_DIR)/pair_gpu_build_ptx.h $(NVD_H)
|
||||
$(CUDR) -o $@ -c neighbor_shared.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/nbor.o: nbor.cpp nbor.h nbor_shared.h $(NVD_H)
|
||||
$(CUDR) -o $@ -c nbor.cpp -I$(OBJ_DIR)
|
||||
$(OBJ_DIR)/nbor.o: neighbor.cpp neighbor.h neighbor_shared.h $(NVD_H)
|
||||
$(CUDR) -o $@ -c neighbor.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/device.ptx: device.cu
|
||||
$(CUDA) --ptx -DNV_KERNEL -o $@ device.cu
|
||||
|
||||
@ -24,15 +24,15 @@ OCL_LIB = $(LIB_DIR)/libgpu.a
|
||||
UCL_H = $(wildcard ./geryon/ucl*.h)
|
||||
OCL_H = $(wildcard ./geryon/ocl*.h) $(UCL_H)
|
||||
# Headers for Pair Stuff
|
||||
PAIR_H = atom.h ans.h nbor_shared.h \
|
||||
nbor.h precision.h device.h \
|
||||
PAIR_H = atom.h answer.h neighbor_shared.h \
|
||||
neighbor.h precision.h device.h \
|
||||
balance.h pppm.h
|
||||
|
||||
ALL_H = $(OCL_H) $(PAIR_H)
|
||||
|
||||
EXECS = $(BIN_DIR)/ocl_get_devices
|
||||
OBJS = $(OBJ_DIR)/atom.o $(OBJ_DIR)/ans.o \
|
||||
$(OBJ_DIR)/nbor_shared.o $(OBJ_DIR)/nbor.o \
|
||||
$(OBJ_DIR)/neighbor_shared.o $(OBJ_DIR)/nbor.o \
|
||||
$(OBJ_DIR)/device.o $(OBJ_DIR)/base_atomic.o \
|
||||
$(OBJ_DIR)/base_charge.o $(OBJ_DIR)/base_ellipsoid.o \
|
||||
$(OBJ_DIR)/pppm.o $(OBJ_DIR)/pppm_ext.o \
|
||||
@ -69,17 +69,17 @@ $(OBJ_DIR)/atom_cl.h: atom.cu
|
||||
$(OBJ_DIR)/atom.o: atom.cpp atom.h $(OCL_H) $(OBJ_DIR)/atom_cl.h
|
||||
$(OCL) -o $@ -c atom.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/ans.o: ans.cpp ans.h $(OCL_H)
|
||||
$(OCL) -o $@ -c ans.cpp -I$(OBJ_DIR)
|
||||
$(OBJ_DIR)/ans.o: answer.cpp answer.h $(OCL_H)
|
||||
$(OCL) -o $@ -c answer.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/nbor_cl.h: nbor_cpu.cu
|
||||
$(BSH) ./geryon/file_to_cstr.sh nbor_cpu.cu $(OBJ_DIR)/nbor_cl.h
|
||||
$(OBJ_DIR)/nbor_cl.h: neighbor_cpu.cu
|
||||
$(BSH) ./geryon/file_to_cstr.sh neighbor_cpu.cu $(OBJ_DIR)/nbor_cl.h
|
||||
|
||||
$(OBJ_DIR)/nbor_shared.o: nbor_shared.cpp nbor_shared.h $(OCL_H) $(OBJ_DIR)/nbor_cl.h
|
||||
$(OCL) -o $@ -c nbor_shared.cpp -I$(OBJ_DIR)
|
||||
$(OBJ_DIR)/neighbor_shared.o: neighbor_shared.cpp neighbor_shared.h $(OCL_H) $(OBJ_DIR)/nbor_cl.h
|
||||
$(OCL) -o $@ -c neighbor_shared.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/nbor.o: nbor.cpp nbor.h $(OCL_H) nbor_shared.h
|
||||
$(OCL) -o $@ -c nbor.cpp -I$(OBJ_DIR)
|
||||
$(OBJ_DIR)/nbor.o: neighbor.cpp neighbor.h $(OCL_H) neighbor_shared.h
|
||||
$(OCL) -o $@ -c neighbor.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/pair_gpu_dev_cl.h: device.cu
|
||||
$(BSH) ./geryon/file_to_cstr.sh device.cu $(OBJ_DIR)/pair_gpu_dev_cl.h
|
||||
|
||||
@ -1,31 +1,29 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
/***************************************************************************
|
||||
answer.cpp
|
||||
-------------------
|
||||
W. Michael Brown (ORNL)
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
Class for data management of forces, torques, energies, and virials
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
|
||||
------------------------------------------------------------------------- */
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
#include "ans.h"
|
||||
begin :
|
||||
email : brownw@ornl.gov
|
||||
***************************************************************************/
|
||||
|
||||
#define PairGPUAnsT PairGPUAns<numtyp,acctyp>
|
||||
#include "answer.h"
|
||||
|
||||
#define AnswerT Answer<numtyp,acctyp>
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
PairGPUAnsT::PairGPUAns() : _allocated(false),_eflag(false),_vflag(false),
|
||||
AnswerT::Answer() : _allocated(false),_eflag(false),_vflag(false),
|
||||
_inum(0),_ilist(NULL),_newton(false) {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int PairGPUAnsT::bytes_per_atom() const {
|
||||
int AnswerT::bytes_per_atom() const {
|
||||
int bytes=11*sizeof(acctyp);
|
||||
if (_rot)
|
||||
bytes+=4*sizeof(acctyp);
|
||||
@ -35,7 +33,7 @@ int PairGPUAnsT::bytes_per_atom() const {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
bool PairGPUAnsT::alloc(const int inum) {
|
||||
bool AnswerT::alloc(const int inum) {
|
||||
_max_local=static_cast<int>(static_cast<double>(inum)*1.10);
|
||||
|
||||
bool success=true;
|
||||
@ -70,7 +68,7 @@ bool PairGPUAnsT::alloc(const int inum) {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
bool PairGPUAnsT::init(const int inum, const bool charge, const bool rot,
|
||||
bool AnswerT::init(const int inum, const bool charge, const bool rot,
|
||||
UCL_Device &devi) {
|
||||
clear();
|
||||
|
||||
@ -100,7 +98,7 @@ bool PairGPUAnsT::init(const int inum, const bool charge, const bool rot,
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
bool PairGPUAnsT::add_fields(const bool charge, const bool rot) {
|
||||
bool AnswerT::add_fields(const bool charge, const bool rot) {
|
||||
bool realloc=false;
|
||||
if (charge && _charge==false) {
|
||||
_charge=true;
|
||||
@ -122,7 +120,7 @@ bool PairGPUAnsT::add_fields(const bool charge, const bool rot) {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void PairGPUAnsT::clear_resize() {
|
||||
void AnswerT::clear_resize() {
|
||||
if (!_allocated)
|
||||
return;
|
||||
_allocated=false;
|
||||
@ -134,7 +132,7 @@ void PairGPUAnsT::clear_resize() {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void PairGPUAnsT::clear() {
|
||||
void AnswerT::clear() {
|
||||
_gpu_bytes=0;
|
||||
if (!_allocated)
|
||||
return;
|
||||
@ -148,7 +146,7 @@ void PairGPUAnsT::clear() {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
double PairGPUAnsT::host_memory_usage() const {
|
||||
double AnswerT::host_memory_usage() const {
|
||||
int atom_bytes=4;
|
||||
if (_charge)
|
||||
atom_bytes+=1;
|
||||
@ -156,11 +154,11 @@ double PairGPUAnsT::host_memory_usage() const {
|
||||
atom_bytes+=4;
|
||||
int ans_bytes=atom_bytes+_ev_fields;
|
||||
return ans_bytes*(_max_local)*sizeof(acctyp)+
|
||||
sizeof(PairGPUAns<numtyp,acctyp>);
|
||||
sizeof(Answer<numtyp,acctyp>);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag,
|
||||
void AnswerT::copy_answers(const bool eflag, const bool vflag,
|
||||
const bool ef_atom, const bool vf_atom) {
|
||||
time_answer.start();
|
||||
_eflag=eflag;
|
||||
@ -184,7 +182,7 @@ void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag,
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag,
|
||||
void AnswerT::copy_answers(const bool eflag, const bool vflag,
|
||||
const bool ef_atom, const bool vf_atom,
|
||||
int *ilist) {
|
||||
_ilist=ilist;
|
||||
@ -192,7 +190,7 @@ void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag,
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
double PairGPUAnsT::energy_virial(double *eatom, double **vatom,
|
||||
double AnswerT::energy_virial(double *eatom, double **vatom,
|
||||
double *virial) {
|
||||
if (_eflag==false && _vflag==false)
|
||||
return 0.0;
|
||||
@ -268,7 +266,7 @@ double PairGPUAnsT::energy_virial(double *eatom, double **vatom,
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
double PairGPUAnsT::energy_virial(double *eatom, double **vatom,
|
||||
double AnswerT::energy_virial(double *eatom, double **vatom,
|
||||
double *virial, double &ecoul) {
|
||||
if (_eflag==false && _vflag==false)
|
||||
return 0.0;
|
||||
@ -359,7 +357,7 @@ double PairGPUAnsT::energy_virial(double *eatom, double **vatom,
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void PairGPUAnsT::get_answers(double **f, double **tor) {
|
||||
void AnswerT::get_answers(double **f, double **tor) {
|
||||
acctyp *ap=host_ans.begin();
|
||||
if (_ilist==NULL) {
|
||||
for (int i=0; i<_inum; i++) {
|
||||
@ -404,4 +402,4 @@ void PairGPUAnsT::get_answers(double **f, double **tor) {
|
||||
}
|
||||
}
|
||||
|
||||
template class PairGPUAns<PRECISION,ACC_PRECISION>;
|
||||
template class Answer<PRECISION,ACC_PRECISION>;
|
||||
@ -1,22 +1,20 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
/***************************************************************************
|
||||
answer.h
|
||||
-------------------
|
||||
W. Michael Brown (ORNL)
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
Class for data management of forces, torques, energies, and virials
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
|
||||
------------------------------------------------------------------------- */
|
||||
begin :
|
||||
email : brownw@ornl.gov
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef PAIR_GPU_ANS_H
|
||||
#define PAIR_GPU_ANS_H
|
||||
#ifndef LAL_ANSWER_H
|
||||
#define LAL_ANSWER_H
|
||||
|
||||
#include <math.h>
|
||||
#include "mpi.h"
|
||||
@ -38,10 +36,10 @@ using namespace ucl_cudadr;
|
||||
#include "precision.h"
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class PairGPUAns {
|
||||
class Answer {
|
||||
public:
|
||||
PairGPUAns();
|
||||
~PairGPUAns() { clear(); }
|
||||
Answer();
|
||||
~Answer() { clear(); }
|
||||
|
||||
/// Current number of local atoms stored
|
||||
inline int inum() const { return _inum; }
|
||||
@ -1,26 +1,24 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
/***************************************************************************
|
||||
atom.cpp
|
||||
-------------------
|
||||
W. Michael Brown (ORNL)
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
Class for particle data management
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
|
||||
------------------------------------------------------------------------- */
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
begin :
|
||||
email : brownw@ornl.gov
|
||||
***************************************************************************/
|
||||
|
||||
#include "atom.h"
|
||||
|
||||
#define PairGPUAtomT PairGPUAtom<numtyp,acctyp>
|
||||
#define AtomT Atom<numtyp,acctyp>
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
PairGPUAtomT::PairGPUAtom() : _compiled(false),_allocated(false),
|
||||
AtomT::Atom() : _compiled(false),_allocated(false),
|
||||
_max_gpu_bytes(0) {
|
||||
#ifndef USE_OPENCL
|
||||
sort_config.op = CUDPP_ADD;
|
||||
@ -31,7 +29,7 @@ PairGPUAtomT::PairGPUAtom() : _compiled(false),_allocated(false),
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int PairGPUAtomT::bytes_per_atom() const {
|
||||
int AtomT::bytes_per_atom() const {
|
||||
int id_space=0;
|
||||
if (_gpu_nbor)
|
||||
id_space=2;
|
||||
@ -44,7 +42,7 @@ int PairGPUAtomT::bytes_per_atom() const {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
bool PairGPUAtomT::alloc(const int nall) {
|
||||
bool AtomT::alloc(const int nall) {
|
||||
_max_atoms=static_cast<int>(static_cast<double>(nall)*1.10);
|
||||
|
||||
bool success=true;
|
||||
@ -138,7 +136,7 @@ bool PairGPUAtomT::alloc(const int nall) {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
bool PairGPUAtomT::add_fields(const bool charge, const bool rot,
|
||||
bool AtomT::add_fields(const bool charge, const bool rot,
|
||||
const bool gpu_nbor, const bool bonds) {
|
||||
bool realloc=false;
|
||||
if (charge && _charge==false) {
|
||||
@ -167,7 +165,7 @@ bool PairGPUAtomT::add_fields(const bool charge, const bool rot,
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
bool PairGPUAtomT::init(const int nall, const bool charge, const bool rot,
|
||||
bool AtomT::init(const int nall, const bool charge, const bool rot,
|
||||
UCL_Device &devi, const bool gpu_nbor,
|
||||
const bool bonds) {
|
||||
clear();
|
||||
@ -206,7 +204,7 @@ bool PairGPUAtomT::init(const int nall, const bool charge, const bool rot,
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void PairGPUAtomT::clear_resize() {
|
||||
void AtomT::clear_resize() {
|
||||
if (!_allocated)
|
||||
return;
|
||||
_allocated=false;
|
||||
@ -240,7 +238,7 @@ void PairGPUAtomT::clear_resize() {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void PairGPUAtomT::clear() {
|
||||
void AtomT::clear() {
|
||||
_max_gpu_bytes=0;
|
||||
if (!_allocated)
|
||||
return;
|
||||
@ -260,19 +258,19 @@ void PairGPUAtomT::clear() {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
double PairGPUAtomT::host_memory_usage() const {
|
||||
double AtomT::host_memory_usage() const {
|
||||
int atom_bytes=4;
|
||||
if (_charge)
|
||||
atom_bytes+=1;
|
||||
if (_rot)
|
||||
atom_bytes+=4;
|
||||
return _max_atoms*atom_bytes*sizeof(numtyp)+
|
||||
sizeof(PairGPUAtom<numtyp,acctyp>);
|
||||
sizeof(Atom<numtyp,acctyp>);
|
||||
}
|
||||
|
||||
// Sort arrays for neighbor list calculation
|
||||
template <class numtyp, class acctyp>
|
||||
void PairGPUAtomT::sort_neighbor(const int num_atoms) {
|
||||
void AtomT::sort_neighbor(const int num_atoms) {
|
||||
#ifndef USE_OPENCL
|
||||
CUDPPResult result = cudppSort(sort_plan, (unsigned *)dev_cell_id.begin(),
|
||||
(int *)dev_particle_id.begin(),
|
||||
@ -292,7 +290,7 @@ void PairGPUAtomT::sort_neighbor(const int num_atoms) {
|
||||
#endif
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void PairGPUAtomT::compile_kernels(UCL_Device &dev) {
|
||||
void AtomT::compile_kernels(UCL_Device &dev) {
|
||||
atom_program=new UCL_Program(dev);
|
||||
atom_program->load_string(atom,"");
|
||||
k_cast_x.set_function(*atom_program,"kernel_cast_x");
|
||||
@ -301,4 +299,4 @@ void PairGPUAtomT::compile_kernels(UCL_Device &dev) {
|
||||
|
||||
#endif
|
||||
|
||||
template class PairGPUAtom<PRECISION,ACC_PRECISION>;
|
||||
template class Atom<PRECISION,ACC_PRECISION>;
|
||||
|
||||
@ -1,19 +1,17 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
|
||||
------------------------------------------------------------------------- */
|
||||
// **************************************************************************
|
||||
// atom.cu
|
||||
// -------------------
|
||||
// W. Michael Brown (ORNL)
|
||||
//
|
||||
// Device code for atom data casting
|
||||
//
|
||||
// __________________________________________________________________________
|
||||
// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
// __________________________________________________________________________
|
||||
//
|
||||
// begin :
|
||||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#include "geryon/ucl_nv_kernel.h"
|
||||
|
||||
@ -1,19 +1,17 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
/***************************************************************************
|
||||
atom.h
|
||||
-------------------
|
||||
W. Michael Brown (ORNL)
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
Class for particle data management
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
|
||||
------------------------------------------------------------------------- */
|
||||
begin :
|
||||
email : brownw@ornl.gov
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef PAIR_GPU_ATOM_H
|
||||
#define PAIR_GPU_ATOM_H
|
||||
@ -41,10 +39,10 @@ using namespace ucl_cudadr;
|
||||
#include "precision.h"
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class PairGPUAtom {
|
||||
class Atom {
|
||||
public:
|
||||
PairGPUAtom();
|
||||
~PairGPUAtom() { clear(); }
|
||||
Atom();
|
||||
~Atom() { clear(); }
|
||||
|
||||
/// Maximum number of atoms that can be stored with current allocation
|
||||
inline int max_atoms() const { return _max_atoms; }
|
||||
|
||||
@ -1,22 +1,20 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
/***************************************************************************
|
||||
balance.h
|
||||
-------------------
|
||||
W. Michael Brown (ORNL)
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
Class for host-device load balancing
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
|
||||
------------------------------------------------------------------------- */
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
#ifndef PAIR_GPU_BALANCE_H
|
||||
#define PAIR_GPU_BALANCE_H
|
||||
begin :
|
||||
email : brownw@ornl.gov
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef LAL_BALANCE_H
|
||||
#define LAL_BALANCE_H
|
||||
|
||||
#include "device.h"
|
||||
#include <math.h>
|
||||
@ -27,13 +25,13 @@
|
||||
|
||||
/// Host/device load balancer
|
||||
template<class numtyp, class acctyp>
|
||||
class PairGPUBalance {
|
||||
class Balance {
|
||||
public:
|
||||
inline PairGPUBalance() : _init_done(false), _measure_this_step(false) {}
|
||||
inline ~PairGPUBalance() { clear(); }
|
||||
inline Balance() : _init_done(false), _measure_this_step(false) {}
|
||||
inline ~Balance() { clear(); }
|
||||
|
||||
/// Clear any old data and setup for new LAMMPS run
|
||||
inline void init(PairGPUDevice<numtyp, acctyp> *gpu, const bool gpu_nbor,
|
||||
inline void init(Device<numtyp, acctyp> *gpu, const bool gpu_nbor,
|
||||
const double split);
|
||||
|
||||
/// Clear all host and device data
|
||||
@ -107,7 +105,7 @@ class PairGPUBalance {
|
||||
}
|
||||
|
||||
private:
|
||||
PairGPUDevice<numtyp,acctyp> *_device;
|
||||
Device<numtyp,acctyp> *_device;
|
||||
UCL_Timer _device_time;
|
||||
bool _init_done, _gpu_nbor;
|
||||
|
||||
@ -119,10 +117,10 @@ class PairGPUBalance {
|
||||
int _inum, _inum_full, _timestep;
|
||||
};
|
||||
|
||||
#define PairGPUBalanceT PairGPUBalance<numtyp,acctyp>
|
||||
#define BalanceT Balance<numtyp,acctyp>
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void PairGPUBalanceT::init(PairGPUDevice<numtyp, acctyp> *gpu,
|
||||
void BalanceT::init(Device<numtyp, acctyp> *gpu,
|
||||
const bool gpu_nbor, const double split) {
|
||||
clear();
|
||||
_gpu_nbor=gpu_nbor;
|
||||
@ -145,7 +143,7 @@ void PairGPUBalanceT::init(PairGPUDevice<numtyp, acctyp> *gpu,
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int PairGPUBalanceT::get_gpu_count(const int ago, const int inum_full) {
|
||||
int BalanceT::get_gpu_count(const int ago, const int inum_full) {
|
||||
_measure_this_step=false;
|
||||
if (_load_balance) {
|
||||
if (_avg_count<11 || _timestep%_HD_BALANCE_EVERY==0) {
|
||||
@ -164,7 +162,7 @@ int PairGPUBalanceT::get_gpu_count(const int ago, const int inum_full) {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void PairGPUBalanceT::balance(const double cpu_time) {
|
||||
void BalanceT::balance(const double cpu_time) {
|
||||
if (_measure_this_step) {
|
||||
_measure_this_step=false;
|
||||
double gpu_time=_device_time.seconds();
|
||||
|
||||
@ -1,46 +1,44 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
/***************************************************************************
|
||||
base_atomic.h
|
||||
-------------------
|
||||
W. Michael Brown (ORNL)
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
Base class for pair styles with per-particle data for position and type
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
|
||||
------------------------------------------------------------------------- */
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
begin :
|
||||
email : brownw@ornl.gov
|
||||
***************************************************************************/
|
||||
|
||||
#include "base_atomic.h"
|
||||
#define AtomicGPUMemoryT AtomicGPUMemory<numtyp, acctyp>
|
||||
#define BaseAtomicT BaseAtomic<numtyp, acctyp>
|
||||
|
||||
extern PairGPUDevice<PRECISION,ACC_PRECISION> pair_gpu_device;
|
||||
extern Device<PRECISION,ACC_PRECISION> global_device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
AtomicGPUMemoryT::AtomicGPUMemory() : _compiled(false), _max_bytes(0) {
|
||||
device=&pair_gpu_device;
|
||||
ans=new PairGPUAns<numtyp,acctyp>();
|
||||
nbor=new PairGPUNbor();
|
||||
BaseAtomicT::BaseAtomic() : _compiled(false), _max_bytes(0) {
|
||||
device=&global_device;
|
||||
ans=new Answer<numtyp,acctyp>();
|
||||
nbor=new Neighbor();
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
AtomicGPUMemoryT::~AtomicGPUMemory() {
|
||||
BaseAtomicT::~BaseAtomic() {
|
||||
delete ans;
|
||||
delete nbor;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int AtomicGPUMemoryT::bytes_per_atom_atomic(const int max_nbors) const {
|
||||
int BaseAtomicT::bytes_per_atom_atomic(const int max_nbors) const {
|
||||
return device->atom.bytes_per_atom()+ans->bytes_per_atom()+
|
||||
nbor->bytes_per_atom(max_nbors);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall,
|
||||
int BaseAtomicT::init_atomic(const int nlocal, const int nall,
|
||||
const int max_nbors, const int maxspecial,
|
||||
const double cell_size,
|
||||
const double gpu_split, FILE *_screen,
|
||||
@ -49,7 +47,7 @@ int AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall,
|
||||
screen=_screen;
|
||||
|
||||
bool gpu_nbor=false;
|
||||
if (device->gpu_mode()==PairGPUDevice<numtyp,acctyp>::GPU_NEIGH)
|
||||
if (device->gpu_mode()==Device<numtyp,acctyp>::GPU_NEIGH)
|
||||
gpu_nbor=true;
|
||||
|
||||
int _gpu_host=0;
|
||||
@ -90,12 +88,12 @@ int AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall,
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void AtomicGPUMemoryT::estimate_gpu_overhead() {
|
||||
void BaseAtomicT::estimate_gpu_overhead() {
|
||||
device->estimate_gpu_overhead(1,_gpu_overhead,_driver_overhead);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void AtomicGPUMemoryT::clear_atomic() {
|
||||
void BaseAtomicT::clear_atomic() {
|
||||
// Output any timing information
|
||||
acc_timers();
|
||||
double avg_split=hd_balancer.all_avg_split();
|
||||
@ -123,7 +121,7 @@ void AtomicGPUMemoryT::clear_atomic() {
|
||||
// Copy neighbor list from host
|
||||
// ---------------------------------------------------------------------------
|
||||
template <class numtyp, class acctyp>
|
||||
int * AtomicGPUMemoryT::reset_nbors(const int nall, const int inum, int *ilist,
|
||||
int * BaseAtomicT::reset_nbors(const int nall, const int inum, int *ilist,
|
||||
int *numj, int **firstneigh, bool &success) {
|
||||
success=true;
|
||||
|
||||
@ -147,7 +145,7 @@ int * AtomicGPUMemoryT::reset_nbors(const int nall, const int inum, int *ilist,
|
||||
// Build neighbor list on device
|
||||
// ---------------------------------------------------------------------------
|
||||
template <class numtyp, class acctyp>
|
||||
inline void AtomicGPUMemoryT::build_nbor_list(const int inum,
|
||||
inline void BaseAtomicT::build_nbor_list(const int inum,
|
||||
const int host_inum,
|
||||
const int nall, double **host_x,
|
||||
int *host_type, double *sublo,
|
||||
@ -176,7 +174,7 @@ inline void AtomicGPUMemoryT::build_nbor_list(const int inum,
|
||||
// Copy nbor list from host if necessary and then calculate forces, virials,..
|
||||
// ---------------------------------------------------------------------------
|
||||
template <class numtyp, class acctyp>
|
||||
void AtomicGPUMemoryT::compute(const int f_ago, const int inum_full,
|
||||
void BaseAtomicT::compute(const int f_ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
int *ilist, int *numj, int **firstneigh,
|
||||
const bool eflag, const bool vflag,
|
||||
@ -217,7 +215,7 @@ void AtomicGPUMemoryT::compute(const int f_ago, const int inum_full,
|
||||
// Reneighbor on GPU if necessary and then compute forces, virials, energies
|
||||
// ---------------------------------------------------------------------------
|
||||
template <class numtyp, class acctyp>
|
||||
int ** AtomicGPUMemoryT::compute(const int ago, const int inum_full,
|
||||
int ** BaseAtomicT::compute(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag,
|
||||
int **nspecial, int **special, const bool eflag,
|
||||
@ -263,13 +261,13 @@ int ** AtomicGPUMemoryT::compute(const int ago, const int inum_full,
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
double AtomicGPUMemoryT::host_memory_usage_atomic() const {
|
||||
double BaseAtomicT::host_memory_usage_atomic() const {
|
||||
return device->atom.host_memory_usage()+nbor->host_memory_usage()+
|
||||
4*sizeof(numtyp)+sizeof(AtomicGPUMemory<numtyp,acctyp>);
|
||||
4*sizeof(numtyp)+sizeof(BaseAtomic<numtyp,acctyp>);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void AtomicGPUMemoryT::compile_kernels(UCL_Device &dev, const char *pair_str) {
|
||||
void BaseAtomicT::compile_kernels(UCL_Device &dev, const char *pair_str) {
|
||||
if (_compiled)
|
||||
return;
|
||||
|
||||
@ -285,5 +283,5 @@ void AtomicGPUMemoryT::compile_kernels(UCL_Device &dev, const char *pair_str) {
|
||||
_compiled=true;
|
||||
}
|
||||
|
||||
template class AtomicGPUMemory<PRECISION,ACC_PRECISION>;
|
||||
template class BaseAtomic<PRECISION,ACC_PRECISION>;
|
||||
|
||||
|
||||
@ -1,22 +1,20 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
/***************************************************************************
|
||||
base_atomic.h
|
||||
-------------------
|
||||
W. Michael Brown (ORNL)
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
Base class for pair styles with per-particle data for position and type
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
|
||||
------------------------------------------------------------------------- */
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
#ifndef ATOMIC_GPU_MEMORY_H
|
||||
#define ATOMIC_GPU_MEMORY_H
|
||||
begin :
|
||||
email : brownw@ornl.gov
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef LAL_BASE_ATOMIC_H
|
||||
#define LAL_BASE_ATOMIC_H
|
||||
|
||||
#include "device.h"
|
||||
#include "balance.h"
|
||||
@ -29,10 +27,10 @@
|
||||
#endif
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class AtomicGPUMemory {
|
||||
class BaseAtomic {
|
||||
public:
|
||||
AtomicGPUMemory();
|
||||
virtual ~AtomicGPUMemory();
|
||||
BaseAtomic();
|
||||
virtual ~BaseAtomic();
|
||||
|
||||
/// Clear any previous data and set up for a new LAMMPS run
|
||||
/** \param max_nbors initial number of rows in the neighbor matrix
|
||||
@ -150,7 +148,7 @@ class AtomicGPUMemory {
|
||||
// -------------------------- DEVICE DATA -------------------------
|
||||
|
||||
/// Device Properties and Atom and Neighbor storage
|
||||
PairGPUDevice<numtyp,acctyp> *device;
|
||||
Device<numtyp,acctyp> *device;
|
||||
|
||||
/// Geryon device
|
||||
UCL_Device *ucl_device;
|
||||
@ -159,7 +157,7 @@ class AtomicGPUMemory {
|
||||
UCL_Timer time_pair;
|
||||
|
||||
/// Host device load balancer
|
||||
PairGPUBalance<numtyp,acctyp> hd_balancer;
|
||||
Balance<numtyp,acctyp> hd_balancer;
|
||||
|
||||
/// LAMMPS pointer for screen output
|
||||
FILE *screen;
|
||||
@ -167,16 +165,16 @@ class AtomicGPUMemory {
|
||||
// --------------------------- ATOM DATA --------------------------
|
||||
|
||||
/// Atom Data
|
||||
PairGPUAtom<numtyp,acctyp> *atom;
|
||||
Atom<numtyp,acctyp> *atom;
|
||||
|
||||
// ------------------------ FORCE/ENERGY DATA -----------------------
|
||||
|
||||
PairGPUAns<numtyp,acctyp> *ans;
|
||||
Answer<numtyp,acctyp> *ans;
|
||||
|
||||
// --------------------------- NBOR DATA ----------------------------
|
||||
|
||||
/// Neighbor data
|
||||
PairGPUNbor *nbor;
|
||||
Neighbor *nbor;
|
||||
|
||||
/// True if we need to accumulate time for neighboring
|
||||
bool nbor_time_avail;
|
||||
|
||||
@ -1,46 +1,45 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Charge/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
/***************************************************************************
|
||||
base_charge.cpp
|
||||
-------------------
|
||||
W. Michael Brown (ORNL)
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
Base class for pair styles needing per-particle data for position,
|
||||
charge, and type.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
|
||||
------------------------------------------------------------------------- */
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
begin :
|
||||
email : brownw@ornl.gov
|
||||
***************************************************************************/
|
||||
|
||||
#include "base_charge.h"
|
||||
#define ChargeGPUMemoryT ChargeGPUMemory<numtyp, acctyp>
|
||||
#define BaseChargeT BaseCharge<numtyp, acctyp>
|
||||
|
||||
extern PairGPUDevice<PRECISION,ACC_PRECISION> pair_gpu_device;
|
||||
extern Device<PRECISION,ACC_PRECISION> global_device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
ChargeGPUMemoryT::ChargeGPUMemory() : _compiled(false), _max_bytes(0) {
|
||||
device=&pair_gpu_device;
|
||||
ans=new PairGPUAns<numtyp,acctyp>();
|
||||
nbor=new PairGPUNbor();
|
||||
BaseChargeT::BaseCharge() : _compiled(false), _max_bytes(0) {
|
||||
device=&global_device;
|
||||
ans=new Answer<numtyp,acctyp>();
|
||||
nbor=new Neighbor();
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
ChargeGPUMemoryT::~ChargeGPUMemory() {
|
||||
BaseChargeT::~BaseCharge() {
|
||||
delete ans;
|
||||
delete nbor;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int ChargeGPUMemoryT::bytes_per_atom_atomic(const int max_nbors) const {
|
||||
int BaseChargeT::bytes_per_atom_atomic(const int max_nbors) const {
|
||||
return device->atom.bytes_per_atom()+ans->bytes_per_atom()+
|
||||
nbor->bytes_per_atom(max_nbors);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int ChargeGPUMemoryT::init_atomic(const int nlocal, const int nall,
|
||||
int BaseChargeT::init_atomic(const int nlocal, const int nall,
|
||||
const int max_nbors, const int maxspecial,
|
||||
const double cell_size,
|
||||
const double gpu_split, FILE *_screen,
|
||||
@ -49,7 +48,7 @@ int ChargeGPUMemoryT::init_atomic(const int nlocal, const int nall,
|
||||
screen=_screen;
|
||||
|
||||
bool gpu_nbor=false;
|
||||
if (device->gpu_mode()==PairGPUDevice<numtyp,acctyp>::GPU_NEIGH)
|
||||
if (device->gpu_mode()==Device<numtyp,acctyp>::GPU_NEIGH)
|
||||
gpu_nbor=true;
|
||||
|
||||
int _gpu_host=0;
|
||||
@ -92,12 +91,12 @@ int ChargeGPUMemoryT::init_atomic(const int nlocal, const int nall,
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void ChargeGPUMemoryT::estimate_gpu_overhead() {
|
||||
void BaseChargeT::estimate_gpu_overhead() {
|
||||
device->estimate_gpu_overhead(1,_gpu_overhead,_driver_overhead);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void ChargeGPUMemoryT::clear_atomic() {
|
||||
void BaseChargeT::clear_atomic() {
|
||||
// Output any timing information
|
||||
acc_timers();
|
||||
double avg_split=hd_balancer.all_avg_split();
|
||||
@ -125,7 +124,7 @@ void ChargeGPUMemoryT::clear_atomic() {
|
||||
// Copy neighbor list from host
|
||||
// ---------------------------------------------------------------------------
|
||||
template <class numtyp, class acctyp>
|
||||
int * ChargeGPUMemoryT::reset_nbors(const int nall, const int inum, int *ilist,
|
||||
int * BaseChargeT::reset_nbors(const int nall, const int inum, int *ilist,
|
||||
int *numj, int **firstneigh, bool &success) {
|
||||
success=true;
|
||||
|
||||
@ -150,7 +149,7 @@ int * ChargeGPUMemoryT::reset_nbors(const int nall, const int inum, int *ilist,
|
||||
// Build neighbor list on device
|
||||
// ---------------------------------------------------------------------------
|
||||
template <class numtyp, class acctyp>
|
||||
inline void ChargeGPUMemoryT::build_nbor_list(const int inum,
|
||||
inline void BaseChargeT::build_nbor_list(const int inum,
|
||||
const int host_inum,
|
||||
const int nall, double **host_x,
|
||||
int *host_type, double *sublo,
|
||||
@ -179,7 +178,7 @@ inline void ChargeGPUMemoryT::build_nbor_list(const int inum,
|
||||
// Copy nbor list from host if necessary and then calculate forces, virials,..
|
||||
// ---------------------------------------------------------------------------
|
||||
template <class numtyp, class acctyp>
|
||||
void ChargeGPUMemoryT::compute(const int f_ago, const int inum_full,
|
||||
void BaseChargeT::compute(const int f_ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
int *ilist, int *numj, int **firstneigh,
|
||||
const bool eflag, const bool vflag,
|
||||
@ -226,7 +225,7 @@ void ChargeGPUMemoryT::compute(const int f_ago, const int inum_full,
|
||||
// Reneighbor on GPU if necessary and then compute forces, virials, energies
|
||||
// ---------------------------------------------------------------------------
|
||||
template <class numtyp, class acctyp>
|
||||
int** ChargeGPUMemoryT::compute(const int ago, const int inum_full,
|
||||
int** BaseChargeT::compute(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag,
|
||||
int **nspecial, int **special, const bool eflag,
|
||||
@ -279,13 +278,13 @@ int** ChargeGPUMemoryT::compute(const int ago, const int inum_full,
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
double ChargeGPUMemoryT::host_memory_usage_atomic() const {
|
||||
double BaseChargeT::host_memory_usage_atomic() const {
|
||||
return device->atom.host_memory_usage()+nbor->host_memory_usage()+
|
||||
4*sizeof(numtyp)+sizeof(ChargeGPUMemory<numtyp,acctyp>);
|
||||
4*sizeof(numtyp)+sizeof(BaseCharge<numtyp,acctyp>);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void ChargeGPUMemoryT::compile_kernels(UCL_Device &dev, const char *pair_str) {
|
||||
void BaseChargeT::compile_kernels(UCL_Device &dev, const char *pair_str) {
|
||||
if (_compiled)
|
||||
return;
|
||||
|
||||
@ -302,5 +301,5 @@ void ChargeGPUMemoryT::compile_kernels(UCL_Device &dev, const char *pair_str) {
|
||||
_compiled=true;
|
||||
}
|
||||
|
||||
template class ChargeGPUMemory<PRECISION,ACC_PRECISION>;
|
||||
template class BaseCharge<PRECISION,ACC_PRECISION>;
|
||||
|
||||
|
||||
@ -1,22 +1,21 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Charge/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
/***************************************************************************
|
||||
base_charge.h
|
||||
-------------------
|
||||
W. Michael Brown (ORNL)
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
Base class for pair styles needing per-particle data for position,
|
||||
charge, and type.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
|
||||
------------------------------------------------------------------------- */
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
#ifndef CHARGE_GPU_MEMORY_H
|
||||
#define CHARGE_GPU_MEMORY_H
|
||||
begin :
|
||||
email : brownw@ornl.gov
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef LAL_BASE_CHARGE_H
|
||||
#define LAL_BASE_CHARGE_H
|
||||
|
||||
#include "device.h"
|
||||
#include "balance.h"
|
||||
@ -29,10 +28,10 @@
|
||||
#endif
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class ChargeGPUMemory {
|
||||
class BaseCharge {
|
||||
public:
|
||||
ChargeGPUMemory();
|
||||
virtual ~ChargeGPUMemory();
|
||||
BaseCharge();
|
||||
virtual ~BaseCharge();
|
||||
|
||||
/// Clear any previous data and set up for a new LAMMPS run
|
||||
/** \param max_nbors initial number of rows in the neighbor matrix
|
||||
@ -146,7 +145,7 @@ class ChargeGPUMemory {
|
||||
// -------------------------- DEVICE DATA -------------------------
|
||||
|
||||
/// Device Properties and Atom and Neighbor storage
|
||||
PairGPUDevice<numtyp,acctyp> *device;
|
||||
Device<numtyp,acctyp> *device;
|
||||
|
||||
/// Geryon device
|
||||
UCL_Device *ucl_device;
|
||||
@ -155,7 +154,7 @@ class ChargeGPUMemory {
|
||||
UCL_Timer time_pair;
|
||||
|
||||
/// Host device load balancer
|
||||
PairGPUBalance<numtyp,acctyp> hd_balancer;
|
||||
Balance<numtyp,acctyp> hd_balancer;
|
||||
|
||||
/// LAMMPS pointer for screen output
|
||||
FILE *screen;
|
||||
@ -163,17 +162,17 @@ class ChargeGPUMemory {
|
||||
// --------------------------- ATOM DATA --------------------------
|
||||
|
||||
/// Atom Data
|
||||
PairGPUAtom<numtyp,acctyp> *atom;
|
||||
Atom<numtyp,acctyp> *atom;
|
||||
|
||||
|
||||
// ------------------------ FORCE/ENERGY DATA -----------------------
|
||||
|
||||
PairGPUAns<numtyp,acctyp> *ans;
|
||||
Answer<numtyp,acctyp> *ans;
|
||||
|
||||
// --------------------------- NBOR DATA ----------------------------
|
||||
|
||||
/// Neighbor data
|
||||
PairGPUNbor *nbor;
|
||||
Neighbor *nbor;
|
||||
|
||||
/// True if we need to accumulate time for neighboring
|
||||
bool nbor_time_avail;
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
/***************************************************************************
|
||||
base_ellipsoid.cpp
|
||||
-------------------
|
||||
W. Michael Brown
|
||||
W. Michael Brown (ORNL)
|
||||
|
||||
Base class for acceleration of ellipsoid potentials
|
||||
|
||||
@ -23,13 +23,13 @@ using namespace LAMMPS_AL;
|
||||
#endif
|
||||
|
||||
#define BaseEllipsoidT BaseEllipsoid<numtyp, acctyp>
|
||||
extern PairGPUDevice<PRECISION,ACC_PRECISION> pair_gpu_device;
|
||||
extern Device<PRECISION,ACC_PRECISION> global_device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
BaseEllipsoidT::BaseEllipsoid() : _compiled(false), _max_bytes(0) {
|
||||
device=&pair_gpu_device;
|
||||
ans=new PairGPUAns<numtyp,acctyp>();
|
||||
nbor=new PairGPUNbor();
|
||||
device=&global_device;
|
||||
ans=new Answer<numtyp,acctyp>();
|
||||
nbor=new Neighbor();
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
@ -56,7 +56,7 @@ int BaseEllipsoidT::init_base(const int nlocal, const int nall,
|
||||
_ellipsoid_sphere=ellip_sphere;
|
||||
|
||||
bool gpu_nbor=false;
|
||||
if (device->gpu_mode()==PairGPUDevice<numtyp,acctyp>::GPU_NEIGH)
|
||||
if (device->gpu_mode()==Device<numtyp,acctyp>::GPU_NEIGH)
|
||||
gpu_nbor=true;
|
||||
|
||||
int _gpu_host=0;
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
/***************************************************************************
|
||||
base_ellipsoid.h
|
||||
-------------------
|
||||
W. Michael Brown
|
||||
W. Michael Brown (ORNL)
|
||||
|
||||
Base class for acceleration of ellipsoid potentials
|
||||
|
||||
@ -13,8 +13,8 @@
|
||||
email : brownw@ornl.gov
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef BASE_ELLIPSOID_H
|
||||
#define BASE_ELLIPSOID_H
|
||||
#ifndef LAL_BASE_ELLIPSOID_H
|
||||
#define LAL_BASE_ELLIPSOID_H
|
||||
|
||||
#include "device.h"
|
||||
#include "balance.h"
|
||||
@ -182,7 +182,7 @@ class BaseEllipsoid {
|
||||
// -------------------------- DEVICE DATA -------------------------
|
||||
|
||||
/// Device Properties and Atom and Neighbor storage
|
||||
PairGPUDevice<numtyp,acctyp> *device;
|
||||
Device<numtyp,acctyp> *device;
|
||||
|
||||
/// Geryon device
|
||||
UCL_Device *ucl_device;
|
||||
@ -192,7 +192,7 @@ class BaseEllipsoid {
|
||||
UCL_Timer time_nbor3, time_ellipsoid3;
|
||||
|
||||
/// Host device load balancer
|
||||
PairGPUBalance<numtyp,acctyp> hd_balancer;
|
||||
Balance<numtyp,acctyp> hd_balancer;
|
||||
|
||||
/// LAMMPS pointer for screen output
|
||||
FILE *screen;
|
||||
@ -200,7 +200,7 @@ class BaseEllipsoid {
|
||||
// --------------------------- ATOM DATA --------------------------
|
||||
|
||||
/// Atom Data
|
||||
PairGPUAtom<numtyp,acctyp> *atom;
|
||||
Atom<numtyp,acctyp> *atom;
|
||||
|
||||
// --------------------------- TYPE DATA --------------------------
|
||||
|
||||
@ -209,12 +209,12 @@ class BaseEllipsoid {
|
||||
|
||||
// ------------------------ FORCE/ENERGY DATA -----------------------
|
||||
|
||||
PairGPUAns<numtyp,acctyp> *ans;
|
||||
Answer<numtyp,acctyp> *ans;
|
||||
|
||||
// --------------------------- NBOR DATA ----------------------------
|
||||
|
||||
/// Neighbor data
|
||||
PairGPUNbor *nbor;
|
||||
Neighbor *nbor;
|
||||
/// ilist with particles sorted by type
|
||||
UCL_H_Vec<int> host_olist;
|
||||
/// True if we need to accumulate time for neighboring
|
||||
|
||||
@ -25,10 +25,10 @@
|
||||
#include <cassert>
|
||||
#define CMM_GPU_MemoryT CMM_GPU_Memory<numtyp, acctyp>
|
||||
|
||||
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
|
||||
extern Device<PRECISION,ACC_PRECISION> device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
CMM_GPU_MemoryT::CMM_GPU_Memory() : AtomicGPUMemory<numtyp,acctyp>(), _allocated(false) {
|
||||
CMM_GPU_MemoryT::CMM_GPU_Memory() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
|
||||
@ -21,7 +21,7 @@
|
||||
#include "base_atomic.h"
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class CMM_GPU_Memory : public AtomicGPUMemory<numtyp, acctyp> {
|
||||
class CMM_GPU_Memory : public BaseAtomic<numtyp, acctyp> {
|
||||
public:
|
||||
CMM_GPU_Memory();
|
||||
~CMM_GPU_Memory();
|
||||
|
||||
@ -25,10 +25,10 @@
|
||||
#include <cassert>
|
||||
#define CMML_GPU_MemoryT CMML_GPU_Memory<numtyp, acctyp>
|
||||
|
||||
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
|
||||
extern Device<PRECISION,ACC_PRECISION> device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
CMML_GPU_MemoryT::CMML_GPU_Memory() : ChargeGPUMemory<numtyp,acctyp>(),
|
||||
CMML_GPU_MemoryT::CMML_GPU_Memory() : BaseCharge<numtyp,acctyp>(),
|
||||
_allocated(false) {
|
||||
}
|
||||
|
||||
|
||||
@ -21,7 +21,7 @@
|
||||
#include "base_charge.h"
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class CMML_GPU_Memory : public ChargeGPUMemory<numtyp, acctyp> {
|
||||
class CMML_GPU_Memory : public BaseCharge<numtyp, acctyp> {
|
||||
public:
|
||||
CMML_GPU_Memory();
|
||||
~CMML_GPU_Memory();
|
||||
|
||||
@ -25,10 +25,10 @@
|
||||
#include <cassert>
|
||||
#define CMMM_GPU_MemoryT CMMM_GPU_Memory<numtyp, acctyp>
|
||||
|
||||
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
|
||||
extern Device<PRECISION,ACC_PRECISION> device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
CMMM_GPU_MemoryT::CMMM_GPU_Memory() : ChargeGPUMemory<numtyp,acctyp>(),
|
||||
CMMM_GPU_MemoryT::CMMM_GPU_Memory() : BaseCharge<numtyp,acctyp>(),
|
||||
_allocated(false) {
|
||||
}
|
||||
|
||||
|
||||
@ -21,7 +21,7 @@
|
||||
#include "base_charge.h"
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class CMMM_GPU_Memory : public ChargeGPUMemory<numtyp, acctyp> {
|
||||
class CMMM_GPU_Memory : public BaseCharge<numtyp, acctyp> {
|
||||
public:
|
||||
CMMM_GPU_Memory();
|
||||
~CMMM_GPU_Memory();
|
||||
|
||||
@ -25,10 +25,10 @@
|
||||
#include <cassert>
|
||||
#define CRML_GPU_MemoryT CRML_GPU_Memory<numtyp, acctyp>
|
||||
|
||||
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
|
||||
extern Device<PRECISION,ACC_PRECISION> device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
CRML_GPU_MemoryT::CRML_GPU_Memory() : ChargeGPUMemory<numtyp,acctyp>(),
|
||||
CRML_GPU_MemoryT::CRML_GPU_Memory() : BaseCharge<numtyp,acctyp>(),
|
||||
_allocated(false) {
|
||||
}
|
||||
|
||||
|
||||
@ -21,7 +21,7 @@
|
||||
#include "base_charge.h"
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class CRML_GPU_Memory : public ChargeGPUMemory<numtyp, acctyp> {
|
||||
class CRML_GPU_Memory : public BaseCharge<numtyp, acctyp> {
|
||||
public:
|
||||
CRML_GPU_Memory();
|
||||
~CRML_GPU_Memory();
|
||||
|
||||
@ -1,19 +1,17 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
/***************************************************************************
|
||||
device.cpp
|
||||
-------------------
|
||||
W. Michael Brown (ORNL)
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
Class for management of the device where the computations are performed
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
|
||||
------------------------------------------------------------------------- */
|
||||
begin :
|
||||
email : brownw@ornl.gov
|
||||
***************************************************************************/
|
||||
|
||||
#include "device.h"
|
||||
#include "precision.h"
|
||||
@ -29,21 +27,21 @@
|
||||
#include "pair_gpu_dev_ptx.h"
|
||||
#endif
|
||||
|
||||
#define PairGPUDeviceT PairGPUDevice<numtyp, acctyp>
|
||||
#define DeviceT Device<numtyp, acctyp>
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
PairGPUDeviceT::PairGPUDevice() : _init_count(0), _device_init(false),
|
||||
DeviceT::Device() : _init_count(0), _device_init(false),
|
||||
_gpu_mode(GPU_FORCE), _first_device(0),
|
||||
_last_device(0), _compiled(false) {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
PairGPUDeviceT::~PairGPUDevice() {
|
||||
DeviceT::~Device() {
|
||||
clear_device();
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int PairGPUDeviceT::init_device(MPI_Comm world, MPI_Comm replica,
|
||||
int DeviceT::init_device(MPI_Comm world, MPI_Comm replica,
|
||||
const int first_gpu, const int last_gpu,
|
||||
const int gpu_mode, const double p_split,
|
||||
const int nthreads, const int t_per_atom) {
|
||||
@ -135,10 +133,10 @@ int PairGPUDeviceT::init_device(MPI_Comm world, MPI_Comm replica,
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int PairGPUDeviceT::init(PairGPUAns<numtyp,acctyp> &ans, const bool charge,
|
||||
int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
|
||||
const bool rot, const int nlocal,
|
||||
const int host_nlocal, const int nall,
|
||||
PairGPUNbor *nbor, const int maxspecial,
|
||||
Neighbor *nbor, const int maxspecial,
|
||||
const int gpu_host, const int max_nbors,
|
||||
const double cell_size, const bool pre_cut) {
|
||||
if (!_device_init)
|
||||
@ -181,7 +179,7 @@ int PairGPUDeviceT::init(PairGPUAns<numtyp,acctyp> &ans, const bool charge,
|
||||
if (!ans.init(ef_nlocal,charge,rot,*gpu))
|
||||
return -3;
|
||||
|
||||
if (!nbor->init(&_nbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial,
|
||||
if (!nbor->init(&_neighbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial,
|
||||
*gpu,gpu_nbor,gpu_host,pre_cut, _block_cell_2d,
|
||||
_block_cell_id, _block_nbor_build))
|
||||
return -3;
|
||||
@ -192,7 +190,7 @@ int PairGPUDeviceT::init(PairGPUAns<numtyp,acctyp> &ans, const bool charge,
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int PairGPUDeviceT::init(PairGPUAns<numtyp,acctyp> &ans, const int nlocal,
|
||||
int DeviceT::init(Answer<numtyp,acctyp> &ans, const int nlocal,
|
||||
const int nall) {
|
||||
if (!_device_init)
|
||||
return -1;
|
||||
@ -215,21 +213,21 @@ int PairGPUDeviceT::init(PairGPUAns<numtyp,acctyp> &ans, const int nlocal,
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void PairGPUDeviceT::set_single_precompute
|
||||
void DeviceT::set_single_precompute
|
||||
(PPPMGPUMemory<numtyp,acctyp,float,_lgpu_float4> *pppm) {
|
||||
_long_range_precompute=1;
|
||||
pppm_single=pppm;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void PairGPUDeviceT::set_double_precompute
|
||||
void DeviceT::set_double_precompute
|
||||
(PPPMGPUMemory<numtyp,acctyp,double,_lgpu_double4> *pppm) {
|
||||
_long_range_precompute=2;
|
||||
pppm_double=pppm;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void PairGPUDeviceT::init_message(FILE *screen, const char *name,
|
||||
void DeviceT::init_message(FILE *screen, const char *name,
|
||||
const int first_gpu, const int last_gpu) {
|
||||
#ifdef USE_OPENCL
|
||||
std::string fs="";
|
||||
@ -272,7 +270,7 @@ void PairGPUDeviceT::init_message(FILE *screen, const char *name,
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void PairGPUDeviceT::estimate_gpu_overhead(const int kernel_calls,
|
||||
void DeviceT::estimate_gpu_overhead(const int kernel_calls,
|
||||
double &gpu_overhead,
|
||||
double &gpu_driver_overhead) {
|
||||
UCL_H_Vec<int> *host_data_in=NULL, *host_data_out=NULL;
|
||||
@ -384,9 +382,9 @@ void PairGPUDeviceT::estimate_gpu_overhead(const int kernel_calls,
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void PairGPUDeviceT::output_times(UCL_Timer &time_pair,
|
||||
PairGPUAns<numtyp,acctyp> &ans,
|
||||
PairGPUNbor &nbor, const double avg_split,
|
||||
void DeviceT::output_times(UCL_Timer &time_pair,
|
||||
Answer<numtyp,acctyp> &ans,
|
||||
Neighbor &nbor, const double avg_split,
|
||||
const double max_bytes,
|
||||
const double gpu_overhead,
|
||||
const double driver_overhead,
|
||||
@ -440,12 +438,12 @@ void PairGPUDeviceT::output_times(UCL_Timer &time_pair,
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void PairGPUDeviceT::output_kspace_times(UCL_Timer &time_in,
|
||||
void DeviceT::output_kspace_times(UCL_Timer &time_in,
|
||||
UCL_Timer &time_out,
|
||||
UCL_Timer &time_map,
|
||||
UCL_Timer &time_rho,
|
||||
UCL_Timer &time_interp,
|
||||
PairGPUAns<numtyp,acctyp> &ans,
|
||||
Answer<numtyp,acctyp> &ans,
|
||||
const double max_bytes,
|
||||
const double cpu_time,
|
||||
const double idle_time, FILE *screen) {
|
||||
@ -500,13 +498,13 @@ void PairGPUDeviceT::output_kspace_times(UCL_Timer &time_in,
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void PairGPUDeviceT::clear() {
|
||||
void DeviceT::clear() {
|
||||
if (_init_count>0) {
|
||||
_long_range_precompute=0;
|
||||
_init_count--;
|
||||
if (_init_count==0) {
|
||||
atom.clear();
|
||||
_nbor_shared.clear();
|
||||
_neighbor_shared.clear();
|
||||
if (_compiled) {
|
||||
k_zero.clear();
|
||||
k_info.clear();
|
||||
@ -518,7 +516,7 @@ void PairGPUDeviceT::clear() {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void PairGPUDeviceT::clear_device() {
|
||||
void DeviceT::clear_device() {
|
||||
while (_init_count>0)
|
||||
clear();
|
||||
if (_device_init) {
|
||||
@ -528,7 +526,7 @@ void PairGPUDeviceT::clear_device() {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int PairGPUDeviceT::compile_kernels() {
|
||||
int DeviceT::compile_kernels() {
|
||||
int flag=0;
|
||||
|
||||
if (_compiled)
|
||||
@ -588,27 +586,27 @@ int PairGPUDeviceT::compile_kernels() {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
double PairGPUDeviceT::host_memory_usage() const {
|
||||
double DeviceT::host_memory_usage() const {
|
||||
return atom.host_memory_usage()+4*sizeof(numtyp)+
|
||||
sizeof(PairGPUDevice<numtyp,acctyp>);
|
||||
sizeof(Device<numtyp,acctyp>);
|
||||
}
|
||||
|
||||
template class PairGPUDevice<PRECISION,ACC_PRECISION>;
|
||||
PairGPUDevice<PRECISION,ACC_PRECISION> pair_gpu_device;
|
||||
template class Device<PRECISION,ACC_PRECISION>;
|
||||
Device<PRECISION,ACC_PRECISION> global_device;
|
||||
|
||||
int lmp_init_device(MPI_Comm world, MPI_Comm replica, const int first_gpu,
|
||||
const int last_gpu, const int gpu_mode,
|
||||
const double particle_split, const int nthreads,
|
||||
const int t_per_atom) {
|
||||
return pair_gpu_device.init_device(world,replica,first_gpu,last_gpu,gpu_mode,
|
||||
return global_device.init_device(world,replica,first_gpu,last_gpu,gpu_mode,
|
||||
particle_split,nthreads,t_per_atom);
|
||||
}
|
||||
|
||||
void lmp_clear_device() {
|
||||
pair_gpu_device.clear_device();
|
||||
global_device.clear_device();
|
||||
}
|
||||
|
||||
double lmp_gpu_forces(double **f, double **tor, double *eatom,
|
||||
double **vatom, double *virial, double &ecoul) {
|
||||
return pair_gpu_device.fix_gpu(f,tor,eatom,vatom,virial,ecoul);
|
||||
return global_device.fix_gpu(f,tor,eatom,vatom,virial,ecoul);
|
||||
}
|
||||
|
||||
@ -1,19 +1,17 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS-Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
|
||||
------------------------------------------------------------------------- */
|
||||
// **************************************************************************
|
||||
// device.cu
|
||||
// -------------------
|
||||
// W. Michael Brown (ORNL)
|
||||
//
|
||||
// Device code for device information
|
||||
//
|
||||
// __________________________________________________________________________
|
||||
// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
// __________________________________________________________________________
|
||||
//
|
||||
// begin :
|
||||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
/*************************************************************************
|
||||
Preprocessor Definitions
|
||||
@ -63,9 +61,6 @@
|
||||
|
||||
*************************************************************************/
|
||||
|
||||
#ifndef PAIR_GPU_DEV_KERNEL
|
||||
#define PAIR_GPU_DEV_KERNEL
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
|
||||
#include "nv_kernel_def.h"
|
||||
@ -116,5 +111,3 @@ __kernel void kernel_info(__global int *info) {
|
||||
info[13]=THREADS_PER_CHARGE;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@ -1,26 +1,24 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
/***************************************************************************
|
||||
device.h
|
||||
-------------------
|
||||
W. Michael Brown (ORNL)
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
Class for management of the device where the computations are performed
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
|
||||
------------------------------------------------------------------------- */
|
||||
begin :
|
||||
email : brownw@ornl.gov
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef PAIR_GPU_DEVICE_H
|
||||
#define PAIR_GPU_DEVICE_H
|
||||
#ifndef LAL_DEVICE_H
|
||||
#define LAL_DEVICE_H
|
||||
|
||||
#include "atom.h"
|
||||
#include "ans.h"
|
||||
#include "nbor.h"
|
||||
#include "answer.h"
|
||||
#include "neighbor.h"
|
||||
#include "pppm.h"
|
||||
#include "mpi.h"
|
||||
#include <sstream>
|
||||
@ -32,10 +30,10 @@ template <class numtyp, class acctyp,
|
||||
class grdtyp, class grdtyp4> class PPPMGPUMemory;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class PairGPUDevice {
|
||||
class Device {
|
||||
public:
|
||||
PairGPUDevice();
|
||||
~PairGPUDevice();
|
||||
Device();
|
||||
~Device();
|
||||
|
||||
/// Initialize the device for use by this process
|
||||
/** Sets up a per-device MPI communicator for load balancing and initializes
|
||||
@ -68,9 +66,9 @@ class PairGPUDevice {
|
||||
* - -3 if there is an out of memory error
|
||||
* - -4 if the GPU library was not compiled for GPU
|
||||
* - -5 Double precision is not supported on card **/
|
||||
int init(PairGPUAns<numtyp,acctyp> &a, const bool charge, const bool rot,
|
||||
int init(Answer<numtyp,acctyp> &a, const bool charge, const bool rot,
|
||||
const int nlocal, const int host_nlocal, const int nall,
|
||||
PairGPUNbor *nbor, const int maxspecial, const int gpu_host,
|
||||
Neighbor *nbor, const int maxspecial, const int gpu_host,
|
||||
const int max_nbors, const double cell_size, const bool pre_cut);
|
||||
|
||||
/// Initialize the device for Atom storage only
|
||||
@ -83,7 +81,7 @@ class PairGPUDevice {
|
||||
* - -3 if there is an out of memory error
|
||||
* - -4 if the GPU library was not compiled for GPU
|
||||
* - -5 Double precision is not supported on card **/
|
||||
int init(PairGPUAns<numtyp,acctyp> &ans, const int nlocal, const int nall);
|
||||
int init(Answer<numtyp,acctyp> &ans, const int nlocal, const int nall);
|
||||
|
||||
/// Output a message for pair_style acceleration with device stats
|
||||
void init_message(FILE *screen, const char *name,
|
||||
@ -109,8 +107,8 @@ class PairGPUDevice {
|
||||
inline bool double_precision() { return gpu->double_precision(); }
|
||||
|
||||
/// Output a message with timing information
|
||||
void output_times(UCL_Timer &time_pair, PairGPUAns<numtyp,acctyp> &ans,
|
||||
PairGPUNbor &nbor, const double avg_split,
|
||||
void output_times(UCL_Timer &time_pair, Answer<numtyp,acctyp> &ans,
|
||||
Neighbor &nbor, const double avg_split,
|
||||
const double max_bytes, const double gpu_overhead,
|
||||
const double driver_overhead,
|
||||
const int threads_per_atom, FILE *screen);
|
||||
@ -119,7 +117,7 @@ class PairGPUDevice {
|
||||
void output_kspace_times(UCL_Timer &time_in, UCL_Timer &time_out,
|
||||
UCL_Timer & time_map, UCL_Timer & time_rho,
|
||||
UCL_Timer &time_interp,
|
||||
PairGPUAns<numtyp,acctyp> &ans,
|
||||
Answer<numtyp,acctyp> &ans,
|
||||
const double max_bytes, const double cpu_time,
|
||||
const double cpu_idle_time, FILE *screen);
|
||||
|
||||
@ -130,7 +128,7 @@ class PairGPUDevice {
|
||||
void clear_device();
|
||||
|
||||
/// Add an answer object for putting forces, energies, etc from GPU to LAMMPS
|
||||
inline void add_ans_object(PairGPUAns<numtyp,acctyp> *ans)
|
||||
inline void add_ans_object(Answer<numtyp,acctyp> *ans)
|
||||
{ ans_queue.push(ans); }
|
||||
|
||||
/// Add "answers" (force,energies,etc.) into LAMMPS structures
|
||||
@ -248,12 +246,12 @@ class PairGPUDevice {
|
||||
// --------------------------- ATOM DATA --------------------------
|
||||
|
||||
/// Atom Data
|
||||
PairGPUAtom<numtyp,acctyp> atom;
|
||||
Atom<numtyp,acctyp> atom;
|
||||
|
||||
// --------------------------- NBOR DATA ----------------------------
|
||||
|
||||
/// Neighbor Data
|
||||
PairGPUNborShared _nbor_shared;
|
||||
NeighborShared _neighbor_shared;
|
||||
|
||||
// ------------------------ LONG RANGE DATA -------------------------
|
||||
|
||||
@ -274,7 +272,7 @@ class PairGPUDevice {
|
||||
}
|
||||
|
||||
private:
|
||||
std::queue<PairGPUAns<numtyp,acctyp> *> ans_queue;
|
||||
std::queue<Answer<numtyp,acctyp> *> ans_queue;
|
||||
int _init_count;
|
||||
bool _device_init, _host_timer_started, _time_device;
|
||||
MPI_Comm _comm_world, _comm_replica, _comm_gpu;
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
// **************************************************************************
|
||||
// ellipsoid_extra.h
|
||||
// -------------------
|
||||
// W. Michael Brown
|
||||
// W. Michael Brown (ORNL)
|
||||
//
|
||||
// Device code for Ellipsoid math routines
|
||||
//
|
||||
@ -13,8 +13,8 @@
|
||||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifndef ELLIPSOID_EXTRA_H
|
||||
#define ELLIPSOID_EXTRA_H
|
||||
#ifndef LAL_ELLIPSOID_EXTRA_H
|
||||
#define LAL_ELLIPSOID_EXTRA_H
|
||||
|
||||
enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE};
|
||||
|
||||
@ -409,16 +409,4 @@ __inline void gpu_times_column3(const numtyp m[9], const numtyp v[3],
|
||||
ans[2] = m[6]*v[0] + m[7]*v[1] + m[8]*v[2];
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
// **************************************************************************
|
||||
// ellipsoid_nbor.cu
|
||||
// -------------------
|
||||
// W. Michael Brown
|
||||
// W. Michael Brown (ORNL)
|
||||
//
|
||||
// Device code for Ellipsoid neighbor routines
|
||||
//
|
||||
@ -13,9 +13,6 @@
|
||||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifndef ELLIPSOID_NBOR_H
|
||||
#define ELLIPSOID_NBOR_H
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
|
||||
#include "nv_kernel_def.h"
|
||||
@ -161,5 +158,3 @@ __kernel void kernel_nbor_fast(__global numtyp4 *x_, __global numtyp2 *cut_form,
|
||||
dev_nbor[ii+nbor_pitch]=newj;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
using namespace LAMMPS_AL;
|
||||
|
||||
#define GayBerneT GayBerne<numtyp, acctyp>
|
||||
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
|
||||
extern Device<PRECISION,ACC_PRECISION> device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
GayBerneT::GayBerne() : BaseEllipsoid<numtyp,acctyp>(),
|
||||
|
||||
@ -25,10 +25,10 @@
|
||||
#include <cassert>
|
||||
#define LJL_GPU_MemoryT LJL_GPU_Memory<numtyp, acctyp>
|
||||
|
||||
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
|
||||
extern Device<PRECISION,ACC_PRECISION> device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
LJL_GPU_MemoryT::LJL_GPU_Memory() : AtomicGPUMemory<numtyp,acctyp>(), _allocated(false) {
|
||||
LJL_GPU_MemoryT::LJL_GPU_Memory() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
|
||||
@ -21,7 +21,7 @@
|
||||
#include "base_atomic.h"
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class LJL_GPU_Memory : public AtomicGPUMemory<numtyp, acctyp> {
|
||||
class LJL_GPU_Memory : public BaseAtomic<numtyp, acctyp> {
|
||||
public:
|
||||
LJL_GPU_Memory();
|
||||
~LJL_GPU_Memory();
|
||||
|
||||
@ -25,10 +25,10 @@
|
||||
#include <cassert>
|
||||
#define LJ96_GPU_MemoryT LJ96_GPU_Memory<numtyp, acctyp>
|
||||
|
||||
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
|
||||
extern Device<PRECISION,ACC_PRECISION> device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
LJ96_GPU_MemoryT::LJ96_GPU_Memory() : AtomicGPUMemory<numtyp,acctyp>(), _allocated(false) {
|
||||
LJ96_GPU_MemoryT::LJ96_GPU_Memory() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
|
||||
@ -21,7 +21,7 @@
|
||||
#include "base_atomic.h"
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class LJ96_GPU_Memory : public AtomicGPUMemory<numtyp, acctyp> {
|
||||
class LJ96_GPU_Memory : public BaseAtomic<numtyp, acctyp> {
|
||||
public:
|
||||
LJ96_GPU_Memory();
|
||||
~LJ96_GPU_Memory();
|
||||
|
||||
@ -25,10 +25,10 @@ using namespace LAMMPS_AL;
|
||||
|
||||
#define LJClass2LongT LJClass2Long<numtyp, acctyp>
|
||||
|
||||
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
|
||||
extern Device<PRECISION,ACC_PRECISION> device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
LJClass2LongT::LJClass2Long() : ChargeGPUMemory<numtyp,acctyp>(),
|
||||
LJClass2LongT::LJClass2Long() : BaseCharge<numtyp,acctyp>(),
|
||||
_allocated(false) {
|
||||
}
|
||||
|
||||
|
||||
@ -21,7 +21,7 @@
|
||||
namespace LAMMPS_AL {
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class LJClass2Long : public ChargeGPUMemory<numtyp, acctyp> {
|
||||
class LJClass2Long : public BaseCharge<numtyp, acctyp> {
|
||||
public:
|
||||
LJClass2Long();
|
||||
~LJClass2Long();
|
||||
|
||||
@ -25,10 +25,10 @@
|
||||
#include <cassert>
|
||||
#define LJC_GPU_MemoryT LJC_GPU_Memory<numtyp, acctyp>
|
||||
|
||||
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
|
||||
extern Device<PRECISION,ACC_PRECISION> device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
LJC_GPU_MemoryT::LJC_GPU_Memory() : ChargeGPUMemory<numtyp,acctyp>(),
|
||||
LJC_GPU_MemoryT::LJC_GPU_Memory() : BaseCharge<numtyp,acctyp>(),
|
||||
_allocated(false) {
|
||||
}
|
||||
|
||||
|
||||
@ -21,7 +21,7 @@
|
||||
#include "base_charge.h"
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class LJC_GPU_Memory : public ChargeGPUMemory<numtyp, acctyp> {
|
||||
class LJC_GPU_Memory : public BaseCharge<numtyp, acctyp> {
|
||||
public:
|
||||
LJC_GPU_Memory();
|
||||
~LJC_GPU_Memory();
|
||||
|
||||
@ -25,10 +25,10 @@
|
||||
#include <cassert>
|
||||
#define LJCL_GPU_MemoryT LJCL_GPU_Memory<numtyp, acctyp>
|
||||
|
||||
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
|
||||
extern Device<PRECISION,ACC_PRECISION> device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
LJCL_GPU_MemoryT::LJCL_GPU_Memory() : ChargeGPUMemory<numtyp,acctyp>(),
|
||||
LJCL_GPU_MemoryT::LJCL_GPU_Memory() : BaseCharge<numtyp,acctyp>(),
|
||||
_allocated(false) {
|
||||
}
|
||||
|
||||
|
||||
@ -21,7 +21,7 @@
|
||||
#include "base_charge.h"
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class LJCL_GPU_Memory : public ChargeGPUMemory<numtyp, acctyp> {
|
||||
class LJCL_GPU_Memory : public BaseCharge<numtyp, acctyp> {
|
||||
public:
|
||||
LJCL_GPU_Memory();
|
||||
~LJCL_GPU_Memory();
|
||||
|
||||
@ -25,10 +25,10 @@
|
||||
#include <cassert>
|
||||
#define LJE_GPU_MemoryT LJE_GPU_Memory<numtyp, acctyp>
|
||||
|
||||
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
|
||||
extern Device<PRECISION,ACC_PRECISION> device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
LJE_GPU_MemoryT::LJE_GPU_Memory() : AtomicGPUMemory<numtyp,acctyp>(), _allocated(false) {
|
||||
LJE_GPU_MemoryT::LJE_GPU_Memory() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
|
||||
@ -21,7 +21,7 @@
|
||||
#include "base_atomic.h"
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class LJE_GPU_Memory : public AtomicGPUMemory<numtyp, acctyp> {
|
||||
class LJE_GPU_Memory : public BaseAtomic<numtyp, acctyp> {
|
||||
public:
|
||||
LJE_GPU_Memory();
|
||||
~LJE_GPU_Memory();
|
||||
|
||||
@ -25,10 +25,10 @@
|
||||
#include <cassert>
|
||||
#define MOR_GPU_MemoryT MOR_GPU_Memory<numtyp, acctyp>
|
||||
|
||||
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
|
||||
extern Device<PRECISION,ACC_PRECISION> device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
MOR_GPU_MemoryT::MOR_GPU_Memory() : AtomicGPUMemory<numtyp,acctyp>(), _allocated(false) {
|
||||
MOR_GPU_MemoryT::MOR_GPU_Memory() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
|
||||
@ -21,7 +21,7 @@
|
||||
#include "base_atomic.h"
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class MOR_GPU_Memory : public AtomicGPUMemory<numtyp, acctyp> {
|
||||
class MOR_GPU_Memory : public BaseAtomic<numtyp, acctyp> {
|
||||
public:
|
||||
MOR_GPU_Memory();
|
||||
~MOR_GPU_Memory();
|
||||
|
||||
@ -1,46 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
|
||||
#include "geryon/ucl_nv_kernel.h"
|
||||
|
||||
#else
|
||||
|
||||
#define GLOBAL_ID_X get_global_id(0)
|
||||
|
||||
#endif
|
||||
|
||||
__kernel void kernel_unpack(__global int *dev_nbor, __global int *dev_ij,
|
||||
const int inum) {
|
||||
// ii indexes the two interacting particles in gi
|
||||
int ii=GLOBAL_ID_X;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor=dev_nbor+ii+inum;
|
||||
int numj=*nbor;
|
||||
nbor+=inum;
|
||||
__global int *list=dev_ij+*nbor;
|
||||
__global int *list_end=list+numj;
|
||||
|
||||
for ( ; list<list_end; list++) {
|
||||
*nbor=*list;
|
||||
nbor+=inum;
|
||||
}
|
||||
} // if ii
|
||||
}
|
||||
|
||||
@ -1,58 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef PAIR_GPU_NBOR_SHARED_H
|
||||
#define PAIR_GPU_NBOR_SHARED_H
|
||||
|
||||
#ifdef USE_OPENCL
|
||||
|
||||
#include "geryon/ocl_kernel.h"
|
||||
#include "geryon/ocl_texture.h"
|
||||
using namespace ucl_opencl;
|
||||
|
||||
#else
|
||||
|
||||
#include "geryon/nvd_kernel.h"
|
||||
#include "geryon/nvd_texture.h"
|
||||
using namespace ucl_cudadr;
|
||||
|
||||
#endif
|
||||
|
||||
class PairGPUNborShared {
|
||||
public:
|
||||
PairGPUNborShared() : _compiled(false) {}
|
||||
~PairGPUNborShared() { clear(); }
|
||||
|
||||
/// Free all memory on host and device
|
||||
void clear();
|
||||
|
||||
/// Texture for cached position/type access with CUDA
|
||||
UCL_Texture neigh_tex;
|
||||
|
||||
/// Compile kernels for neighbor lists
|
||||
void compile_kernels(UCL_Device &dev, const bool gpu_nbor);
|
||||
|
||||
// ----------------------------- Kernels
|
||||
UCL_Program *nbor_program, *build_program;
|
||||
UCL_Kernel k_nbor, k_cell_id, k_cell_counts, k_build_nbor;
|
||||
UCL_Kernel k_transpose, k_special;
|
||||
|
||||
private:
|
||||
bool _compiled, _gpu_nbor;
|
||||
};
|
||||
|
||||
#endif
|
||||
@ -1,27 +1,25 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
/***************************************************************************
|
||||
neighbor.cpp
|
||||
-------------------
|
||||
W. Michael Brown (ORNL)
|
||||
Peng Wang (Nvidia)
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
Class for handling neighbor lists
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
|
||||
Peng Wang (Nvidia), penwang@nvidia.com
|
||||
------------------------------------------------------------------------- */
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
begin :
|
||||
email : brownw@ornl.gov, penwang@nvidia.com
|
||||
***************************************************************************/
|
||||
|
||||
#include "precision.h"
|
||||
#include "nbor.h"
|
||||
#include "neighbor.h"
|
||||
#include "device.h"
|
||||
#include "math.h"
|
||||
|
||||
int PairGPUNbor::bytes_per_atom(const int max_nbors) const {
|
||||
int Neighbor::bytes_per_atom(const int max_nbors) const {
|
||||
if (_gpu_nbor)
|
||||
return (max_nbors+2)*sizeof(int);
|
||||
else if (_use_packing)
|
||||
@ -30,7 +28,7 @@ int PairGPUNbor::bytes_per_atom(const int max_nbors) const {
|
||||
return (max_nbors+3)*sizeof(int);
|
||||
}
|
||||
|
||||
bool PairGPUNbor::init(PairGPUNborShared *shared, const int inum,
|
||||
bool Neighbor::init(NeighborShared *shared, const int inum,
|
||||
const int host_inum, const int max_nbors,
|
||||
const int maxspecial, UCL_Device &devi,
|
||||
const bool gpu_nbor, const int gpu_host,
|
||||
@ -89,7 +87,7 @@ bool PairGPUNbor::init(PairGPUNborShared *shared, const int inum,
|
||||
return success;
|
||||
}
|
||||
|
||||
void PairGPUNbor::alloc(bool &success) {
|
||||
void Neighbor::alloc(bool &success) {
|
||||
dev_nbor.clear();
|
||||
host_acc.clear();
|
||||
int nt=_max_atoms+_max_host;
|
||||
@ -156,7 +154,7 @@ void PairGPUNbor::alloc(bool &success) {
|
||||
_allocated=true;
|
||||
}
|
||||
|
||||
void PairGPUNbor::clear() {
|
||||
void Neighbor::clear() {
|
||||
_gpu_bytes=0.0;
|
||||
_cell_bytes=0.0;
|
||||
_c_bytes=0.0;
|
||||
@ -181,7 +179,7 @@ void PairGPUNbor::clear() {
|
||||
}
|
||||
}
|
||||
|
||||
double PairGPUNbor::host_memory_usage() const {
|
||||
double Neighbor::host_memory_usage() const {
|
||||
if (_gpu_nbor) {
|
||||
if (_gpu_host)
|
||||
return host_nbor.row_bytes()*host_nbor.rows()+host_ilist.row_bytes()+
|
||||
@ -190,10 +188,10 @@ double PairGPUNbor::host_memory_usage() const {
|
||||
return 0;
|
||||
} else
|
||||
return host_packed.row_bytes()*host_packed.rows()+host_acc.row_bytes()+
|
||||
sizeof(PairGPUNbor);
|
||||
sizeof(Neighbor);
|
||||
}
|
||||
|
||||
void PairGPUNbor::get_host(const int inum, int *ilist, int *numj,
|
||||
void Neighbor::get_host(const int inum, int *ilist, int *numj,
|
||||
int **firstneigh, const int block_size) {
|
||||
time_nbor.start();
|
||||
|
||||
@ -258,9 +256,9 @@ void PairGPUNbor::get_host(const int inum, int *ilist, int *numj,
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void PairGPUNbor::build_nbor_list(const int inum, const int host_inum,
|
||||
void Neighbor::build_nbor_list(const int inum, const int host_inum,
|
||||
const int nall,
|
||||
PairGPUAtom<numtyp,acctyp> &atom,
|
||||
Atom<numtyp,acctyp> &atom,
|
||||
double *sublo, double *subhi, int *tag,
|
||||
int **nspecial, int **special, bool &success,
|
||||
int &mn) {
|
||||
@ -399,8 +397,8 @@ void PairGPUNbor::build_nbor_list(const int inum, const int host_inum,
|
||||
time_nbor.stop();
|
||||
}
|
||||
|
||||
template void PairGPUNbor::build_nbor_list<PRECISION,ACC_PRECISION>
|
||||
template void Neighbor::build_nbor_list<PRECISION,ACC_PRECISION>
|
||||
(const int inum, const int host_inum, const int nall,
|
||||
PairGPUAtom<PRECISION,ACC_PRECISION> &atom, double *sublo, double *subhi,
|
||||
Atom<PRECISION,ACC_PRECISION> &atom, double *sublo, double *subhi,
|
||||
int *, int **, int **, bool &success, int &mn);
|
||||
|
||||
@ -1,25 +1,24 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
/***************************************************************************
|
||||
neighbor.h
|
||||
-------------------
|
||||
W. Michael Brown (ORNL)
|
||||
Peng Wang (Nvidia)
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
Class for handling neighbor lists
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
|
||||
------------------------------------------------------------------------- */
|
||||
begin :
|
||||
email : brownw@ornl.gov, penwang@nvidia.com
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef PAIR_GPU_NBOR_H
|
||||
#define PAIR_GPU_NBOR_H
|
||||
#ifndef LAL_NEIGHBOR_H
|
||||
#define LAL_NEIGHBOR_H
|
||||
|
||||
#include "atom.h"
|
||||
#include "nbor_shared.h"
|
||||
#include "neighbor_shared.h"
|
||||
|
||||
#define IJ_SIZE 131072
|
||||
|
||||
@ -37,10 +36,10 @@ using namespace ucl_cudadr;
|
||||
|
||||
#endif
|
||||
|
||||
class PairGPUNbor {
|
||||
class Neighbor {
|
||||
public:
|
||||
PairGPUNbor() : _allocated(false), _use_packing(false) {}
|
||||
~PairGPUNbor() { clear(); }
|
||||
Neighbor() : _allocated(false), _use_packing(false) {}
|
||||
~Neighbor() { clear(); }
|
||||
|
||||
/// Determine whether neighbor unpacking should be used
|
||||
/** If false, twice as much memory is reserved to allow unpacking neighbors by
|
||||
@ -57,7 +56,7 @@ class PairGPUNbor {
|
||||
* 2 if gpu_nbor is true, and host needs a full nbor list
|
||||
* \param pre_cut True if cutoff test will be performed in separate kernel
|
||||
* than the force kernel **/
|
||||
bool init(PairGPUNborShared *shared, const int inum, const int host_inum,
|
||||
bool init(NeighborShared *shared, const int inum, const int host_inum,
|
||||
const int max_nbors, const int maxspecial, UCL_Device &dev,
|
||||
const bool gpu_nbor, const int gpu_host, const bool pre_cut,
|
||||
const int block_cell_2d, const int block_cell_id,
|
||||
@ -138,7 +137,7 @@ class PairGPUNbor {
|
||||
/// Build nbor list on the device
|
||||
template <class numtyp, class acctyp>
|
||||
void build_nbor_list(const int inum, const int host_inum, const int nall,
|
||||
PairGPUAtom<numtyp,acctyp> &atom, double *sublo,
|
||||
Atom<numtyp,acctyp> &atom, double *sublo,
|
||||
double *subhi, int *tag, int **nspecial, int **special,
|
||||
bool &success, int &max_nbors);
|
||||
|
||||
@ -187,7 +186,7 @@ class PairGPUNbor {
|
||||
UCL_Timer time_nbor, time_kernel;
|
||||
|
||||
private:
|
||||
PairGPUNborShared *_shared;
|
||||
NeighborShared *_shared;
|
||||
UCL_Device *dev;
|
||||
bool _allocated, _use_packing;
|
||||
int _max_atoms, _max_nbors, _max_host, _nbor_pitch, _maxspecial;
|
||||
44
lib/gpu/neighbor_cpu.cu
Normal file
44
lib/gpu/neighbor_cpu.cu
Normal file
@ -0,0 +1,44 @@
|
||||
// **************************************************************************
|
||||
// atom.cu
|
||||
// -------------------
|
||||
// W. Michael Brown (ORNL)
|
||||
//
|
||||
// Device code for handling CPU generated neighbor lists
|
||||
//
|
||||
// __________________________________________________________________________
|
||||
// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
// __________________________________________________________________________
|
||||
//
|
||||
// begin :
|
||||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
|
||||
#include "geryon/ucl_nv_kernel.h"
|
||||
|
||||
#else
|
||||
|
||||
#define GLOBAL_ID_X get_global_id(0)
|
||||
|
||||
#endif
|
||||
|
||||
__kernel void kernel_unpack(__global int *dev_nbor, __global int *dev_ij,
|
||||
const int inum) {
|
||||
// ii indexes the two interacting particles in gi
|
||||
int ii=GLOBAL_ID_X;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor=dev_nbor+ii+inum;
|
||||
int numj=*nbor;
|
||||
nbor+=inum;
|
||||
__global int *list=dev_ij+*nbor;
|
||||
__global int *list_end=list+numj;
|
||||
|
||||
for ( ; list<list_end; list++) {
|
||||
*nbor=*list;
|
||||
nbor+=inum;
|
||||
}
|
||||
} // if ii
|
||||
}
|
||||
|
||||
@ -1,20 +1,18 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Peng Wang (Nvidia), penwang@nvidia.com
|
||||
Mike Brown (ORNL), brownw@ornl.gov
|
||||
------------------------------------------------------------------------- */
|
||||
// **************************************************************************
|
||||
// atom.cu
|
||||
// -------------------
|
||||
// Peng Wang (Nvidia)
|
||||
// W. Michael Brown (ORNL)
|
||||
//
|
||||
// Device code for handling GPU generated neighbor lists
|
||||
//
|
||||
// __________________________________________________________________________
|
||||
// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
// __________________________________________________________________________
|
||||
//
|
||||
// begin :
|
||||
// email : penwang@nvidia.com, brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
|
||||
@ -1,21 +1,19 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
/***************************************************************************
|
||||
neighbor_shared.cpp
|
||||
-------------------
|
||||
W. Michael Brown (ORNL)
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
Class for management of data shared by all neighbor lists
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
|
||||
------------------------------------------------------------------------- */
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
#include "nbor_shared.h"
|
||||
begin :
|
||||
email : brownw@ornl.gov
|
||||
***************************************************************************/
|
||||
|
||||
#include "neighbor_shared.h"
|
||||
|
||||
#ifdef USE_OPENCL
|
||||
#include "nbor_cl.h"
|
||||
@ -24,7 +22,7 @@
|
||||
#include "pair_gpu_build_ptx.h"
|
||||
#endif
|
||||
|
||||
void PairGPUNborShared::clear() {
|
||||
void NeighborShared::clear() {
|
||||
if (_compiled) {
|
||||
if (_gpu_nbor) {
|
||||
k_cell_id.clear();
|
||||
@ -41,7 +39,7 @@ void PairGPUNborShared::clear() {
|
||||
}
|
||||
}
|
||||
|
||||
void PairGPUNborShared::compile_kernels(UCL_Device &dev, const bool gpu_nbor) {
|
||||
void NeighborShared::compile_kernels(UCL_Device &dev, const bool gpu_nbor) {
|
||||
if (_compiled)
|
||||
return;
|
||||
|
||||
@ -50,7 +48,7 @@ void PairGPUNborShared::compile_kernels(UCL_Device &dev, const bool gpu_nbor) {
|
||||
|
||||
if (gpu_nbor==false) {
|
||||
nbor_program=new UCL_Program(dev);
|
||||
nbor_program->load_string(nbor_cpu,flags.c_str());
|
||||
nbor_program->load_string(neighbor_cpu,flags.c_str());
|
||||
k_nbor.set_function(*nbor_program,"kernel_unpack");
|
||||
} else {
|
||||
build_program=new UCL_Program(dev);
|
||||
@ -58,7 +56,7 @@ void PairGPUNborShared::compile_kernels(UCL_Device &dev, const bool gpu_nbor) {
|
||||
std::cerr << "CANNOT CURRENTLY USE GPU NEIGHBORING WITH OPENCL\n";
|
||||
exit(1);
|
||||
#else
|
||||
build_program->load_string(nbor_gpu,flags.c_str());
|
||||
build_program->load_string(neighbor_gpu,flags.c_str());
|
||||
#endif
|
||||
k_cell_id.set_function(*build_program,"calc_cell_id");
|
||||
k_cell_counts.set_function(*build_program,"kernel_calc_cell_counts");
|
||||
56
lib/gpu/neighbor_shared.h
Normal file
56
lib/gpu/neighbor_shared.h
Normal file
@ -0,0 +1,56 @@
|
||||
/***************************************************************************
|
||||
neighbor_shared.h
|
||||
-------------------
|
||||
W. Michael Brown (ORNL)
|
||||
|
||||
Class for management of data shared by all neighbor lists
|
||||
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
begin :
|
||||
email : brownw@ornl.gov
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef LAL_NEIGHBOR_SHARED_H
|
||||
#define LAL_NEIGHBOR_SHARED_H
|
||||
|
||||
#ifdef USE_OPENCL
|
||||
|
||||
#include "geryon/ocl_kernel.h"
|
||||
#include "geryon/ocl_texture.h"
|
||||
using namespace ucl_opencl;
|
||||
|
||||
#else
|
||||
|
||||
#include "geryon/nvd_kernel.h"
|
||||
#include "geryon/nvd_texture.h"
|
||||
using namespace ucl_cudadr;
|
||||
|
||||
#endif
|
||||
|
||||
class NeighborShared {
|
||||
public:
|
||||
NeighborShared() : _compiled(false) {}
|
||||
~NeighborShared() { clear(); }
|
||||
|
||||
/// Free all memory on host and device
|
||||
void clear();
|
||||
|
||||
/// Texture for cached position/type access with CUDA
|
||||
UCL_Texture neigh_tex;
|
||||
|
||||
/// Compile kernels for neighbor lists
|
||||
void compile_kernels(UCL_Device &dev, const bool gpu_nbor);
|
||||
|
||||
// ----------------------------- Kernels
|
||||
UCL_Program *nbor_program, *build_program;
|
||||
UCL_Kernel k_nbor, k_cell_id, k_cell_counts, k_build_nbor;
|
||||
UCL_Kernel k_transpose, k_special;
|
||||
|
||||
private:
|
||||
bool _compiled, _gpu_nbor;
|
||||
};
|
||||
|
||||
#endif
|
||||
@ -26,13 +26,13 @@
|
||||
|
||||
#define PPPMGPUMemoryT PPPMGPUMemory<numtyp, acctyp, grdtyp, grdtyp4>
|
||||
|
||||
extern PairGPUDevice<PRECISION,ACC_PRECISION> pair_gpu_device;
|
||||
extern Device<PRECISION,ACC_PRECISION> global_device;
|
||||
|
||||
template <class numtyp, class acctyp, class grdtyp, class grdtyp4>
|
||||
PPPMGPUMemoryT::PPPMGPUMemory() : _allocated(false), _compiled(false),
|
||||
_max_bytes(0) {
|
||||
device=&pair_gpu_device;
|
||||
ans=new PairGPUAns<numtyp,acctyp>();
|
||||
device=&global_device;
|
||||
ans=new Answer<numtyp,acctyp>();
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp, class grdtyp, class grdtyp4>
|
||||
|
||||
@ -27,7 +27,7 @@
|
||||
#include "geryon/nvd_texture.h"
|
||||
#endif
|
||||
|
||||
template <class numtyp, class acctyp> class PairGPUDevice;
|
||||
template <class numtyp, class acctyp> class Device;
|
||||
|
||||
template <class numtyp, class acctyp, class grdtyp, class grdtyp4>
|
||||
class PPPMGPUMemory {
|
||||
@ -118,7 +118,7 @@ class PPPMGPUMemory {
|
||||
// -------------------------- DEVICE DATA -------------------------
|
||||
|
||||
/// Device Properties and Atom and Neighbor storage
|
||||
PairGPUDevice<numtyp,acctyp> *device;
|
||||
Device<numtyp,acctyp> *device;
|
||||
|
||||
/// Geryon device
|
||||
UCL_Device *ucl_device;
|
||||
@ -132,7 +132,7 @@ class PPPMGPUMemory {
|
||||
// --------------------------- ATOM DATA --------------------------
|
||||
|
||||
/// Atom Data
|
||||
PairGPUAtom<numtyp,acctyp> *atom;
|
||||
Atom<numtyp,acctyp> *atom;
|
||||
|
||||
|
||||
// --------------------------- GRID DATA --------------------------
|
||||
@ -162,7 +162,7 @@ class PPPMGPUMemory {
|
||||
|
||||
// ------------------------ FORCE/ENERGY DATA -----------------------
|
||||
|
||||
PairGPUAns<numtyp,acctyp> *ans;
|
||||
Answer<numtyp,acctyp> *ans;
|
||||
|
||||
// ------------------------- DEVICE KERNELS -------------------------
|
||||
UCL_Program *pppm_program;
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
using namespace LAMMPS_AL;
|
||||
|
||||
#define RESquaredT RESquared<numtyp, acctyp>
|
||||
extern PairGPUDevice<PRECISION,ACC_PRECISION> device;
|
||||
extern Device<PRECISION,ACC_PRECISION> device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
RESquaredT::RESquared() : BaseEllipsoid<numtyp,acctyp>(),
|
||||
|
||||
Reference in New Issue
Block a user