git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@15248 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2016-07-01 23:27:26 +00:00
parent 8366b35459
commit 9656958169
245 changed files with 4890 additions and 4832 deletions

View File

@ -10,7 +10,7 @@
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
__________________________________________________________________________
begin :
email : brownw@ornl.gov, penwang@nvidia.com
***************************************************************************/
@ -28,12 +28,12 @@ class Neighbor {
public:
/// Default-construct with nothing allocated, packing disabled and zero cells
Neighbor()
  : _allocated(false),
    _use_packing(false),
    _ncells(0) {
}
/// Destructor; all cleanup is delegated to clear()
~Neighbor() {
  clear();
}
/// Choose whether neighbor unpacking should be used
/** The flag is stored in _use_packing. If false, twice as much memory is
  * reserved to allow unpacking neighbors by atom for coalesced access. **/
void packing(const bool use_packing) {
  _use_packing = use_packing;
}
/// Clear any old data and setup for new LAMMPS run
/** \param inum Initial number of particles whose neighbors stored on device
* \param host_inum Initial number of particles whose nbors copied to host
@ -45,20 +45,20 @@ class Neighbor {
* 1 if gpu_nbor is true, and host needs a half nbor list,
* 2 if gpu_nbor is true, and host needs a full nbor list
* \param pre_cut True if the cutoff test will be performed in a separate
* kernel from the force kernel
* \param threads_per_atom Number of threads used per atom for force
* calculation
* \param compile_flags Flags for JIT compiling **/
// NOTE(review): the meaning of the bool result is not stated in the visible
// \param list above -- confirm against the definition.
bool init(NeighborShared *shared, const int inum, const int host_inum,
          const int max_nbors, const int maxspecial, UCL_Device &dev,
          const int gpu_nbor, const int gpu_host, const bool pre_cut,
          const int block_cell_2d, const int block_cell_id,
          const int block_nbor_build, const int threads_per_atom,
          const int warp_size, const bool time_device,
          const std::string compile_flags);
/// Set the size of the cutoff+skin
inline void cell_size(const double size, const double cutoff) {
inline void cell_size(const double size, const double cutoff) {
_cell_size=size;
_cutoff=cutoff;
if (cutoff>size)
@ -66,7 +66,7 @@ class Neighbor {
else
_cells_in_cutoff=1;
}
/// Current cell size (cutoff+skin), i.e. the value held in _cell_size
inline double cell_size() const {
  return _cell_size;
}
@ -88,7 +88,7 @@ class Neighbor {
* \param host_inum Number of particles whose nbors will be copied to host
* \param max_nbor Current max number of neighbors for a particle
* \param success False if insufficient memory **/
inline void resize(const int inum, const int host_inum, const int max_nbor,
inline void resize(const int inum, const int host_inum, const int max_nbor,
bool &success) {
if (inum>_max_atoms || max_nbor>_max_nbors || host_inum>_max_host) {
_max_atoms=static_cast<int>(static_cast<double>(inum)*1.10);
@ -124,40 +124,40 @@ class Neighbor {
/// Free all memory on host and device
/** Also invoked from the destructor. **/
void clear();
/// Bytes per atom used on device
/** \param max_nbors neighbor count the figure is computed for
  *        (presumably the per-atom maximum -- confirm against the definition) **/
int bytes_per_atom(const int max_nbors) const;
/// Total host memory used by class
/** NOTE(review): the unit is not visible from this declaration (presumably
  * bytes) -- confirm against the definition. **/
double host_memory_usage() const;
/// Report the type of neighboring in use
/** \return 0 if neighboring will be performed on host,
  *         1 if neighboring will be performed on device,
  *         2 if binning on host and neighboring on device **/
inline int gpu_nbor() const {
  return _gpu_nbor;
}
/// Make a copy of unpacked nbor lists in the packed storage area (for gb)
inline void copy_unpacked(const int inum, const int maxj)
inline void copy_unpacked(const int inum, const int maxj)
{ ucl_copy(dev_packed,dev_nbor,inum*(maxj+2),true); }
/// Copy neighbor list from host (first time or from a rebuild)
/** NOTE(review): the roles of ilist, numj, firstneigh and block_size are not
  * visible from this declaration -- confirm against the definition. **/
void get_host(const int inum, int *ilist, int *numj,
              int **firstneigh, const int block_size);
/// Copy neighbor list from host for 3-body (first time or from a rebuild)
/** NOTE(review): the roles of nlist, ilist, numj, firstneigh and block_size
  * are not visible from this declaration -- confirm against the definition. **/
void get_host3(const int inum, const int nlist, int *ilist, int *numj,
               int **firstneigh, const int block_size);
/// Stride, in elements, of each nbor row (value of _nbor_pitch)
inline int nbor_pitch() const {
  return _nbor_pitch;
}
/// Maximum number of atoms that can currently be stored (value of _max_atoms)
inline int max_atoms() const {
  return _max_atoms;
}
/// Maximum number of nbors for a particle based on the current allocation
/// (value of _max_nbors)
inline int max_nbors() const {
  return _max_nbors;
}
/// Time spent binning on the CPU for hybrid neighbor builds (value of _bin_time)
inline double bin_time() const {
  return _bin_time;
}
@ -171,9 +171,9 @@ class Neighbor {
/// Build nbor list on the device
/** Results are reported through the reference parameters success and
  * max_nbors.
  * NOTE(review): the exact meaning of each argument is not visible from this
  * declaration -- confirm against the definition. **/
template <class numtyp, class acctyp>
void build_nbor_list(double **x, const int inum, const int host_inum,
                     const int nall, Atom<numtyp,acctyp> &atom, double *sublo,
                     double *subhi, tagint *tag, int **nspecial, tagint **special,
                     bool &success, int &max_nbors);
/// Return the number of bytes used on device
@ -184,7 +184,7 @@ class Neighbor {
return res;
}
// ------------------------------- Data -------------------------------
/// Device neighbor matrix
@ -199,6 +199,8 @@ class Neighbor {
UCL_H_Vec<int> host_packed;
/// Host storage for nbor counts (row 1) & accumulated neighbor counts (row 2)
UCL_H_Vec<int> host_acc;
/// Device storage for accessing atom indices from the neighbor list (3-body)
UCL_D_Vec<int> dev_acc;
// ----------------- Data for GPU Neighbor Calculation ---------------
@ -219,7 +221,7 @@ class Neighbor {
/// Device timers
UCL_Timer time_nbor, time_kernel, time_hybrid1, time_hybrid2, time_transpose;
private:
NeighborShared *_shared;
UCL_Device *dev;
@ -231,14 +233,14 @@ class Neighbor {
double _gpu_bytes, _c_bytes, _cell_bytes;
// NOTE(review): presumably (re)allocates the neighbor storage and sets
// `success` to false on failure -- confirm against the definition.
void alloc(bool &success);
int _block_cell_2d, _block_cell_id, _max_block_nbor_build, _block_nbor_build;
int _ncells, _threads_per_atom, _total_atoms;
int _cells_in_cutoff;
// NOTE(review): presumably grows storage so that maxn neighbors per atom fit
// and sets `success` to false on failure -- confirm against the definition.
template <class numtyp, class acctyp>
inline void resize_max_neighbors(const int maxn, bool &success);
int _warp_size;
inline void set_nbor_block_size(const int mn) {
int desired=mn/(2*_warp_size);