/* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov Peng Wang (Nvidia), penwang@nvidia.com Paul Crozier (SNL), pscrozi@sandia.gov ------------------------------------------------------------------------- */ #ifndef PAIR_GPU_NBOR_H #define PAIR_GPU_NBOR_H #include "nvc_macros.h" #include "nvc_timer.h" #include "nvc_memory.h" #define IJ_SIZE 131072 class PairGPUNbor { public: PairGPUNbor() : _use_packing(false), allocated(false) {} ~PairGPUNbor() { clear(); } /// Determine whether neighbor packing should be used /** If true, twice as much memory is reserved to allow packing neighbors by * atom for coalesced access after cutoff evaluation. This can be used * for expensive potentials where it is more efficient to evaluate the * cutoff separately from the potential in order to reduce thread divergence * for expensive routines **/ void packing(const bool use_packing) { _use_packing=use_packing; } /// Called once to allocate memory bool init(const int ij_size, const int max_atoms, const int max_nbors); void resize(const int nlocal, const int max_nbor, bool &success); /// Free all memory on host and device void clear(); /// Bytes per atom used on device int bytes_per_atom(const int max_nbors) const; /// Total host memory used by class double host_memory_usage() const; /// Reset neighbor data (first time or from a rebuild) void reset(const int inum, int *ilist, const int *numj, cudaStream_t &s); /// Add neighbor data from host inline void add(const int num_ij, cudaStream_t &s) { host_ij.copy_to_device(ij.begin()+ij_total,num_ij,s); ij_total+=num_ij; } /// Pack neighbors satisfying cutoff by atom for coalesced access void pack_nbors(const int GX, const int BX, const int start, const int inum, const int form_low, const int form_high); // ------------------------------- Data ------------------------------- // Store IJ interactions on device NVC_VecI ij; // Buffer for moving ij data to GPU NVC_HostI host_ij; // --------------- Atom neighbors // 3 x n // - 1st row is i // - 2nd row is numj (number of neighbors) // - 3rd row is starting address in host_ij of neighbors NVC_VecI dev_nbor; // --------------- Timing Stuff NVCTimer time_nbor; int ij_total; private: bool allocated, _use_packing; }; #endif