/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#include "pair_gpu_nbor.h"

// Bytes of neighbor storage accounted per atom for a given neighbor cap.
//   max_nbors : maximum number of neighbors per atom
// Returns (2*max_nbors+4)*sizeof(int) when _use_packing is set, otherwise
// (max_nbors+3)*sizeof(int). These per-atom counts equal the sum of the
// dev_nbor and ij element counts requested per atom in init().
int PairGPUNbor::bytes_per_atom(const int max_nbors) const {
  if (_use_packing)
    return (max_nbors*2+4)*sizeof(int);
  else
    return (max_nbors+3)*sizeof(int);
}

// Allocate the neighbor buffers, releasing any previous allocation first.
//   ij_size   : number of elements requested for host_ij (via alloc_w)
//   max_atoms : number of atoms the buffers are sized for
//   max_nbors : maximum number of neighbors per atom
// Returns true only if every alloc call that was attempted returned true.
// Because of the `success && ...` short-circuit, once one allocation fails
// the remaining allocations are not attempted.
bool PairGPUNbor::init(const int ij_size, const int max_atoms,
                       const int max_nbors) {
  bool success=true;
  if (allocated)
    clear();

  // Initialize timers for the selected GPU
  time_nbor.init();

  // dev_nbor gets (max_nbors+4) elements per atom with packing, 3 without.
  if (_use_packing)
    success=success && dev_nbor.alloc((max_nbors+4)*max_atoms);
  else
    success=success && dev_nbor.alloc(3*max_atoms);

  success=success && ij.alloc(max_nbors*max_atoms);
  success=success && host_ij.alloc_w(ij_size);

  // The flag is set even when an allocation failed, so a later clear()
  // still runs over all three buffers.
  allocated=true;

  return success;
}

// Release dev_nbor and ij and allocate them again for a new problem size.
// host_ij is left untouched.
//   nlocal   : number of atoms the buffers are sized for
//   max_nbor : maximum number of neighbors per atom
//   success  : in/out flag; AND-ed with the result of each allocation. If it
//              is already false on entry no allocation is attempted, but the
//              two buffers have been cleared by then.
void PairGPUNbor::resize(const int nlocal, const int max_nbor, bool &success) {
  dev_nbor.clear();
  ij.clear();
  if (_use_packing)
    success=success && dev_nbor.alloc((max_nbor+4)*nlocal);
  else
    success=success && dev_nbor.alloc(3*nlocal);
  success=success && ij.alloc(max_nbor*nlocal);
  allocated=true;
}

// Release all buffers owned by this object. Does nothing when the
// `allocated` flag is not set, so repeated calls are harmless.
void PairGPUNbor::clear() {
  if (!allocated)
    return;
  allocated=false;

  ij.clear();
  host_ij.clear();
  dev_nbor.clear();
}

// Host memory accounted to this object, in bytes (returned as a double).
// NOTE(review): the figure uses the IJ_SIZE constant (defined outside this
// file) rather than the ij_size value passed to init() -- confirm that the
// two always agree.
double PairGPUNbor::host_memory_usage() const {
  return IJ_SIZE*sizeof(int)+sizeof(PairGPUNbor);
}

// Reset neighbor bookkeeping for a new list of inum atoms.
//   inum  : number of entries in ilist
//   ilist : list copied into the start of dev_nbor (inum entries)
//   numj  : not referenced by the statements that survive below
//   s     : stream passed to the host_ij.copy_to_device calls
//
// NOTE(review): the body of this function appears to be corrupted in this
// copy of the file. The condition `inum*20` looks like the two ends of a
// longer expression fused together, `offset`, `hi` and `half` are used
// without any visible declaration, `acc` and `ij_size` are never used, and
// there is one more closing brace than opening braces. Possibly the text
// between a '<' and a later '>' was lost. Restore this function from a
// pristine copy of the file before building; the code tokens below are left
// exactly as found.
void PairGPUNbor::reset(const int inum, int *ilist, const int *numj,
                        cudaStream_t &s) {
  ij_total=0;

  // Copy the first inum entries of ilist into dev_nbor.
  dev_nbor.copy_from_host(ilist,inum);
  int acc=0;

  int ij_size=host_ij.numel();
  if (inum*20) {
    host_ij.copy_to_device(dev_nbor.begin()+inum+offset,hi,s);
    host_ij.copy_to_device(half,dev_nbor.begin()+2*inum+offset,hi,s);
  }
}
}