/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#include "lj_gpu_memory.h"
#define LJ_GPU_MemoryT LJ_GPU_Memory<numtyp,acctyp>

template <class numtyp, class acctyp>
int LJ_GPU_MemoryT::bytes_per_atom(const int max_nbors) const {
  return atom.bytes_per_atom()+nbor.bytes_per_atom(max_nbors);
}

template <class numtyp, class acctyp>
bool LJ_GPU_MemoryT::init(const int ij_size, const int ntypes,
                          double **host_cutsq, double **host_sigma,
                          double **host_epsilon, double **host_lj1,
                          double **host_lj2, double **host_lj3,
                          double **host_lj4, double **host_offset,
                          double *host_special_lj, const int max_nbors,
                          const int me, const int nlocal, const int nall) {
  if (allocated)
    clear();

  if (me>=gpu.num_devices())
    return false;
  gpu.set(me);
  if (gpu.revision()<1.0)
    return false;

  // Initialize timers for the selected GPU
  time_pair.init();

  // Initialize atom and nbor data; allow 10% headroom for atom migration
  max_local=static_cast<int>(static_cast<double>(nlocal)*1.10);
  if (max_local==0)
    max_local=1000;
  if (nall<=nlocal)
    max_atoms=max_local*2;
  else
    max_atoms=static_cast<int>(static_cast<double>(nall)*1.10);

  if (!atom.init(max_atoms))
    return false;
  if (!nbor.init(ij_size,max_local,max_nbors))
    return false;

  // Get a stream for computing pair potentials
  CUDA_SAFE_CALL(cudaStreamCreate(&pair_stream));

  // Use the write buffer from atom for data initialization
  NVC_HostT &host_write=atom.host_write;
  assert(host_write.numel()>4 && host_write.numel()>ntypes*ntypes*2);

  // Copy data for bonded interactions
  special_lj.safe_alloc(4);
  special_lj.cast_copy(host_special_lj,host_write);

  // Copy sigma, epsilon, and cutsq onto the GPU
  sigma.safe_alloc(ntypes,ntypes,sigma_get_texture<numtyp>());
  sigma.cast_copy(host_sigma[0],host_write);
  epsilon.safe_alloc(ntypes,ntypes,epsilon_get_texture<numtyp>());
  epsilon.cast_copy(host_epsilon[0],host_write);
  cutsq.safe_alloc(ntypes,ntypes,cutsq_get_texture<numtyp>());
  cutsq.cast_copy(host_cutsq[0],host_write);

  // If the atom type constants fit in shared memory, use the fast kernel
  int lj_types=ntypes;
  shared_types=false;
  if (lj_types<=MAX_SHARED_TYPES) {
    lj_types=MAX_SHARED_TYPES;
    shared_types=true;
  }

  offset.safe_alloc(lj_types,lj_types,offset_get_texture<numtyp>());
  offset.cast_copy2D(host_offset[0],host_write,ntypes,ntypes);

  // Interleave the (lj1,lj2) coefficients into vec2 pairs in the write
  // buffer, then copy them to the GPU as a 2D per-type-pair table
  double *t1=host_lj1[0];
  double *t2=host_lj2[0];
  for (int i=0; i<ntypes*ntypes; i++) {
    host_write[i*2]=t1[i];
    host_write[i*2+1]=t2[i];
  }
  lj1.safe_alloc(lj_types,lj_types,lj1_get_texture<numtyp>());
  lj1.copy_2Dfrom_host(reinterpret_cast<typename nvc_vec_traits<numtyp>::vec2 *>
                       (host_write.begin()), ntypes,ntypes);

  // Same interleaving for the (lj3,lj4) coefficients
  t1=host_lj3[0];
  t2=host_lj4[0];
  for (int i=0; i<ntypes*ntypes; i++) {
    host_write[i*2]=t1[i];
    host_write[i*2+1]=t2[i];
  }
  lj3.safe_alloc(lj_types,lj_types,lj3_get_texture<numtyp>());
  lj3.copy_2Dfrom_host(reinterpret_cast<typename nvc_vec_traits<numtyp>::vec2 *>
                       (host_write.begin()), ntypes,ntypes);

  // Per-kernel error flag stored on the device
  dev_error.safe_alloc(1);
  dev_error.zero();

  allocated=true;
  return true;
}

template <class numtyp, class acctyp>
void LJ_GPU_MemoryT::clear() {
  if (!allocated)
    return;
  allocated=false;

  // Check for any pair style specific errors here
  int err_flag;
  dev_error.copy_to_host(&err_flag);

  atom.clear();
  nbor.clear();

  CUDA_SAFE_CALL(cudaStreamDestroy(pair_stream));
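
  // Release the per-type parameter tables and error flag on the device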
  dev_error.clear();
  sigma.clear();
  epsilon.clear();
  special_lj.clear();
  cutsq.clear();
  offset.clear();
  lj1.clear();
  lj3.clear();
}

template <class numtyp, class acctyp>
double LJ_GPU_MemoryT::host_memory_usage() const {
  return atom.host_memory_usage(max_atoms)+nbor.host_memory_usage()+
         sizeof(LJ_GPU_Memory<numtyp,acctyp>);
}

template class LJ_GPU_Memory<PRECISION,ACC_PRECISION>;
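
/* ----------------------------------------------------------------------
   Usage sketch (illustration only, not part of the original file): a
   minimal example of how host-side pair style code might drive this
   class.  The variable names (mem, ij_size, me, ...) and the failure
   handling are assumptions for illustration; in LAMMPS the coefficient
   tables are built by the owning pair style, and PRECISION and
   ACC_PRECISION are selected at compile time.

     LJ_GPU_Memory<PRECISION,ACC_PRECISION> mem;

     // host_* are per-type-pair coefficient tables from the pair style;
     // me selects the GPU for this MPI rank
     bool ok=mem.init(ij_size,ntypes,host_cutsq,host_sigma,host_epsilon,
                      host_lj1,host_lj2,host_lj3,host_lj4,host_offset,
                      host_special_lj,max_nbors,me,nlocal,nall);
     if (!ok)
       printf("Could not initialize GPU %d for LJ pair style.\n",me);

     // ... neighbor builds and pair force kernels run here ...

     mem.clear();  // free device memory; init() may then be called again
------------------------------------------------------------------------- */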