/***************************************************************************
                               pair_gpu_atom.cu
                             -------------------
                               W. Michael Brown

  Memory routines for moving atom and force data between host and gpu

 __________________________________________________________________________
    This file is part of the LAMMPS GPU Library
 __________________________________________________________________________

    begin                : Tue Aug 4 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   ----------------------------------------------------------------------- */

#include "pair_gpu_texture.h"
#include "pair_gpu_atom.h"

#define PairGPUAtomT PairGPUAtom<numtyp,acctyp>

template <class numtyp, class acctyp>
int PairGPUAtomT::bytes_per_atom() const {
  return atom_fields()*sizeof(numtyp)+ans_fields()*sizeof(acctyp);
}

template <class numtyp, class acctyp>
void PairGPUAtomT::init(const int max_atoms) {
  if (allocated)
    clear();

  _max_atoms=max_atoms;

  // Initialize timers for the selected GPU
  time_atom.init();
  time_answer.init();

  // Device matrices for atom and force data
  dev_x.safe_alloc(atom_fields(),max_atoms,x_get_texture<numtyp>());
  ans.safe_alloc(ans_fields(),max_atoms);

  // Get a host write only buffer
  host_write.safe_alloc_w(max_atoms*atom_fields());
  // Get a host read/write buffer
  host_read.safe_alloc_rw(ans.row_size()*ans_fields());

  allocated=true;
}

template <class numtyp, class acctyp>
void PairGPUAtomT::clear() {
  if (!allocated)
    return;
  allocated=false;

  dev_x.unbind();
  ans.clear();
  host_write.clear();
  host_read.clear();
  dev_x.clear();
}

template <class numtyp, class acctyp>
double PairGPUAtomT::host_memory_usage(const int max_atoms) const {
  return max_atoms*atom_fields()*sizeof(numtyp)+
         ans_fields()*max_atoms*sizeof(acctyp)+
         sizeof(PairGPUAtom<numtyp,acctyp>);
}

// Copy answers from the device asynchronously on stream s; the energy row
// and the 6 virial rows are only copied when the corresponding flag is set
template <class numtyp, class acctyp>
void PairGPUAtomT::copy_answers(const bool eflag, const bool vflag,
                                cudaStream_t &s) {
  _eflag=eflag;
  _vflag=vflag;

  int csize=ans_fields();
  if (!eflag)
    csize--;
  if (!vflag)
    csize-=6;

  host_read.copy_from_device(ans.begin(),ans.row_size()*csize,s);
}

// Accumulate energy and virial terms from the host buffer; answers are
// stored one field per row (energy, then 6 virial rows, then 3 force rows)
// and gap skips the padded remainder of each row
template <class numtyp, class acctyp>
double PairGPUAtomT::energy_virial(const int *ilist, const bool eflag_atom,
                                   const bool vflag_atom, double *eatom,
                                   double **vatom, double *virial) {
  double evdwl=0.0;

  int gap=ans.row_size()-_inum;
  acctyp *ap=host_read.begin();

  if (_eflag) {
    if (eflag_atom) {
      for (int i=0; i<_inum; i++) {
        evdwl+=*ap;
        eatom[ilist[i]]+=*ap*0.5;
        ap++;
      }
    } else {
      for (int i=0; i<_inum; i++) {
        evdwl+=*ap;
        ap++;
      }
    }
    ap+=gap;
    evdwl*=0.5;
  }
  _read_loc=ap;
  gap=ans.row_size();

  if (_vflag) {
    if (vflag_atom) {
      for (int ii=0; ii<_inum; ii++) {
        int i=ilist[ii];
        ap=_read_loc+ii;
        for (int j=0; j<6; j++) {
          vatom[i][j]+=*ap*0.5;
          virial[j]+=*ap;
          ap+=gap;
        }
      }
    } else {
      for (int ii=0; ii<_inum; ii++) {
        ap=_read_loc+ii;
        for (int j=0; j<6; j++) {
          virial[j]+=*ap;
          ap+=gap;
        }
      }
    }
    for (int j=0; j<6; j++)
      virial[j]*=0.5;
    _read_loc+=gap*6;
  }

  return evdwl;
}

// Accumulate forces into f; _read_loc points at the first force row after
// energy_virial() has run
template <class numtyp, class acctyp>
void PairGPUAtomT::add_forces(const int *ilist, double **f) {
  int gap=ans.row_size();
  for (int ii=0; ii<_inum; ii++) {
    acctyp *ap=_read_loc+ii;
    int i=ilist[ii];
    f[i][0]+=*ap;
    ap+=gap;
    f[i][1]+=*ap;
    ap+=gap;
    f[i][2]+=*ap;
  }
}

// Accumulate torques for the first n atoms in ilist; advance _read_loc past
// the 3 force rows to the torque rows
template <class numtyp, class acctyp>
void PairGPUAtomT::add_torques(const int *ilist, double **tor, const int n) {
  int gap=ans.row_size();
  _read_loc+=gap*3;
  for (int ii=0; ii<n; ii++) {
    acctyp *ap=_read_loc+ii;
    int i=ilist[ii];
    tor[i][0]+=*ap;
    ap+=gap;
    tor[i][1]+=*ap;
    ap+=gap;
    tor[i][2]+=*ap;
  }
}

template class PairGPUAtom<PRECISION,ACC_PRECISION>;
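
/* -----------------------------------------------------------------------
   Usage sketch (illustrative only, not part of the library): one possible
   host-side call sequence for a pair style that has already packed
   positions into host_write and launched its kernel on stream `s`.  The
   names `atom`, `s`, `max_atoms`, `ilist`, `f`, `eatom`, `vatom`,
   `virial` and the flag variables are hypothetical caller-side names.
   Because the answer fields are laid out one per row (energy, six virial
   rows, three force rows), energy_virial() must run before add_forces()
   so that _read_loc is positioned at the force rows.

     PairGPUAtom<PRECISION,ACC_PRECISION> atom;
     atom.init(max_atoms);                       // one-time allocation
     // ... pack positions into host_write, launch the pair kernel ...
     atom.copy_answers(eflag,vflag,s);           // async device->host copy
     cudaStreamSynchronize(s);                   // wait for the copy
     double evdwl=atom.energy_virial(ilist,eflag_atom,vflag_atom,
                                     eatom,vatom,virial);
     atom.add_forces(ilist,f);                   // accumulate into LAMMPS
   ----------------------------------------------------------------------- */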