diff --git a/src/USER-CUDA/user_cuda.h b/src/USER-CUDA/user_cuda.h new file mode 100644 index 0000000000..2d4399b5c9 --- /dev/null +++ b/src/USER-CUDA/user_cuda.h @@ -0,0 +1,159 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + + Original Version: + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + See the README file in the top-level LAMMPS directory. + + ----------------------------------------------------------------------- + + USER-CUDA Package and associated modifications: + https://sourceforge.net/projects/lammpscuda/ + + Christian Trott, christian.trott@tu-ilmenau.de + Lars Winterfeld, lars.winterfeld@tu-ilmenau.de + Theoretical Physics II, University of Technology Ilmenau, Germany + + See the README file in the USER-CUDA directory. + + This software is distributed under the GNU General Public License. +------------------------------------------------------------------------- */ + +#ifndef CUDA_H +#define CUDA_H + +#include "pointers.h" +#include "cuda_shared.h" +#include "cuda_data.h" +#include "cuda_precision.h" +#include + +#ifdef _DEBUG +#define MYDBG(a) a +#else +#define MYDBG(a) +#endif + +namespace LAMMPS_NS +{ +class Cuda : protected Pointers +{ + public: + Cuda(class LAMMPS*); + ~Cuda(); + //static void setDevice(class LAMMPS*); + void allocate(); + + void accelerator(int, char**); + void activate(); + + void setSharedDataZero(); + void setSystemParams(); + + void setDomainParams(); + + void checkResize(); + void evsetup_eatom_vatom(int eflag_atom, int vflag_atom); + void uploadAll(); + void downloadAll(); + void upload(int datamask); + void download(int datamask); + void downloadX(); + + class CudaNeighList* registerNeighborList(class NeighList* neigh_list); + void uploadAllNeighborLists(); + void downloadAllNeighborLists(); + void set_neighinit(int dist_check, double triggerneighsq) { + shared_data.atom.dist_check = dist_check; + shared_data.atom.triggerneighsq = triggerneighsq; + } + bool decide_by_integrator() { + return neighbor_decide_by_integrator && cu_xhold && finished_setup; + } + void update_xhold(int &maxhold, double* xhold); + + void setTimingsZero(); + void print_timings(); + + void cu_x_download() { + cu_x->download(); + } + bool device_set; + bool dotiming; + bool dotestatom; + int testatom; + + double uploadtime, downloadtime; + bool finished_setup, begin_setup; + bool oncpu; + bool finished_run; + + int self_comm; + + int cuda_exists; + + double extent[6]; + int* debugdata; + // data shared between host code and device code + // (number of atoms, device pointers for up- & download) + cuda_shared_data shared_data; + + cCudaData* cu_q; + cCudaData* cu_f; + cCudaData* cu_mass; + cCudaData* cu_rmass; + cCudaData* cu_v; + cCudaData* cu_x; + cCudaData* cu_xhold; + cCudaData* cu_mask; + cCudaData* cu_tag; + cCudaData* cu_type; + cCudaData* cu_image; + cCudaData* cu_eatom; + cCudaData* cu_vatom; + cCudaData* cu_virial; + cCudaData* cu_eng_vdwl; + cCudaData* cu_eng_coul; + cCudaData* cu_extent; + int* binned_id; + cCudaData* cu_binned_id; + int* binned_idnew; + cCudaData* cu_binned_idnew; + cCudaData* cu_debugdata; + cCudaData* cu_radius; + cCudaData* cu_density; + cCudaData* cu_omega; + cCudaData* cu_torque; + cCudaData* cu_special; + cCudaData* cu_nspecial; + cCudaData* cu_molecule; + + + cCudaData* cu_x_type; + X_CFLOAT* x_type; + + cCudaData* cu_v_radius; + V_CFLOAT* v_radius; + + cCudaData* cu_omega_rmass; + V_CFLOAT* omega_rmass; + + cCudaData* cu_map_array; + int neighbor_decide_by_integrator; + + bool pinned; + + void* copy_buffer; + int copy_buffersize; + + private: + int pppn; // number of GPUs/node + int *devicelist; // IDs of GPUs + + std::map neigh_lists; +}; +} + +#endif // CUDA_H