git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@8921 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -1,380 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator

   Original Version:
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   See the README file in the top-level LAMMPS directory.

   -----------------------------------------------------------------------

   USER-CUDA Package and associated modifications:
   https://sourceforge.net/projects/lammpscuda/

   Christian Trott, christian.trott@tu-ilmenau.de
   Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
   Theoretical Physics II, University of Technology Ilmenau, Germany

   See the README file in the USER-CUDA directory.

   This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef _CUDA_SHARED_H_
#define _CUDA_SHARED_H_

#include "cuda_precision.h"

// Size of the debugdata array: allows for this many doubles
// (or twice as many ints).
#define CUDA_MAX_DEBUG_SIZE 1000
|
||||
|
||||
// Descriptor for an array resident in GPU (device) memory:
// a raw device pointer plus up to three dimensions.
struct dev_array
{
  void* dev_data;    // pointer to memory address on the cuda device
  unsigned dim[3];   // array dimensions
};
|
||||
|
||||
struct cuda_shared_atom   // relevant data from the atom class
{
  dev_array dx;          // cumulated distance for binning settings
  dev_array x;           // position
  dev_array v;           // velocity
  dev_array f;           // force
  dev_array tag;         // global atom IDs (presumably — comment was attached to 'type' in the original; verify)
  dev_array type;        // per-atom type; ghost type = ntypes (ntypescuda = ntypes+1)
  dev_array mask;        // group membership bitmask
  dev_array image;       // periodic image flags
  dev_array q;           // charges
  dev_array mass;        // per-type masses
  dev_array rmass;       // per-atom masses
  dev_array radius;      // per-atom radius
  dev_array density;
  dev_array omega;
  dev_array torque;
  dev_array molecule;

  // special-neighbor (bonded exclusion) data
  dev_array special;
  int maxspecial;
  dev_array nspecial;
  int* special_flag;
  int molecular;

  dev_array eatom;       // per-atom energy
  dev_array vatom;       // per-atom virial
  int need_eatom;        // flag: per-atom energy requested this step
  int need_vatom;        // flag: per-atom virial requested this step

  dev_array x_type;      // position + type packed in an X_FLOAT4 struct
  dev_array v_radius;    // velocity + radius packed in a V_FLOAT4 struct; currently only used for granular atom_style
  dev_array omega_rmass; // omega + rmass packed in a V_FLOAT4 struct; currently only used for granular atom_style

  double* mass_host;     // remember per-type host pointer to masses
  //int natoms;          // total # of atoms in system, could be 0
  int nghost;            // # of ghost atoms on this proc
  int nlocal;            // # of owned atoms on this proc
  int nall;              // total # of atoms (owned + ghost) on this proc
  int nmax;              // max # of owned+ghost in arrays on this proc
  int ntypes;
  int q_flag;            // do we have charges?
  int rmass_flag;        // do we have per-atom masses?
  int firstgroup;
  int nfirst;

  int update_nlocal;
  int update_nmax;
  int update_neigh;

  dev_array xhold;       // position at last neighboring
  X_FLOAT triggerneighsq;// maximum square movement before reneighboring
  int reneigh_flag;      // is reneighboring necessary
  int maxhold;           // size of xhold
  int dist_check;        // perform distance check for reneighboring
  dev_array binned_id;   // id of each binned atom (not tag!!)
  dev_array binned_idnew;// new id of each binned atom for sorting: basically setting atom[binned_id[k]] at atom[binned_newid[k]]
  float bin_extraspace;  // fractional extra capacity reserved per bin
  int bin_dim[3];        // number of bins in each dimension
  int bin_nmax;          // max # of atoms per bin
  dev_array map_array;
};
|
||||
|
||||
struct cuda_shared_pair   // relevant data from the pair class
{
  char cudable_force;      // check for (cudable_force != 0)
  X_FLOAT cut_global;      // global pair cutoff
  X_FLOAT cut_inner_global;
  X_FLOAT cut_coul_global; // global coulomb cutoff
  double** cut;            // type-type cutoff
  double** cutsq;          // type-type cutoff squared
  double** cut_inner;      // type-type inner cutoff
  double** cut_coul;       // type-type cutoff for coul
  double** coeff1;         // type-type pair parameters (host-side tables)
  double** coeff2;
  double** coeff3;
  double** coeff4;
  double** coeff5;
  double** coeff6;
  double** coeff7;
  double** coeff8;
  double** coeff9;
  double** coeff10;
  double** offset;         // energy shift at the cutoff
  double* special_lj;      // 1-2/1-3/1-4 LJ scaling factors
  double* special_coul;    // 1-2/1-3/1-4 coulomb scaling factors
  dev_array virial;        // ENERGY_FLOAT
  dev_array eng_vdwl;      // ENERGY_FLOAT
  dev_array eng_coul;      // ENERGY_FLOAT
  X_FLOAT cut_coulsq_global;
  F_FLOAT g_ewald, kappa;  // long-range coulomb parameters
  int freeze_group_bit;

  // device-side (GPU memory) copies of the coeff tables above
  dev_array coeff1_gm;
  dev_array coeff2_gm;
  dev_array coeff3_gm;
  dev_array coeff4_gm;
  dev_array coeff5_gm;
  dev_array coeff6_gm;
  dev_array coeff7_gm;
  dev_array coeff8_gm;
  dev_array coeff9_gm;
  dev_array coeff10_gm;

  int lastgridsize;          // grid size of the most recent kernel launch
  int n_energy_virial;       // # of energy/virial accumulators
  int collect_forces_later;  // defer force collection (overlap with comm)
  int use_block_per_atom;    // kernel layout: one block per atom
  int override_block_per_atom;
  bool neighall;             // neighbor list contains all neighbors (full list)
};
|
||||
|
||||
struct cuda_shared_domain   // relevant data from the domain class
{
  X_FLOAT sublo[3];       // orthogonal box -> sub-box bounds on this proc
  X_FLOAT subhi[3];
  X_FLOAT boxlo[3];       // global box lower bounds
  X_FLOAT boxhi[3];       // global box upper bounds
  X_FLOAT prd[3];         // global box dimensions
  int periodicity[3];     // xyz periodicity as array

  int triclinic;          // 0 = orthogonal box, nonzero = triclinic
  X_FLOAT xy;             // triclinic tilt factors
  X_FLOAT xz;
  X_FLOAT yz;
  X_FLOAT boxlo_lamda[3]; // box bounds in lamda (fractional) coords
  X_FLOAT boxhi_lamda[3];
  X_FLOAT prd_lamda[3];
  X_FLOAT h[6];           // box shape matrix (Voigt order)
  X_FLOAT h_inv[6];       // inverse box shape matrix
  V_FLOAT h_rate[6];      // box deformation rate
  int update;             // flag: domain data changed, device copy stale
};
|
||||
|
||||
struct cuda_shared_pppm   // relevant data for the PPPM long-range solver
{
  char cudable_force;
#ifdef FFT_CUFFT
  // device work arrays only needed when FFTs run on the GPU via cuFFT
  FFT_FLOAT* work1;
  FFT_FLOAT* work2;
  FFT_FLOAT* work3;
  PPPM_FLOAT* greensfn;       // Green's function table
  PPPM_FLOAT* fkx;            // k-space vectors per dimension
  PPPM_FLOAT* fky;
  PPPM_FLOAT* fkz;
  PPPM_FLOAT* vg;             // virial coefficients
#endif
  int* part2grid;             // per-particle grid-cell assignment
  PPPM_FLOAT* density_brick;  // charge density on the local grid brick
  int* density_brick_int;     // fixed-point density (for atomic accumulation; presumably — verify)
  PPPM_FLOAT density_intScale;// scale factor between int and float density
  PPPM_FLOAT* vdx_brick;      // electric field bricks per dimension
  PPPM_FLOAT* vdy_brick;
  PPPM_FLOAT* vdz_brick;
  PPPM_FLOAT* density_fft;    // density rearranged for the FFT
  ENERGY_FLOAT* energy;
  ENERGY_FLOAT* virial;
  // extents of the local grid brick, with (in) and without (out) ghosts
  int nxlo_in;
  int nxhi_in;
  int nxlo_out;
  int nxhi_out;
  int nylo_in;
  int nyhi_in;
  int nylo_out;
  int nyhi_out;
  int nzlo_in;
  int nzhi_in;
  int nzlo_out;
  int nzhi_out;
  int nx_pppm;                // global PPPM grid size per dimension
  int ny_pppm;
  int nz_pppm;
  PPPM_FLOAT qqrd2e;          // conversion factor charge^2/dist -> energy
  int order;                  // charge interpolation (stencil) order
  // float3 sublo;
  PPPM_FLOAT* rho_coeff;      // charge assignment polynomial coefficients
  int nmax;
  int nlocal;
  PPPM_FLOAT* debugdata;
  PPPM_FLOAT delxinv;         // inverse grid spacing per dimension
  PPPM_FLOAT delyinv;
  PPPM_FLOAT delzinv;
  int nlower;                 // stencil extent below/above a grid point
  int nupper;
  PPPM_FLOAT shiftone;
  PPPM_FLOAT3* fH;
};
|
||||
|
||||
struct cuda_shared_comm   // relevant data for inter-proc communication
{
  int maxswap;              // # of swaps (communication passes)
  int maxlistlength;        // max length of a send list
  dev_array pbc;            // periodic-shift flags per swap
  dev_array slablo;         // slab bounds selecting atoms to send
  dev_array slabhi;
  dev_array multilo;        // per-type slab bounds (multi mode)
  dev_array multihi;
  dev_array sendlist;       // indices of atoms to send per swap
  int grow_flag;            // flag: buffers need to be grown
  int comm_phase;

  int nsend;                // # of atoms in the current send
  int* nsend_swap;          // # of atoms to send per swap
  int* send_size;           // per-swap buffer sizes in bytes
  int* recv_size;
  double** buf_send;        // host-side send/recv buffers
  void** buf_send_dev;      // device-side send/recv buffers
  double** buf_recv;
  void** buf_recv_dev;
  void* buffer;             // shared scratch buffer
  int buffer_size;
  double overlap_split_ratio; // work split between overlapped comm/compute
};
|
||||
|
||||
struct cuda_shared_neighlist // member of CudaNeighList, has no instance in cuda_shared_data
{
  int maxlocal;
  int inum;                    // # of I atoms neighbors are stored for (local indices of I atoms)
  int inum_border2;
  dev_array inum_border;       // # of atoms which interact with border atoms
  dev_array ilist;             // local indices of I atoms
  dev_array ilist_border;
  dev_array numneigh;          // # of neighbors per I atom
  dev_array numneigh_inner;
  dev_array numneigh_border;
  dev_array firstneigh;        // per-atom pointer/offset to its neighbor list
  dev_array neighbors;         // neighbor indices
  dev_array neighbors_border;
  dev_array neighbors_inner;
  int maxpage;                 // # of allocated neighbor pages
  dev_array page_pointers;
  dev_array* pages;
  int maxneighbors;            // max # of neighbors per atom
  int neigh_lists_per_page;
  double** cutneighsq;         // type-type neighbor cutoff squared (host)
  CUDA_FLOAT* cu_cutneighsq;   // same table on the device
  int* binned_id;              // id of each binned atom (see cuda_shared_atom)
  int* bin_dim;                // # of bins per dimension
  int bin_nmax;                // max # of atoms per bin
  float bin_extraspace;        // fractional extra capacity reserved per bin
  double maxcut;               // largest cutoff over all type pairs
  // neighbor exclusion data: by type pair, by group pair, by molecule
  dev_array ex_type;
  int nex_type;
  dev_array ex1_bit;
  dev_array ex2_bit;
  int nex_group;
  dev_array ex_mol_bit;
  int nex_mol;
};
|
||||
|
||||
// This is used to compare compile settings (i.e. precision) of the .cu
// files and the .cpp files; a mismatch indicates an inconsistent build.
struct cuda_compile_settings
{
  int prec_glob;   // global precision setting
  int prec_x;      // position precision
  int prec_v;      // velocity precision
  int prec_f;      // force precision
  int prec_pppm;   // PPPM precision
  int prec_fft;    // FFT precision
  int cufft;       // whether cuFFT was enabled
  int arch;        // target GPU architecture
};
|
||||
|
||||
// Accumulated wall-clock timings (seconds; presumably — units not shown
// here, verify at the measurement sites) for the major GPU code paths.
struct cuda_timings_struct
{
  //Debug:
  double test1;
  double test2;

  //transfers
  double transfer_upload_tmp_constr;
  double transfer_download_tmp_deconstr;

  //communication
  double comm_forward_total;
  double comm_forward_mpi_upper;
  double comm_forward_mpi_lower;
  double comm_forward_kernel_pack;
  double comm_forward_kernel_unpack;
  double comm_forward_kernel_self;
  double comm_forward_upload;
  double comm_forward_download;

  double comm_exchange_total;
  double comm_exchange_mpi;
  double comm_exchange_kernel_pack;
  double comm_exchange_kernel_unpack;
  double comm_exchange_kernel_fill;
  double comm_exchange_cpu_pack;
  double comm_exchange_upload;
  double comm_exchange_download;

  double comm_border_total;
  double comm_border_mpi;
  double comm_border_kernel_pack;
  double comm_border_kernel_unpack;
  double comm_border_kernel_self;
  double comm_border_kernel_buildlist;
  double comm_border_upload;
  double comm_border_download;

  //pair forces
  double pair_xtype_conversion;
  double pair_kernel;
  double pair_virial;
  double pair_force_collection;

  //neighbor
  double neigh_bin;
  double neigh_build;
  double neigh_special;

  //PPPM
  double pppm_particle_map;
  double pppm_make_rho;
  double pppm_brick2fft;
  double pppm_poisson;
  double pppm_fillbrick;
  double pppm_fieldforce;
  double pppm_compute;
};
|
||||
|
||||
struct cuda_shared_data   // holds space for all relevant data from the different classes
{
  void* buffer;       // holds temporary GPU data [data used in subroutines, which does not have to be consistent outside of that routine]
  int buffersize;     // maxsize of buffer
  int buffer_new;     // should be 1 if the pointer to buffer has changed
  void* flag;
  void* debugdata;    // array for easily collecting debugdata from device; class cuda contains the corresponding cu_debugdata and host array
  cuda_shared_atom atom;
  cuda_shared_pair pair;
  cuda_shared_domain domain;
  cuda_shared_pppm pppm;
  cuda_shared_comm comm;
  cuda_compile_settings compile_settings;
  cuda_timings_struct cuda_timings;
  int exchange_dim;
  int me;             // mpi rank
  unsigned int datamask;
  int overlap_comm;   // flag: overlap communication with computation
};
|
||||
|
||||
|
||||
#endif // #ifndef _CUDA_SHARED_H_
|
||||
Reference in New Issue
Block a user