/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator

   Original Version:
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   See the README file in the top-level LAMMPS directory.
   -----------------------------------------------------------------------

   USER-CUDA Package and associated modifications:
   https://sourceforge.net/projects/lammpscuda/

   Christian Trott, christian.trott@tu-ilmenau.de
   Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
   Theoretical Physics II, University of Technology Ilmenau, Germany

   See the README file in the USER-CUDA directory.

   This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */

#define RIMLARGER 1.000001
#define RIMSMALLER 0.999999
#define SMALL 1e-5

extern __shared__ int shared[];

/* Pack the atoms listed in sendlist (swap iswap) into buffer for forward
   communication; dx/dy/dz are the periodic shifts applied to the coordinates.
   data_mask selects at compile time which per-atom arrays are packed. */
template <const unsigned int data_mask>
__global__ void Cuda_AtomVecCuda_PackComm_Kernel(int* sendlist, int n, int maxlistlength, int iswap,
                                                 X_FLOAT dx, X_FLOAT dy, X_FLOAT dz, void* buffer)
{
  int i = (blockIdx.x*gridDim.y + blockIdx.y)*blockDim.x + threadIdx.x;
  int* list = sendlist + iswap*maxlistlength;

  if(i < n)
  {
    int j = list[i];
    if(j > _nmax) _flag[0] = 1;

    int k = 0;
    if(data_mask & X_MASK) {
      ((X_FLOAT*) buffer)[i+k*n] = _x[j]         + dx; k++;
      ((X_FLOAT*) buffer)[i+k*n] = _x[j+_nmax]   + dy; k++;
      ((X_FLOAT*) buffer)[i+k*n] = _x[j+2*_nmax] + dz; k++;
    }
    if(data_mask & V_MASK) {
      ((X_FLOAT*) buffer)[i+k*n] = _v[j];         k++;
      ((X_FLOAT*) buffer)[i+k*n] = _v[j+_nmax];   k++;
      ((X_FLOAT*) buffer)[i+k*n] = _v[j+2*_nmax]; k++;
    }
    if(data_mask & OMEGA_MASK) {
      ((X_FLOAT*) buffer)[i+k*n] = _omega[j];         k++;
      ((X_FLOAT*) buffer)[i+k*n] = _omega[j+_nmax];   k++;
      ((X_FLOAT*) buffer)[i+k*n] = _omega[j+2*_nmax]; k++;
    }
    /* note: k++ is outside the if here, so a buffer slot is reserved for
       radius and rmass whether or not those masks are set */
    if(data_mask & RADIUS_MASK) ((X_FLOAT*) buffer)[i+k*n] = _radius[j]; k++;
    if(data_mask & RMASS_MASK)  ((X_FLOAT*) buffer)[i+k*n] = _rmass[j];  k++;
  }
}

/* Same packing as above, but sender and receiver are the same process:
   the data of atom list[i] is copied directly into the ghost slot i+first. */
template <const unsigned int data_mask>
__global__ void Cuda_AtomVecCuda_PackComm_Self_Kernel(int* sendlist, int n, int maxlistlength, int iswap,
                                                      X_FLOAT dx, X_FLOAT dy, X_FLOAT dz, int first)
{
  int i = (blockIdx.x*gridDim.y + blockIdx.y)*blockDim.x + threadIdx.x;
  int* list = sendlist + iswap*maxlistlength;

  if(i < n)
  {
    int j = list[i];
    if(j > _nmax) _flag[0] = 1;

    if(data_mask & X_MASK) {
      _x[i+first]         = _x[j]         + dx;
      _x[i+first+_nmax]   = _x[j+_nmax]   + dy;
      _x[i+first+2*_nmax] = _x[j+2*_nmax] + dz;
    }
    if(data_mask & V_MASK) {
      _v[i+first]         = _v[j];
      _v[i+first+_nmax]   = _v[j+_nmax];
      _v[i+first+2*_nmax] = _v[j+2*_nmax];
    }
    if(data_mask & OMEGA_MASK) {
      _omega[i+first]         = _omega[j];
      _omega[i+first+_nmax]   = _omega[j+_nmax];
      _omega[i+first+2*_nmax] = _omega[j+2*_nmax];
    }
    if(data_mask & RADIUS_MASK) _radius[i+first] = _radius[j];
    if(data_mask & RMASS_MASK)  _rmass[i+first]  = _rmass[j];
  }
}

/* Unpack a received forward-communication buffer into the ghost slots
   starting at 'first'; the field order matches the pack kernel above. */
template <const unsigned int data_mask>
__global__ void Cuda_AtomVecCuda_UnpackComm_Kernel(int n, int first, void* buffer)
{
  int i = (blockIdx.x*gridDim.y + blockIdx.y)*blockDim.x + threadIdx.x;

  if(i < n)
  {
    int k = 0;
    if(data_mask & X_MASK) {
      _x[i+first]         = ((X_FLOAT*) buffer)[i+k*n]; k++;
      _x[i+first+_nmax]   = ((X_FLOAT*) buffer)[i+k*n]; k++;
      _x[i+first+2*_nmax] = ((X_FLOAT*) buffer)[i+k*n]; k++;
    }
    if(data_mask & V_MASK) {
      _v[i+first]         = ((X_FLOAT*) buffer)[i+k*n]; k++;
      _v[i+first+_nmax]   = ((X_FLOAT*) buffer)[i+k*n]; k++;
      _v[i+first+2*_nmax] = ((X_FLOAT*) buffer)[i+k*n]; k++;
    }
    if(data_mask & OMEGA_MASK) {
      _omega[i+first]         = ((X_FLOAT*) buffer)[i+k*n]; k++;
      _omega[i+first+_nmax]   = ((X_FLOAT*) buffer)[i+k*n]; k++;
      _omega[i+first+2*_nmax] = ((X_FLOAT*) buffer)[i+k*n]; k++;
    }
    if(data_mask & RADIUS_MASK) _radius[i+first] = ((X_FLOAT*) buffer)[i+k*n]; k++;
    if(data_mask & RMASS_MASK)  _rmass[i+first]  = ((X_FLOAT*) buffer)[i+k*n]; k++;
  }
}

/* Build the list of atoms that have left the local subdomain along dimension
   'dim': each block counts its flagged atoms in shared memory (thread 0 turns
   the flags into block-local positions), reserves space via atomicAdd on _flag,
   and the flagged threads write their atom index into _buffer (element 0 is
   reserved for the total count, the index list starts at element 1). */
__global__ void Cuda_AtomVecCuda_PackExchangeList_Kernel(int n, int dim)
{
  double* buf = (double*) _buffer;
  buf = &buf[1];
  int i = (blockIdx.x*gridDim.y + blockIdx.y)*blockDim.x + threadIdx.x;

  bool add = false;
  if(i < _nlocal)
  {
    double xdim_tmp = static_cast <double> (_x[i+dim*_nmax]);
    if (xdim_tmp < _sublo[dim] || xdim_tmp >= _subhi[dim])
    {
      add = true;
    }
  }

  shared[threadIdx.x] = add ? 1 : 0;
  __syncthreads();

  int nsend = 0;
  if(threadIdx.x == 0)
  {
    for(int k = 0; k < blockDim.x; k++)
    {
      if(shared[k]) { nsend++; shared[k] = nsend; }       /* position within this block */
    }
    shared[blockDim.x] = atomicAdd(_flag, nsend);         /* base offset of this block */
  }
  __syncthreads();

  if(add)
  {
    int pos = shared[blockDim.x] + shared[threadIdx.x] - 1;
    if(pos < n) buf[pos] = i;
  }
}

/* Pack the data of departing atom buf[1+k] (k-th entry of the exchange list)
   into _buffer and backfill its slot with atom copylist[k] so the local
   arrays stay contiguous. */
template <const unsigned int data_mask>
__global__ void Cuda_AtomVecCuda_PackExchange_Kernel(int nsend, int* copylist)
{
  double* buf = (double*) _buffer;
  int k = (blockIdx.x*gridDim.y + blockIdx.y)*blockDim.x + threadIdx.x;
  if(k >= nsend) return;

  buf = &buf[1+k];
  int i = static_cast <int> (buf[0]);
  int j = copylist[k];

  int m = 1;
  if(data_mask & X_MASK) {
    buf[(m++)*nsend] = static_cast <double> (_x[i]);
    buf[(m++)*nsend] = static_cast <double> (_x[i+_nmax]);
    buf[(m++)*nsend] = static_cast <double> (_x[i+2*_nmax]);
  }
  if(data_mask & V_MASK) {
    buf[(m++)*nsend] = _v[i];
    buf[(m++)*nsend] = _v[i+_nmax];
    buf[(m++)*nsend] = _v[i+2*_nmax];
  }
  if(data_mask & TAG_MASK)      buf[(m++)*nsend] = _tag[i];
  if(data_mask & TYPE_MASK)     buf[(m++)*nsend] = _type[i];
  if(data_mask & MASK_MASK)     buf[(m++)*nsend] = _mask[i];
  if(data_mask & IMAGE_MASK)    buf[(m++)*nsend] = _image[i];
  if(data_mask & Q_MASK)        buf[(m++)*nsend] = _q[i];
  if(data_mask & MOLECULE_MASK) buf[(m++)*nsend] = _molecule[i];
  if(data_mask & RADIUS_MASK)   buf[(m++)*nsend] = _radius[i];
  if(data_mask & DENSITY_MASK)  buf[(m++)*nsend] = _density[i];
  if(data_mask & RMASS_MASK)    buf[(m++)*nsend] = _rmass[i];
  if(data_mask & OMEGA_MASK) {
    buf[(m++)*nsend] = _omega[i];
    buf[(m++)*nsend] = _omega[i+_nmax];
    buf[(m++)*nsend] = _omega[i+2*_nmax];
  }
  /* if(data_mask & NSPECIAL_MASK) {
    buf[(m++)*nsend] = _nspecial[i];
    buf[(m++)*nsend] = _nspecial[i+_nmax];
    buf[(m++)*nsend] = _nspecial[i+2*_nmax];
  } */

  /* backfill the departing atom's slot with atom j = copylist[k] */
  if(i >= _nlocal) return;

  if(data_mask & X_MASK) {
    _x[i]         = _x[j];
    _x[i+_nmax]   = _x[j+_nmax];
    _x[i+2*_nmax] = _x[j+2*_nmax];
  }
  if(data_mask & V_MASK) {
    _v[i]         = _v[j];
    _v[i+_nmax]   = _v[j+_nmax];
    _v[i+2*_nmax] = _v[j+2*_nmax];
  }
  if(data_mask & TAG_MASK)      _tag[i]      = _tag[j];
  if(data_mask & TYPE_MASK)     _type[i]     = _type[j];
  if(data_mask & MASK_MASK)     _mask[i]     = _mask[j];
  if(data_mask & IMAGE_MASK)    _image[i]    = _image[j];
  if(data_mask & Q_MASK)        _q[i]        = _q[j];
  if(data_mask & MOLECULE_MASK) _molecule[i] = _molecule[j];
  if(data_mask & RADIUS_MASK)   _radius[i]   = _radius[j];
  if(data_mask & DENSITY_MASK)  _density[i]  = _density[j];
  if(data_mask & RMASS_MASK)    _rmass[i]    = _rmass[j];
  if(data_mask & OMEGA_MASK) {
    _omega[i]         = _omega[j];
    _omega[i+_nmax]   = _omega[j+_nmax];
    _omega[i+2*_nmax] = _omega[j+2*_nmax];
  }
  /* if(data_mask & NSPECIAL_MASK) {
    _nspecial[i]         = _nspecial[j];
    _nspecial[i+_nmax]   = _nspecial[j+_nmax];
    _nspecial[i+2*_nmax] = _nspecial[j+2*_nmax];
  } */
}

/* Unpack received exchange atoms: an atom is accepted if its coordinate along
   'dim' lies inside the local subdomain (with a SMALL tolerance); it is
   appended after the current _nlocal atoms, and copylist[k] records where it
   went (-1 if it was not accepted). */
template <const unsigned int data_mask>
__global__ void Cuda_AtomVecCuda_UnpackExchange_Kernel(int dim, int nsend, int* copylist)
{
  double* buf = (double*) _buffer;
  int k = (blockIdx.x*gridDim.y + blockIdx.y)*blockDim.x + threadIdx.x;
  if(k >= nsend) return;

  buf = &buf[1+k];
  int i = -1;

  double xdim_tmp = buf[(1+dim)*nsend];
  if(xdim_tmp >= _sublo[dim]-SMALL && xdim_tmp < _subhi[dim]+SMALL)
  {
    i = atomicAdd(_flag, 1) + _nlocal;

    int m = 1;
    if(data_mask & X_MASK) {
      _x[i]         = buf[(m++)*nsend];
      _x[i+_nmax]   = buf[(m++)*nsend];
      _x[i+2*_nmax] = buf[(m++)*nsend];
    }
    if(data_mask & V_MASK) {
      _v[i]         = buf[(m++)*nsend];
      _v[i+_nmax]   = buf[(m++)*nsend];
      _v[i+2*_nmax] = buf[(m++)*nsend];
    }
    if(data_mask & TAG_MASK)      _tag[i]      = buf[(m++)*nsend];
    if(data_mask & TYPE_MASK)     _type[i]     = buf[(m++)*nsend];
    if(data_mask & MASK_MASK)     _mask[i]     = buf[(m++)*nsend];
    if(data_mask & IMAGE_MASK)    _image[i]    = buf[(m++)*nsend];
    if(data_mask & Q_MASK)        _q[i]        = buf[(m++)*nsend];
    if(data_mask & MOLECULE_MASK) _molecule[i] = buf[(m++)*nsend];
    if(data_mask & RADIUS_MASK)   _radius[i]   = buf[(m++)*nsend];
    if(data_mask & DENSITY_MASK)  _density[i]  = buf[(m++)*nsend];
    if(data_mask & RMASS_MASK)    _rmass[i]    = buf[(m++)*nsend];
    if(data_mask & OMEGA_MASK) {
      _omega[i]         = buf[(m++)*nsend];
      _omega[i+_nmax]   = buf[(m++)*nsend];
      _omega[i+2*_nmax] = buf[(m++)*nsend];
    }
    /* if(data_mask & NSPECIAL_MASK) {
      _nspecial[i]         = buf[(m++)*nsend];
      _nspecial[i+_nmax]   = buf[(m++)*nsend];
      _nspecial[i+2*_nmax] = buf[(m++)*nsend];
    } */
  }
  copylist[k] = i;
}

/* Pack the atoms listed in sendlist into _buffer for border communication;
   besides the shifted coordinates this also sends the per-atom integer fields. */
template <const unsigned int data_mask>
__global__ void Cuda_AtomVecCuda_PackBorder_Kernel(int* sendlist, int n, int maxlistlength, int iswap,
                                                   X_FLOAT dx, X_FLOAT dy, X_FLOAT dz)
{
  int i = (blockIdx.x*gridDim.y + blockIdx.y)*blockDim.x + threadIdx.x;
  int* list = sendlist + iswap*maxlistlength;

  if(i < n)
  {
    int j = list[i];
    if(j > _nmax) _flag[0] = 1;

    int m = 0;
    if(data_mask & X_MASK) {
      ((X_FLOAT*) _buffer)[i+(m++)*n] = _x[j]         + dx;
      ((X_FLOAT*) _buffer)[i+(m++)*n] = _x[j+_nmax]   + dy;
      ((X_FLOAT*) _buffer)[i+(m++)*n] = _x[j+2*_nmax] + dz;
    }
    if(data_mask & TAG_MASK)      ((X_FLOAT*) _buffer)[i+(m++)*n] = _tag[j];
    if(data_mask & TYPE_MASK)     ((X_FLOAT*) _buffer)[i+(m++)*n] = _type[j];
    if(data_mask & MASK_MASK)     ((X_FLOAT*) _buffer)[i+(m++)*n] = _mask[j];
    if(data_mask & Q_MASK)        ((X_FLOAT*) _buffer)[i+(m++)*n] = _q[j];
    if(data_mask & MOLECULE_MASK) ((X_FLOAT*) _buffer)[i+(m++)*n] = _molecule[j];
    if(data_mask & RADIUS_MASK)   ((X_FLOAT*) _buffer)[i+(m++)*n] = _radius[j];
    if(data_mask & DENSITY_MASK)  ((X_FLOAT*) _buffer)[i+(m++)*n] = _density[j];
    if(data_mask & RMASS_MASK)    ((X_FLOAT*) _buffer)[i+(m++)*n] = _rmass[j];
    if(data_mask & OMEGA_MASK) {
      ((X_FLOAT*) _buffer)[i+(m++)*n] = _omega[j];
      ((X_FLOAT*) _buffer)[i+(m++)*n] = _omega[j+_nmax];
      ((X_FLOAT*) _buffer)[i+(m++)*n] = _omega[j+2*_nmax];
    }
  }
}

/* Border communication with the same process: copy atom list[i] directly into
   the ghost slot i+first. */
template <const unsigned int data_mask>
__global__ void Cuda_AtomVecCuda_PackBorder_Self_Kernel(int* sendlist, int n, int maxlistlength, int iswap,
                                                        X_FLOAT dx, X_FLOAT dy, X_FLOAT dz, int first)
{
  int i = (blockIdx.x*gridDim.y + blockIdx.y)*blockDim.x + threadIdx.x;
  int* list = sendlist + iswap*maxlistlength;

  if(i < n)
  {
    int j = list[i];

    if(data_mask & X_MASK) {
      _x[i+first]         = _x[j]         + dx;
      _x[i+first+_nmax]   = _x[j+_nmax]   + dy;
      _x[i+first+2*_nmax] = _x[j+2*_nmax] + dz;
    }
    if(data_mask & TAG_MASK)      _tag[i+first]      = _tag[j];
    if(data_mask & TYPE_MASK)     _type[i+first]     = _type[j];
    if(data_mask & MASK_MASK)     _mask[i+first]     = _mask[j];
    if(data_mask & Q_MASK)        _q[i+first]        = _q[j];
    if(data_mask & MOLECULE_MASK) _molecule[i+first] = _molecule[j];
    if(data_mask & RADIUS_MASK)   _radius[i+first]   = _radius[j];
    if(data_mask & DENSITY_MASK)  _density[i+first]  = _density[j];
    if(data_mask & RMASS_MASK)    _rmass[i+first]    = _rmass[j];
    if(data_mask & OMEGA_MASK) {
      _omega[i+first]         = _omega[j];
      _omega[i+first+_nmax]   = _omega[j+_nmax];
      _omega[i+first+2*_nmax] = _omega[j+2*_nmax];
    }
  }
}

/* Unpack a received border-communication buffer into the ghost slots starting
   at 'first'; integer fields are converted back from X_FLOAT. Sets _flag[0]
   if the ghost atoms would not fit into the allocated arrays. */
template <const unsigned int data_mask>
__global__ void Cuda_AtomVecCuda_UnpackBorder_Kernel(int n, int first)
{
  int i = (blockIdx.x*gridDim.y + blockIdx.y)*blockDim.x + threadIdx.x;

  if(i < n)
  {
    if(i+first < _nmax)
    {
      int m = 0;
      if(data_mask & X_MASK) {
        _x[i+first]         = ((X_FLOAT*) _buffer)[i+(m++)*n];
        _x[i+first+_nmax]   = ((X_FLOAT*) _buffer)[i+(m++)*n];
        _x[i+first+2*_nmax] = ((X_FLOAT*) _buffer)[i+(m++)*n];
      }
      if(data_mask & TAG_MASK)  _tag[i+first]  = static_cast <int> (((X_FLOAT*) _buffer)[i+(m++)*n]);
      if(data_mask & TYPE_MASK) _type[i+first] = static_cast <int> (((X_FLOAT*) _buffer)[i+(m++)*n]);
      if(data_mask & MASK_MASK) _mask[i+first] = static_cast <int> (((X_FLOAT*) _buffer)[i+(m++)*n]);
      if(data_mask & Q_MASK)        _q[i+first]        = ((X_FLOAT*) _buffer)[i+(m++)*n];
      if(data_mask & MOLECULE_MASK) _molecule[i+first] = static_cast <int> (((X_FLOAT*) _buffer)[i+(m++)*n]);
      if(data_mask & RADIUS_MASK)   _radius[i+first]   = ((X_FLOAT*) _buffer)[i+(m++)*n];
      if(data_mask & DENSITY_MASK)  _density[i+first]  = ((X_FLOAT*) _buffer)[i+(m++)*n];
      if(data_mask & RMASS_MASK)    _rmass[i+first]    = ((X_FLOAT*) _buffer)[i+(m++)*n];
      if(data_mask & OMEGA_MASK) {
        _omega[i+first]         = ((X_FLOAT*) _buffer)[i+(m++)*n];
        _omega[i+first+_nmax]   = ((X_FLOAT*) _buffer)[i+(m++)*n];
        _omega[i+first+2*_nmax] = ((X_FLOAT*) _buffer)[i+(m++)*n];
      }
    }
    else
    {
      _flag[0] = 1;
    }
  }
}
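
/* Illustrative only: a minimal host-side launch sketch for the forward-communication
   pack kernel above. The wrapper name, block size, and synchronization call below are
   assumptions for illustration; they are not the USER-CUDA host wrappers. The kernels
   compute i = (blockIdx.x*gridDim.y + blockIdx.y)*blockDim.x + threadIdx.x, so a plain
   1-D grid (gridDim.y == 1) gives one thread per send-list entry. Note that
   Cuda_AtomVecCuda_PackExchangeList_Kernel indexes shared[blockDim.x], so a launch of
   that kernel would need (blockDim.x+1)*sizeof(int) of dynamic shared memory. */
static void example_pack_comm_launch(int* dev_sendlist, int n, int maxlistlength, int iswap,
                                     X_FLOAT dx, X_FLOAT dy, X_FLOAT dz, void* dev_buffer)
{
  const int threads = 64;                              /* hypothetical block size */
  dim3 grid((n + threads - 1) / threads, 1, 1);        /* 1-D grid: one thread per list entry */
  dim3 block(threads, 1, 1);

  /* pack only coordinates here; other masks (V_MASK, OMEGA_MASK, ...) could be OR'ed in */
  Cuda_AtomVecCuda_PackComm_Kernel<X_MASK> <<<grid, block>>>
      (dev_sendlist, n, maxlistlength, iswap, dx, dy, dz, dev_buffer);
  cudaDeviceSynchronize();
}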