/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator

   Original Version:
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   See the README file in the top-level LAMMPS directory.

   -----------------------------------------------------------------------

   USER-CUDA Package and associated modifications:
   https://sourceforge.net/projects/lammpscuda/

   Christian Trott, christian.trott@tu-ilmenau.de
   Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
   Theoretical Physics II, University of Technology Ilmenau, Germany

   See the README file in the USER-CUDA directory.

   This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */

// pack the coordinates of the atoms in this swap's sendlist into buffer,
// applying the periodic-image shift (dx, dy, dz); buffer stores the x, y
// and z components contiguously in three blocks of n values each
__global__ void Cuda_CommCuda_PackComm_Kernel(int* sendlist, int n, int maxlistlength, int iswap,
    X_FLOAT dx, X_FLOAT dy, X_FLOAT dz, void* buffer)
{
  int i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
  int* list = sendlist + iswap * maxlistlength;

  if(i < n) {
    int j = list[i];

    if(j > _nmax) _flag[0] = 1;   // sendlist entry out of range: raise the error flag

    ((X_FLOAT*) buffer)[i]         = _x[j] + dx;
    ((X_FLOAT*) buffer)[i + 1 * n] = _x[j + _nmax] + dy;
    ((X_FLOAT*) buffer)[i + 2 * n] = _x[j + 2 * _nmax] + dz;
  }
}

// as PackComm, but additionally pack the velocities (needed when ghost
// atoms carry velocity information); buffer layout is x, y, z, vx, vy, vz
// in six blocks of n values each
__global__ void Cuda_CommCuda_PackCommVel_Kernel(int* sendlist, int n, int maxlistlength, int iswap,
    X_FLOAT dx, X_FLOAT dy, X_FLOAT dz, void* buffer)
{
  int i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
  int* list = sendlist + iswap * maxlistlength;

  if(i < n) {
    int j = list[i];

    if(j > _nmax) _flag[0] = 1;

    ((X_FLOAT*) buffer)[i]         = _x[j] + dx;
    ((X_FLOAT*) buffer)[i + 1 * n] = _x[j + _nmax] + dy;
    ((X_FLOAT*) buffer)[i + 2 * n] = _x[j + 2 * _nmax] + dz;
    ((X_FLOAT*) buffer)[i + 3 * n] = _v[j];
    ((X_FLOAT*) buffer)[i + 4 * n] = _v[j + _nmax];
    ((X_FLOAT*) buffer)[i + 5 * n] = _v[j + 2 * _nmax];
  }
}

// self-communication variant: source and destination live on the same
// process, so the shifted coordinates are copied directly into the
// ghost-atom slots starting at index first instead of through a buffer
__global__ void Cuda_CommCuda_PackComm_Self_Kernel(int* sendlist, int n, int maxlistlength, int iswap,
    X_FLOAT dx, X_FLOAT dy, X_FLOAT dz, int first)
{
  int i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
  int* list = sendlist + iswap * maxlistlength;

  if(i < n) {
    int j = list[i];

    _x[i + first]             = _x[j] + dx;
    _x[i + first + _nmax]     = _x[j + _nmax] + dy;
    _x[i + first + 2 * _nmax] = _x[j + 2 * _nmax] + dz;
  }
}

// build the sendlist for one swap in single style: every atom whose
// coordinate along dim falls inside the slab [lo,hi] is appended to the
// list; each block flags its hits in shared memory, thread 0 converts the
// flags into block-local ranks and reserves a contiguous range of list
// slots with one atomicAdd on the running counter in _buffer[0], which
// the host reads back afterwards as the total send count; requires
// (blockDim.x + 1) * sizeof(int) of dynamic shared memory
__global__ void Cuda_CommCuda_BuildSendlist_Single(int bordergroup, int ineed, int atom_nfirst,
    int nfirst, int nlast, int dim, int iswap, X_FLOAT lo, X_FLOAT hi,
    int* sendlist, int maxlistlength)
{
  extern __shared__ int shared[];
  int* list = sendlist + iswap * maxlistlength;
  bool add = false;

  if(!bordergroup || ineed >= 2) {
    // scan all atoms in [nfirst, nlast)
    int i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x + nfirst;

    if(i < nlast)
      if(_x[i + dim * _nmax] >= lo && _x[i + dim * _nmax] <= hi) {
        add = true;
      }

    shared[threadIdx.x] = add ? 1 : 0;

    __syncthreads();

    int nsend = 0;

    if(threadIdx.x == 0) {
      // serial scan: shared[k] becomes the 1-based rank of thread k
      // among this block's hits
      for(int k = 0; k < blockDim.x; k++) {
        if(shared[k]) {
          nsend++;
          shared[k] = nsend;
        }
      }

      // reserve nsend slots; shared[blockDim.x] is the block's base offset
      shared[blockDim.x] = atomicAdd((int*) _buffer, nsend);
    }

    __syncthreads();

    if(add) list[shared[blockDim.x] + shared[threadIdx.x] - 1] = i;
  } else {
    // bordergroup case: only the border group [0, atom_nfirst) ...
    int i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;

    if(i < atom_nfirst)
      if(_x[i + dim * _nmax] >= lo && _x[i + dim * _nmax] <= hi) {
        add = true;
      }

    shared[threadIdx.x] = add ? 1 : 0;

    __syncthreads();

    int nsend = 0;

    if(threadIdx.x == 0) {
      for(int k = 0; k < blockDim.x; k++) {
        if(shared[k]) {
          nsend++;
          shared[k] = nsend;
        }
      }

      shared[blockDim.x] = atomicAdd((int*) _buffer, nsend);
    }

    __syncthreads();

    if(add) list[shared[blockDim.x] + shared[threadIdx.x] - 1] = i;

    __syncthreads();

    // ... and the ghost atoms in [_nlocal, nlast) are checked
    add = false;
    i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x + _nlocal;

    if(i < nlast)
      if(_x[i + dim * _nmax] >= lo && _x[i + dim * _nmax] <= hi) {
        add = true;
      }

    shared[threadIdx.x] = add ? 1 : 0;

    __syncthreads();

    nsend = 0;

    if(threadIdx.x == 0) {
      for(int k = 0; k < blockDim.x; k++) {
        if(shared[k]) {
          nsend++;
          shared[k] = nsend;
        }
      }

      shared[blockDim.x] = atomicAdd((int*) _buffer, nsend);
    }

    __syncthreads();

    if(add) list[shared[blockDim.x] + shared[threadIdx.x] - 1] = i;
  }
}

// multi style: like BuildSendlist_Single, but each atom type has its own
// slab bounds mlo[itype], mhi[itype]
__global__ void Cuda_CommCuda_BuildSendlist_Multi(int bordergroup, int ineed, int atom_nfirst,
    int nfirst, int nlast, int dim, int iswap, X_FLOAT* mlo, X_FLOAT* mhi,
    int* sendlist, int maxlistlength)
{
  extern __shared__ int shared[];
  int* list = sendlist + iswap * maxlistlength;
  bool add = false;

  if(!bordergroup || ineed >= 2) {
    int i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x + nfirst;

    if(i < nlast) {
      int itype = _type[i];

      if(_x[i + dim * _nmax] >= mlo[itype] && _x[i + dim * _nmax] <= mhi[itype]) {
        add = true;
      }
    }

    shared[threadIdx.x] = add ? 1 : 0;

    __syncthreads();

    int nsend = 0;

    if(threadIdx.x == 0) {
      for(int k = 0; k < blockDim.x; k++) {
        if(shared[k]) {
          nsend++;
          shared[k] = nsend;
        }
      }

      shared[blockDim.x] = atomicAdd((int*) _buffer, nsend);
    }

    __syncthreads();

    if(add) list[shared[blockDim.x] + shared[threadIdx.x] - 1] = i;
  } else {
    int i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;

    if(i < atom_nfirst) {
      int itype = _type[i];

      if(_x[i + dim * _nmax] >= mlo[itype] && _x[i + dim * _nmax] <= mhi[itype]) {
        add = true;
      }
    }

    shared[threadIdx.x] = add ? 1 : 0;

    __syncthreads();

    int nsend = 0;

    if(threadIdx.x == 0) {
      for(int k = 0; k < blockDim.x; k++) {
        if(shared[k]) {
          nsend++;
          shared[k] = nsend;
        }
      }

      shared[blockDim.x] = atomicAdd((int*) _buffer, nsend);
    }

    __syncthreads();

    if(add) list[shared[blockDim.x] + shared[threadIdx.x] - 1] = i;

    __syncthreads();

    add = false;
    i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x + _nlocal;

    if(i < nlast) {
      int itype = _type[i];

      if(_x[i + dim * _nmax] >= mlo[itype] && _x[i + dim * _nmax] <= mhi[itype]) {
        add = true;
      }
    }

    shared[threadIdx.x] = add ? 1 : 0;

    __syncthreads();

    nsend = 0;

    if(threadIdx.x == 0) {
      for(int k = 0; k < blockDim.x; k++) {
        if(shared[k]) {
          nsend++;
          shared[k] = nsend;
        }
      }

      shared[blockDim.x] = atomicAdd((int*) _buffer, nsend);
    }

    __syncthreads();

    if(add) list[shared[blockDim.x] + shared[threadIdx.x] - 1] = i;
  }
}
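
/* ----------------------------------------------------------------------
   Usage sketch (not part of the original USER-CUDA sources): a minimal
   host-side launcher for the pack kernel above, showing how the flat
   thread index used by these kernels maps onto a launch configuration.
   The wrapper name, the bare-pointer interface, and the 64-thread block
   size are illustrative assumptions; the real package drives its kernels
   through cuda_shared_data and its own grid-splitting helpers.
------------------------------------------------------------------------- */
#if 0
static void Cuda_CommCuda_PackComm_Launch(int* dev_sendlist, int n, int maxlistlength,
    int iswap, X_FLOAT dx, X_FLOAT dy, X_FLOAT dz, void* dev_buffer)
{
  const int threads = 64;                      // threads per block (assumed)
  int nblocks = (n + threads - 1) / threads;   // one thread per sendlist entry

  // the kernels compute i = (blockIdx.x*gridDim.y + blockIdx.y)*blockDim.x
  // + threadIdx.x, so a grid with gridDim.y == 1 keeps that index linear
  dim3 grid(nblocks, 1, 1);
  dim3 block(threads, 1, 1);

  // dev_buffer must hold at least 3*n X_FLOAT values (x, y, z blocks)
  Cuda_CommCuda_PackComm_Kernel <<< grid, block>>>(dev_sendlist, n, maxlistlength,
      iswap, dx, dy, dz, dev_buffer);
  cudaDeviceSynchronize();                     // surface launch errors immediately

  // the BuildSendlist kernels additionally need dynamic shared memory for
  // the per-block compaction, e.g.
  // Cuda_CommCuda_BuildSendlist_Single
  //   <<< grid, block, (threads + 1) * sizeof(int)>>>(...);
}
#endif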