/* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator Original Version: http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level LAMMPS directory. ----------------------------------------------------------------------- USER-CUDA Package and associated modifications: https://sourceforge.net/projects/lammpscuda/ Christian Trott, christian.trott@tu-ilmenau.de Lars Winterfeld, lars.winterfeld@tu-ilmenau.de Theoretical Physics II, University of Technology Ilmenau, Germany See the README file in the USER-CUDA directory. This software is distributed under the GNU General Public License. ------------------------------------------------------------------------- */ extern __shared__ X_FLOAT sharedmem[]; #define BIG 1e10 __global__ void Domain_PBC_Kernel(int deform_remap,int deform_groupbit,int box_change) { int idim,otherdims; int i=(blockIdx.x*gridDim.y+blockIdx.y)*blockDim.x+threadIdx.x; X_FLOAT lo[3]; X_FLOAT hi[3]; X_FLOAT* period; if (_triclinic == 0) { lo[0] = _boxlo[0]; lo[1] = _boxlo[1]; lo[2] = _boxlo[2]; hi[0] = _boxhi[0]; hi[1] = _boxhi[1]; hi[2] = _boxhi[2]; period = _prd; } else { lo[0] = _boxlo_lamda[0]; lo[1] = _boxlo_lamda[1]; lo[2] = _boxlo_lamda[2]; hi[0] = _boxhi_lamda[0]; hi[1] = _boxhi_lamda[1]; hi[2] = _boxhi_lamda[2]; period = _prd_lamda; } X_FLOAT tmpx=X_F(0.5)*(hi[0]+lo[0]); X_FLOAT tmpy=X_F(0.5)*(hi[1]+lo[1]); X_FLOAT tmpz=X_F(0.5)*(hi[2]+lo[2]); X_FLOAT* buf=(X_FLOAT*) _buffer; buf+=blockIdx.x*gridDim.y+blockIdx.y; buf[0]=tmpx; buf+=gridDim.x*gridDim.y; buf[0]=tmpx; buf+=gridDim.x*gridDim.y; buf[0]=tmpy; buf+=gridDim.x*gridDim.y; buf[0]=tmpy; buf+=gridDim.x*gridDim.y; buf[0]=tmpz; buf+=gridDim.x*gridDim.y; buf[0]=tmpz; if(i<_nlocal) { if (_periodicity[0]) { if (_x[i] < lo[0]) { _x[i] += period[0]; if (deform_remap && _mask[i] & deform_groupbit) _v[i] += _h_rate[0]; idim = _image[i] & 1023; otherdims = _image[i] ^ idim; idim--; idim &= 1023; _image[i] = otherdims | idim; } if (_x[i] >= hi[0]) { _x[i] -= period[0]; _x[i] = MAX(_x[i],lo[0]); if (deform_remap && _mask[i] & deform_groupbit) _v[i] -= _h_rate[0]; idim = _image[i] & 1023; otherdims = _image[i] ^ idim; idim++; idim &= 1023; _image[i] = otherdims | idim; } } if (_periodicity[1]) { if (_x[i+_nmax] < lo[1]) { _x[i+_nmax] += period[1]; if (deform_remap && _mask[i] & deform_groupbit) { _v[i] += _h_rate[5]; _v[i+_nmax] += _h_rate[1]; } idim = (_image[i] >> 10) & 1023; otherdims = _image[i] ^ (idim << 10); idim--; idim &= 1023; _image[i] = otherdims | (idim << 10); } if (_x[i+_nmax] >= hi[1]) { _x[i+_nmax] -= period[1]; _x[i+_nmax] = MAX(_x[i+_nmax],lo[1]); if (deform_remap && _mask[i] & deform_groupbit) { _v[i] -= _h_rate[5]; _v[i+_nmax] -= _h_rate[1]; } idim = (_image[i] >> 10) & 1023; otherdims = _image[i] ^ (idim << 10); idim++; idim &= 1023; _image[i] = otherdims | (idim << 10); } } if (_periodicity[2]) { if (_x[i+2*_nmax] < lo[2]) { _x[i+2*_nmax] += period[2]; if (deform_remap && _mask[i] & deform_groupbit) { _v[i] += _h_rate[4]; _v[i+_nmax] += _h_rate[3]; _v[i+2*_nmax] += _h_rate[2]; } idim = _image[i] >> 20; otherdims = _image[i] ^ (idim << 20); idim--; idim &= 1023; _image[i] = otherdims | (idim << 20); } if (_x[i+2*_nmax] >= hi[2]) { _x[i+2*_nmax] -= period[2]; _x[i+2*_nmax] = MAX(_x[i+2*_nmax],lo[2]); if (deform_remap && _mask[i] & deform_groupbit) { _v[i] -= _h_rate[4]; _v[i+_nmax] -= _h_rate[3]; _v[i+2*_nmax] -= _h_rate[2]; } idim = _image[i] >> 20; otherdims = _image[i] ^ (idim << 20); idim++; idim &= 1023; _image[i] = otherdims | (idim << 20); } } if(box_change) { tmpx=_x[i]; tmpy=_x[i+_nmax]; tmpz=_x[i+2*_nmax]; } } __syncthreads(); if(box_change) { X_FLOAT minx=BIG; X_FLOAT maxx=-BIG; X_FLOAT miny=BIG; X_FLOAT maxy=-BIG; X_FLOAT minz=BIG; X_FLOAT maxz=-BIG; if (not _periodicity[0]) { sharedmem[threadIdx.x]=tmpx; minOfBlock(sharedmem); minx=sharedmem[0]; __syncthreads(); sharedmem[threadIdx.x]=tmpx; maxOfBlock(sharedmem); maxx=sharedmem[0]; __syncthreads(); } else {minx=lo[0];maxx=hi[0];} if (not _periodicity[1]) { sharedmem[threadIdx.x]=tmpy; minOfBlock(sharedmem); miny=sharedmem[0]; __syncthreads(); sharedmem[threadIdx.x]=tmpy; maxOfBlock(sharedmem); maxy=sharedmem[0]; __syncthreads(); } else {minx=lo[1];maxx=hi[1];} if (not _periodicity[2]) { sharedmem[threadIdx.x]=tmpz; minOfBlock(sharedmem); minz=sharedmem[0]; __syncthreads(); sharedmem[threadIdx.x]=tmpz; maxOfBlock(sharedmem); maxz=sharedmem[0]; __syncthreads(); } else {minz=lo[2];maxz=hi[2];} if(threadIdx.x==0) { buf=(X_FLOAT*) _buffer; buf+=blockIdx.x*gridDim.y+blockIdx.y; buf[0]=minx; buf+=gridDim.x*gridDim.y; buf[0]=maxx; buf+=gridDim.x*gridDim.y; buf[0]=miny; buf+=gridDim.x*gridDim.y; buf[0]=maxy; buf+=gridDim.x*gridDim.y; buf[0]=minz; buf+=gridDim.x*gridDim.y; buf[0]=maxz; } } } __global__ void Domain_reduceBoxExtent(double* extent,int n) { X_FLOAT* buf=(X_FLOAT*) _buffer; buf+=blockIdx.x*n; copyGlobToShared(buf,sharedmem,n); if(blockIdx.x%2==0) minOfData(sharedmem,n); else maxOfData(sharedmem,n); extent[blockIdx.x]=sharedmem[0]; } __global__ void Domain_lamda2x_Kernel(int n) { int i=(blockIdx.x*gridDim.y+blockIdx.y)*blockDim.x+threadIdx.x; if(i