/* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator Original Version: http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level LAMMPS directory. ----------------------------------------------------------------------- USER-CUDA Package and associated modifications: https://sourceforge.net/projects/lammpscuda/ Christian Trott, christian.trott@tu-ilmenau.de Lars Winterfeld, lars.winterfeld@tu-ilmenau.de Theoretical Physics II, University of Technology Ilmenau, Germany See the README file in the USER-CUDA directory. This software is distributed under the GNU General Public License. ------------------------------------------------------------------------- */ extern __shared__ X_CFLOAT sharedmem[]; #define BIG 1e10 __global__ void Domain_PBC_Kernel(int deform_remap, int deform_groupbit, int box_change) { int idim, otherdims; int i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; X_CFLOAT lo[3]; X_CFLOAT hi[3]; X_CFLOAT* period; if(_triclinic == 0) { lo[0] = _boxlo[0]; lo[1] = _boxlo[1]; lo[2] = _boxlo[2]; hi[0] = _boxhi[0]; hi[1] = _boxhi[1]; hi[2] = _boxhi[2]; period = _prd; } else { lo[0] = _boxlo_lamda[0]; lo[1] = _boxlo_lamda[1]; lo[2] = _boxlo_lamda[2]; hi[0] = _boxhi_lamda[0]; hi[1] = _boxhi_lamda[1]; hi[2] = _boxhi_lamda[2]; period = _prd_lamda; } X_CFLOAT tmpx = X_F(0.5) * (hi[0] + lo[0]); X_CFLOAT tmpy = X_F(0.5) * (hi[1] + lo[1]); X_CFLOAT tmpz = X_F(0.5) * (hi[2] + lo[2]); X_CFLOAT* buf = (X_CFLOAT*) _buffer; buf += blockIdx.x * gridDim.y + blockIdx.y; buf[0] = tmpx; buf += gridDim.x * gridDim.y; buf[0] = tmpx; buf += gridDim.x * gridDim.y; buf[0] = tmpy; buf += gridDim.x * gridDim.y; buf[0] = tmpy; buf += gridDim.x * gridDim.y; buf[0] = tmpz; buf += gridDim.x * gridDim.y; buf[0] = tmpz; if(i < _nlocal) { if(_periodicity[0]) { if(_x[i] < lo[0]) { _x[i] += period[0]; if(deform_remap && _mask[i] & deform_groupbit) _v[i] += _h_rate[0]; idim = _image[i] & 1023; otherdims = _image[i] ^ idim; idim--; idim &= 1023; _image[i] = otherdims | idim; } if(_x[i] >= hi[0]) { _x[i] -= period[0]; _x[i] = MAX(_x[i], lo[0]); if(deform_remap && _mask[i] & deform_groupbit) _v[i] -= _h_rate[0]; idim = _image[i] & 1023; otherdims = _image[i] ^ idim; idim++; idim &= 1023; _image[i] = otherdims | idim; } } if(_periodicity[1]) { if(_x[i + _nmax] < lo[1]) { _x[i + _nmax] += period[1]; if(deform_remap && _mask[i] & deform_groupbit) { _v[i] += _h_rate[5]; _v[i + _nmax] += _h_rate[1]; } idim = (_image[i] >> 10) & 1023; otherdims = _image[i] ^ (idim << 10); idim--; idim &= 1023; _image[i] = otherdims | (idim << 10); } if(_x[i + _nmax] >= hi[1]) { _x[i + _nmax] -= period[1]; _x[i + _nmax] = MAX(_x[i + _nmax], lo[1]); if(deform_remap && _mask[i] & deform_groupbit) { _v[i] -= _h_rate[5]; _v[i + _nmax] -= _h_rate[1]; } idim = (_image[i] >> 10) & 1023; otherdims = _image[i] ^ (idim << 10); idim++; idim &= 1023; _image[i] = otherdims | (idim << 10); } } if(_periodicity[2]) { if(_x[i + 2 * _nmax] < lo[2]) { _x[i + 2 * _nmax] += period[2]; if(deform_remap && _mask[i] & deform_groupbit) { _v[i] += _h_rate[4]; _v[i + _nmax] += _h_rate[3]; _v[i + 2 * _nmax] += _h_rate[2]; } idim = _image[i] >> 20; otherdims = _image[i] ^ (idim << 20); idim--; idim &= 1023; _image[i] = otherdims | (idim << 20); } if(_x[i + 2 * _nmax] >= hi[2]) { _x[i + 2 * _nmax] -= period[2]; _x[i + 2 * _nmax] = MAX(_x[i + 2 * _nmax], lo[2]); if(deform_remap && _mask[i] & deform_groupbit) { _v[i] -= _h_rate[4]; _v[i + _nmax] -= _h_rate[3]; _v[i + 2 * _nmax] -= _h_rate[2]; } idim = _image[i] >> 20; otherdims = _image[i] ^ (idim << 20); idim++; idim &= 1023; _image[i] = otherdims | (idim << 20); } } if(box_change) { tmpx = _x[i]; tmpy = _x[i + _nmax]; tmpz = _x[i + 2 * _nmax]; } } __syncthreads(); if(box_change) { X_CFLOAT minx = BIG; X_CFLOAT maxx = -BIG; X_CFLOAT miny = BIG; X_CFLOAT maxy = -BIG; X_CFLOAT minz = BIG; X_CFLOAT maxz = -BIG; if(not _periodicity[0]) { sharedmem[threadIdx.x] = tmpx; minOfBlock(sharedmem); minx = sharedmem[0]; __syncthreads(); sharedmem[threadIdx.x] = tmpx; maxOfBlock(sharedmem); maxx = sharedmem[0]; __syncthreads(); } else { minx = lo[0]; maxx = hi[0]; } if(not _periodicity[1]) { sharedmem[threadIdx.x] = tmpy; minOfBlock(sharedmem); miny = sharedmem[0]; __syncthreads(); sharedmem[threadIdx.x] = tmpy; maxOfBlock(sharedmem); maxy = sharedmem[0]; __syncthreads(); } else { minx = lo[1]; maxx = hi[1]; } if(not _periodicity[2]) { sharedmem[threadIdx.x] = tmpz; minOfBlock(sharedmem); minz = sharedmem[0]; __syncthreads(); sharedmem[threadIdx.x] = tmpz; maxOfBlock(sharedmem); maxz = sharedmem[0]; __syncthreads(); } else { minz = lo[2]; maxz = hi[2]; } if(threadIdx.x == 0) { buf = (X_CFLOAT*) _buffer; buf += blockIdx.x * gridDim.y + blockIdx.y; buf[0] = minx; buf += gridDim.x * gridDim.y; buf[0] = maxx; buf += gridDim.x * gridDim.y; buf[0] = miny; buf += gridDim.x * gridDim.y; buf[0] = maxy; buf += gridDim.x * gridDim.y; buf[0] = minz; buf += gridDim.x * gridDim.y; buf[0] = maxz; } } } __global__ void Domain_reduceBoxExtent(double* extent, int n) { X_CFLOAT* buf = (X_CFLOAT*) _buffer; buf += blockIdx.x * n; copyGlobToShared(buf, sharedmem, n); if(blockIdx.x % 2 == 0) minOfData(sharedmem, n); else maxOfData(sharedmem, n); extent[blockIdx.x] = sharedmem[0]; } __global__ void Domain_lamda2x_Kernel(int n) { int i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; if(i < n) { X_CFLOAT ytmp = _x[i + _nmax]; X_CFLOAT ztmp = _x[i + 2 * _nmax]; _x[i] = _h[0] * _x[i] + _h[5] * ytmp + _h[4] * ztmp + _boxlo[0]; _x[i + _nmax] = _h[1] * ytmp + _h[3] * ztmp + _boxlo[1]; _x[i + 2 * _nmax] = _h[2] * ztmp + _boxlo[2]; } } __global__ void Domain_x2lamda_Kernel(int n) { int i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; X_CFLOAT delta[3]; if(i < n) { delta[0] = _x[i] - _boxlo[0]; delta[1] = _x[i + _nmax] - _boxlo[1]; delta[2] = _x[i + 2 * _nmax] - _boxlo[2]; _x[i] = _h_inv[0] * delta[0] + _h_inv[5] * delta[1] + _h_inv[4] * delta[2]; _x[i + _nmax] = _h_inv[1] * delta[1] + _h_inv[3] * delta[2]; _x[i + 2 * _nmax] = _h_inv[2] * delta[2]; } }