/* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing authors: Mike Brown (ORNL), brownw@ornl.gov ------------------------------------------------------------------------- */ #ifndef PAIR_GPU_KERNEL_H #define PAIR_GPU_KERNEL_H #define MAX_SHARED_TYPES 8 #ifdef _DOUBLE_DOUBLE #define numtyp double #define numtyp2 double2 #define numtyp4 double4 #else #define numtyp float #define numtyp2 float2 #define numtyp4 float4 #endif #ifdef NV_KERNEL #include "geryon/ucl_nv_kernel.h" #else #pragma OPENCL EXTENSION cl_khr_fp64: enable #define GLOBAL_ID_X get_global_id(0) #define THREAD_ID_X get_local_id(0) #define BLOCK_ID_X get_group_id(0) #define BLOCK_SIZE_X get_local_size(0) #define __syncthreads() barrier(CLK_LOCAL_MEM_FENCE) #endif // --------------------------------------------------------------------------- // Unpack neighbors from dev_ij array into dev_nbor matrix for coalesced access // -- Only unpack neighbors matching the specified inclusive range of forms // -- Only unpack neighbors within cutoff // --------------------------------------------------------------------------- __kernel void kernel_gb_nbor(__global numtyp4 *x_, __global numtyp2 *cut_form, const int ntypes, __global int *dev_nbor, const int nbor_pitch, const int start, const int inum, __global int *dev_ij, const int form_low, const int form_high, const int nall) { // ii indexes the two interacting particles in gi int ii=GLOBAL_ID_X+start; if (ii=nall) j%=nall; numtyp4 jx=x_[j]; int jtype=jx.w; int mtype=itype+jtype; numtyp2 cf=cut_form[mtype]; if (cf.y>=form_low && cf.y<=form_high) { // Compute r12; numtyp rsq=jx.x-ix.x; rsq*=rsq; numtyp t=jx.y-ix.y; rsq+=t*t; t=jx.z-ix.z; rsq+=t*t; if (rsq=nall) j%=nall; numtyp4 jx=x_[j]; int jtype=jx.w; int mtype=itype+jtype; if (form[mtype]>=form_low && form[mtype]<=form_high) { // Compute r12; numtyp rsq=jx.x-ix.x; rsq*=rsq; numtyp t=jx.y-ix.y; rsq+=t*t; t=jx.z-ix.z; rsq+=t*t; if (rsq