// ************************************************************************** // ellipsoid_nbor.cu // ------------------- // W. Michael Brown // // Device code for Ellipsoid neighbor routines // // __________________________________________________________________________ // This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) // __________________________________________________________________________ // // begin : // email : brownw@ornl.gov // ***************************************************************************/ #ifndef ELLIPSOID_NBOR_H #define ELLIPSOID_NBOR_H #ifdef NV_KERNEL #include "nv_kernel_def.h" #else #pragma OPENCL EXTENSION cl_khr_fp64: enable #define GLOBAL_ID_X get_global_id(0) #define THREAD_ID_X get_local_id(0) #define BLOCK_ID_X get_group_id(0) #define BLOCK_SIZE_X get_local_size(0) #define __syncthreads() barrier(CLK_LOCAL_MEM_FENCE) #define MAX_SHARED_TYPES 8 #endif #ifdef _DOUBLE_DOUBLE #define numtyp double #define numtyp2 double2 #define numtyp4 double4 #else #define numtyp float #define numtyp2 float2 #define numtyp4 float4 #endif #define SBBITS 30 #define NEIGHMASK 0x3FFFFFFF // --------------------------------------------------------------------------- // Unpack neighbors from dev_ij array into dev_nbor matrix for coalesced access // -- Only unpack neighbors matching the specified inclusive range of forms // -- Only unpack neighbors within cutoff // --------------------------------------------------------------------------- __kernel void kernel_nbor(__global numtyp4 *x_, __global numtyp2 *cut_form, const int ntypes, __global int *dev_nbor, const int nbor_pitch, const int start, const int inum, __global int *dev_ij, const int form_low, const int form_high) { // ii indexes the two interacting particles in gi int ii=GLOBAL_ID_X+start; if (ii=form_low && cf.y<=form_high) { // Compute r12; numtyp rsq=jx.x-ix.x; rsq*=rsq; numtyp t=jx.y-ix.y; rsq+=t*t; t=jx.z-ix.z; rsq+=t*t; if (rsq=form_low && form[mtype]<=form_high) { // Compute r12; numtyp rsq=jx.x-ix.x; rsq*=rsq; numtyp t=jx.y-ix.y; rsq+=t*t; t=jx.z-ix.z; rsq+=t*t; if (rsq