git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7283 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -146,6 +146,10 @@ void Neighbor::alloc(bool &success) {
|
|||||||
ptr+=_max_nbors;
|
ptr+=_max_nbors;
|
||||||
}
|
}
|
||||||
_c_bytes+=dev_host_nbor.row_bytes()+dev_host_numj.row_bytes();
|
_c_bytes+=dev_host_nbor.row_bytes()+dev_host_numj.row_bytes();
|
||||||
|
} else {
|
||||||
|
// Some OpenCL implementations return errors for NULL pointers as args
|
||||||
|
dev_host_nbor.view(dev_nbor);
|
||||||
|
dev_host_numj.view(dev_nbor);
|
||||||
}
|
}
|
||||||
if (_maxspecial>0) {
|
if (_maxspecial>0) {
|
||||||
dev_nspecial.clear();
|
dev_nspecial.clear();
|
||||||
@ -460,7 +464,8 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum,
|
|||||||
ptr+=mn;
|
ptr+=mn;
|
||||||
}
|
}
|
||||||
_gpu_bytes+=dev_host_nbor.row_bytes();
|
_gpu_bytes+=dev_host_nbor.row_bytes();
|
||||||
}
|
} else
|
||||||
|
dev_host_nbor.view(dev_nbor);
|
||||||
if (_alloc_packed) {
|
if (_alloc_packed) {
|
||||||
dev_packed.clear();
|
dev_packed.clear();
|
||||||
success=success && (dev_packed.alloc((mn+2)*_max_atoms,*dev,
|
success=success && (dev_packed.alloc((mn+2)*_max_atoms,*dev,
|
||||||
|
|||||||
@ -200,7 +200,7 @@ __kernel void calc_neigh_list_cell(__global numtyp4 *x_,
|
|||||||
r2 = diff.x*diff.x + diff.y*diff.y + diff.z*diff.z;
|
r2 = diff.x*diff.x + diff.y*diff.y + diff.z*diff.z;
|
||||||
if (r2 < cell_size*cell_size && r2 > 1e-5) {
|
if (r2 < cell_size*cell_size && r2 > 1e-5) {
|
||||||
cnt++;
|
cnt++;
|
||||||
if (cnt < neigh_bin_size) {
|
if (cnt <= neigh_bin_size) {
|
||||||
*neigh_list = pid_j;
|
*neigh_list = pid_j;
|
||||||
neigh_list++;
|
neigh_list++;
|
||||||
if ((cnt & (t_per_atom-1))==0)
|
if ((cnt & (t_per_atom-1))==0)
|
||||||
|
|||||||
@ -50,7 +50,8 @@ void NeighborShared::compile_kernels(UCL_Device &dev, const int gpu_nbor) {
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
_gpu_nbor=gpu_nbor;
|
_gpu_nbor=gpu_nbor;
|
||||||
std::string flags="-cl-fast-relaxed-math -cl-mad-enable -D"+
|
std::string flags="-cl-fast-relaxed-math -cl-mad-enable "+
|
||||||
|
std::string(OCL_PRECISION_COMPILE)+" -D"+
|
||||||
std::string(OCL_VENDOR);
|
std::string(OCL_VENDOR);
|
||||||
|
|
||||||
if (_gpu_nbor==0) {
|
if (_gpu_nbor==0) {
|
||||||
|
|||||||
@ -67,6 +67,22 @@
|
|||||||
|
|
||||||
#ifdef NV_KERNEL
|
#ifdef NV_KERNEL
|
||||||
|
|
||||||
|
#define GLOBAL_ID_X threadIdx.x+mul24(blockIdx.x,blockDim.x)
|
||||||
|
#define GLOBAL_ID_Y threadIdx.y+mul24(blockIdx.y,blockDim.y)
|
||||||
|
#define GLOBAL_SIZE_X mul24(gridDim.x,blockDim.x);
|
||||||
|
#define GLOBAL_SIZE_Y mul24(gridDim.y,blockDim.y);
|
||||||
|
#define THREAD_ID_X threadIdx.x
|
||||||
|
#define THREAD_ID_Y threadIdx.y
|
||||||
|
#define BLOCK_ID_X blockIdx.x
|
||||||
|
#define BLOCK_ID_Y blockIdx.y
|
||||||
|
#define BLOCK_SIZE_X blockDim.x
|
||||||
|
#define BLOCK_SIZE_Y blockDim.y
|
||||||
|
#define __kernel extern "C" __global__
|
||||||
|
#define __local __shared__
|
||||||
|
#define __global
|
||||||
|
#define atom_add atomicAdd
|
||||||
|
#define ucl_inline static __inline__ __device__
|
||||||
|
|
||||||
#ifdef __CUDA_ARCH__
|
#ifdef __CUDA_ARCH__
|
||||||
#define ARCH __CUDA_ARCH__
|
#define ARCH __CUDA_ARCH__
|
||||||
#else
|
#else
|
||||||
@ -120,24 +136,7 @@ struct __builtin_align__(16) _double4
|
|||||||
typedef struct _double4 double4;
|
typedef struct _double4 double4;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define GLOBAL_ID_X threadIdx.x+mul24(blockIdx.x,blockDim.x)
|
#ifdef _DOUBLE_DOUBLE
|
||||||
#define GLOBAL_ID_Y threadIdx.y+mul24(blockIdx.y,blockDim.y)
|
|
||||||
#define GLOBAL_SIZE_X mul24(gridDim.x,blockDim.x);
|
|
||||||
#define GLOBAL_SIZE_Y mul24(gridDim.y,blockDim.y);
|
|
||||||
#define THREAD_ID_X threadIdx.x
|
|
||||||
#define THREAD_ID_Y threadIdx.y
|
|
||||||
#define BLOCK_ID_X blockIdx.x
|
|
||||||
#define BLOCK_ID_Y blockIdx.y
|
|
||||||
#define BLOCK_SIZE_X blockDim.x
|
|
||||||
#define BLOCK_SIZE_Y blockDim.y
|
|
||||||
#define __kernel extern "C" __global__
|
|
||||||
#define __local __shared__
|
|
||||||
#define __global
|
|
||||||
#define atom_add atomicAdd
|
|
||||||
#define ucl_inline static __inline__ __device__
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef _DOUBLE_DOUBLE
|
|
||||||
|
|
||||||
#define ucl_exp exp
|
#define ucl_exp exp
|
||||||
#define ucl_powr pow
|
#define ucl_powr pow
|
||||||
@ -156,20 +155,18 @@ typedef struct _double4 double4;
|
|||||||
#define ucl_ceil ceilf
|
#define ucl_ceil ceilf
|
||||||
#define ucl_abs fabsf
|
#define ucl_abs fabsf
|
||||||
#define ucl_recip(x) ((numtyp)1.0/(x))
|
#define ucl_recip(x) ((numtyp)1.0/(x))
|
||||||
|
#define ucl_rsqrt rsqrtf
|
||||||
|
#define ucl_sqrt sqrtf
|
||||||
|
|
||||||
#ifdef NO_HARDWARE_TRANSCENDENTALS
|
#ifdef NO_HARDWARE_TRANSCENDENTALS
|
||||||
|
|
||||||
#define ucl_exp expf
|
#define ucl_exp expf
|
||||||
#define ucl_powr powf
|
#define ucl_powr powf
|
||||||
#define ucl_rsqrt rsqrtf
|
|
||||||
#define ucl_sqrt sqrtf
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define ucl_exp __expf
|
#define ucl_exp __expf
|
||||||
#define ucl_powr __powf
|
#define ucl_powr __powf
|
||||||
#define ucl_rsqrt __rsqrtf
|
|
||||||
#define ucl_sqrt __sqrtf
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -255,6 +252,10 @@ typedef struct _double4 double4;
|
|||||||
#define ucl_ceil ceil
|
#define ucl_ceil ceil
|
||||||
#define ucl_abs fabs
|
#define ucl_abs fabs
|
||||||
|
|
||||||
|
#ifdef _DOUBLE_DOUBLE
|
||||||
|
#define NO_HARDWARE_TRANSCENDENTALS
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef NO_HARDWARE_TRANSCENDENTALS
|
#ifdef NO_HARDWARE_TRANSCENDENTALS
|
||||||
|
|
||||||
#define ucl_exp exp
|
#define ucl_exp exp
|
||||||
|
|||||||
Reference in New Issue
Block a user