diff --git a/lib/gpu/cg_cmm.cu b/lib/gpu/cg_cmm.cu index eef3c823db..b009c8618e 100644 --- a/lib/gpu/cg_cmm.cu +++ b/lib/gpu/cg_cmm.cu @@ -17,7 +17,7 @@ #include "preprocessor.h" texture pos_tex; #ifndef _DOUBLE_DOUBLE -__inline float4 fetch_pos(const int& i, const float4 *pos) +ucl_inline float4 fetch_pos(const int& i, const float4 *pos) { return tex1Dfetch(pos_tex, i); } #endif #endif diff --git a/lib/gpu/cg_cmm_long.cu b/lib/gpu/cg_cmm_long.cu index 712656c765..cc6bf35760 100644 --- a/lib/gpu/cg_cmm_long.cu +++ b/lib/gpu/cg_cmm_long.cu @@ -18,9 +18,9 @@ texture pos_tex; texture q_tex; #ifndef _DOUBLE_DOUBLE -__inline float4 fetch_pos(const int& i, const float4 *pos) +ucl_inline float4 fetch_pos(const int& i, const float4 *pos) { return tex1Dfetch(pos_tex, i); } -__inline float fetch_q(const int& i, const float *q) +ucl_inline float fetch_q(const int& i, const float *q) { return tex1Dfetch(q_tex, i); } #endif #endif diff --git a/lib/gpu/cg_cmm_msm.cu b/lib/gpu/cg_cmm_msm.cu index 7f1aa812f4..190610cc2e 100644 --- a/lib/gpu/cg_cmm_msm.cu +++ b/lib/gpu/cg_cmm_msm.cu @@ -18,9 +18,9 @@ texture pos_tex; texture q_tex; #ifndef _DOUBLE_DOUBLE -__inline float4 fetch_pos(const int& i, const float4 *pos) +ucl_inline float4 fetch_pos(const int& i, const float4 *pos) { return tex1Dfetch(pos_tex, i); } -__inline float fetch_q(const int& i, const float *q) +ucl_inline float fetch_q(const int& i, const float *q) { return tex1Dfetch(q_tex, i); } #endif #endif diff --git a/lib/gpu/charmm_long.cu b/lib/gpu/charmm_long.cu index 5119e61947..84e3601ac8 100644 --- a/lib/gpu/charmm_long.cu +++ b/lib/gpu/charmm_long.cu @@ -18,9 +18,9 @@ texture pos_tex; texture q_tex; #ifndef _DOUBLE_DOUBLE -__inline float4 fetch_pos(const int& i, const float4 *pos) +ucl_inline float4 fetch_pos(const int& i, const float4 *pos) { return tex1Dfetch(pos_tex, i); } -__inline float fetch_q(const int& i, const float *q) +ucl_inline float fetch_q(const int& i, const float *q) { return tex1Dfetch(q_tex, i); } #endif #endif diff --git a/lib/gpu/coul_long.cu b/lib/gpu/coul_long.cu index 0857a1cc77..bf9d320e83 100644 --- a/lib/gpu/coul_long.cu +++ b/lib/gpu/coul_long.cu @@ -18,9 +18,9 @@ texture pos_tex; texture q_tex; #ifndef _DOUBLE_DOUBLE -__inline float4 fetch_pos(const int& i, const float4 *pos) +ucl_inline float4 fetch_pos(const int& i, const float4 *pos) { return tex1Dfetch(pos_tex, i); } -__inline float fetch_q(const int& i, const float *q) +ucl_inline float fetch_q(const int& i, const float *q) { return tex1Dfetch(q_tex, i); } #endif #endif diff --git a/lib/gpu/ellipsoid_extra.h b/lib/gpu/ellipsoid_extra.h index 540a032930..b8db7828a9 100644 --- a/lib/gpu/ellipsoid_extra.h +++ b/lib/gpu/ellipsoid_extra.h @@ -26,7 +26,7 @@ enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE}; dot product of 2 vectors ------------------------------------------------------------------------- */ -__inline numtyp gpu_dot3(const numtyp *v1, const numtyp *v2) +ucl_inline numtyp gpu_dot3(const numtyp *v1, const numtyp *v2) { return v1[0]*v2[0]+v1[1]*v2[1]+v1[2]*v2[2]; } @@ -35,7 +35,7 @@ __inline numtyp gpu_dot3(const numtyp *v1, const numtyp *v2) cross product of 2 vectors ------------------------------------------------------------------------- */ -__inline void gpu_cross3(const numtyp *v1, const numtyp *v2, numtyp *ans) +ucl_inline void gpu_cross3(const numtyp *v1, const numtyp *v2, numtyp *ans) { ans[0] = v1[1]*v2[2]-v1[2]*v2[1]; ans[1] = v1[2]*v2[0]-v1[0]*v2[2]; @@ -46,7 +46,7 @@ __inline void gpu_cross3(const numtyp *v1, const numtyp *v2, numtyp *ans) determinant of a matrix ------------------------------------------------------------------------- */ -__inline numtyp gpu_det3(const numtyp m[9]) +ucl_inline numtyp gpu_det3(const numtyp m[9]) { numtyp ans = m[0]*m[4]*m[8] - m[0]*m[5]*m[7] - m[3]*m[1]*m[8] + m[3]*m[2]*m[7] + @@ -58,7 +58,7 @@ __inline numtyp gpu_det3(const numtyp m[9]) diagonal matrix times a full matrix ------------------------------------------------------------------------- */ -__inline void gpu_diag_times3(const numtyp4 shape, const numtyp m[9], +ucl_inline void gpu_diag_times3(const numtyp4 shape, const numtyp m[9], numtyp ans[9]) { ans[0] = shape.x*m[0]; @@ -76,7 +76,7 @@ __inline void gpu_diag_times3(const numtyp4 shape, const numtyp m[9], add two matrices ------------------------------------------------------------------------- */ -__inline void gpu_plus3(const numtyp m[9], const numtyp m2[9], numtyp ans[9]) +ucl_inline void gpu_plus3(const numtyp m[9], const numtyp m2[9], numtyp ans[9]) { ans[0] = m[0]+m2[0]; ans[1] = m[1]+m2[1]; @@ -93,7 +93,7 @@ __inline void gpu_plus3(const numtyp m[9], const numtyp m2[9], numtyp ans[9]) multiply the transpose of mat1 times mat2 ------------------------------------------------------------------------- */ -__inline void gpu_transpose_times3(const numtyp m[9], const numtyp m2[9], +ucl_inline void gpu_transpose_times3(const numtyp m[9], const numtyp m2[9], numtyp ans[9]) { ans[0] = m[0]*m2[0]+m[3]*m2[3]+m[6]*m2[6]; @@ -111,7 +111,7 @@ __inline void gpu_transpose_times3(const numtyp m[9], const numtyp m2[9], row vector times matrix ------------------------------------------------------------------------- */ -__inline void gpu_row_times3(const numtyp *v, const numtyp m[9], numtyp *ans) +ucl_inline void gpu_row_times3(const numtyp *v, const numtyp m[9], numtyp *ans) { ans[0] = m[0]*v[0]+v[1]*m[3]+v[2]*m[6]; ans[1] = v[0]*m[1]+m[4]*v[1]+v[2]*m[7]; @@ -124,7 +124,7 @@ __inline void gpu_row_times3(const numtyp *v, const numtyp m[9], numtyp *ans) error_flag set to 2 if bad matrix inversion attempted ------------------------------------------------------------------------- */ -__inline void gpu_mldivide3(const numtyp m[9], const numtyp *v, numtyp *ans, +ucl_inline void gpu_mldivide3(const numtyp m[9], const numtyp *v, numtyp *ans, __global int *error_flag) { // create augmented matrix for pivoting @@ -243,7 +243,7 @@ __inline void gpu_mldivide3(const numtyp m[9], const numtyp *v, numtyp *ans, quat = [w i j k] ------------------------------------------------------------------------- */ -__inline void gpu_quat_to_mat_trans(__global const numtyp4 *qif, const int qi, +ucl_inline void gpu_quat_to_mat_trans(__global const numtyp4 *qif, const int qi, numtyp mat[9]) { numtyp4 q=qif[qi]; @@ -276,7 +276,7 @@ __inline void gpu_quat_to_mat_trans(__global const numtyp4 *qif, const int qi, transposed matrix times diagonal matrix ------------------------------------------------------------------------- */ -__inline void gpu_transpose_times_diag3(const numtyp m[9], +ucl_inline void gpu_transpose_times_diag3(const numtyp m[9], const numtyp4 d, numtyp ans[9]) { ans[0] = m[0]*d.x; @@ -294,7 +294,7 @@ __inline void gpu_transpose_times_diag3(const numtyp m[9], multiply mat1 times mat2 ------------------------------------------------------------------------- */ -__inline void gpu_times3(const numtyp m[9], const numtyp m2[9], +ucl_inline void gpu_times3(const numtyp m[9], const numtyp m2[9], numtyp ans[9]) { ans[0] = m[0]*m2[0] + m[1]*m2[3] + m[2]*m2[6]; @@ -312,7 +312,7 @@ __inline void gpu_times3(const numtyp m[9], const numtyp m2[9], Apply principal rotation generator about x to rotation matrix m ------------------------------------------------------------------------- */ -__inline void gpu_rotation_generator_x(const numtyp m[9], numtyp ans[9]) +ucl_inline void gpu_rotation_generator_x(const numtyp m[9], numtyp ans[9]) { ans[0] = 0; ans[1] = -m[2]; @@ -329,7 +329,7 @@ __inline void gpu_rotation_generator_x(const numtyp m[9], numtyp ans[9]) Apply principal rotation generator about y to rotation matrix m ------------------------------------------------------------------------- */ -__inline void gpu_rotation_generator_y(const numtyp m[9], numtyp ans[9]) +ucl_inline void gpu_rotation_generator_y(const numtyp m[9], numtyp ans[9]) { ans[0] = m[2]; ans[1] = 0; @@ -346,7 +346,7 @@ __inline void gpu_rotation_generator_y(const numtyp m[9], numtyp ans[9]) Apply principal rotation generator about z to rotation matrix m ------------------------------------------------------------------------- */ -__inline void gpu_rotation_generator_z(const numtyp m[9], numtyp ans[9]) +ucl_inline void gpu_rotation_generator_z(const numtyp m[9], numtyp ans[9]) { ans[0] = -m[1]; ans[1] = m[0]; @@ -363,7 +363,7 @@ __inline void gpu_rotation_generator_z(const numtyp m[9], numtyp ans[9]) matrix times vector ------------------------------------------------------------------------- */ -__inline void gpu_times_column3(const numtyp m[9], const numtyp v[3], +ucl_inline void gpu_times_column3(const numtyp m[9], const numtyp v[3], numtyp ans[3]) { ans[0] = m[0]*v[0] + m[1]*v[1] + m[2]*v[2]; diff --git a/lib/gpu/gayberne.cu b/lib/gpu/gayberne.cu index 27f6f7e5ba..f7e2b13b47 100644 --- a/lib/gpu/gayberne.cu +++ b/lib/gpu/gayberne.cu @@ -17,7 +17,7 @@ #include "ellipsoid_extra.h" #endif -__inline void compute_eta_torque(numtyp m[9],numtyp m2[9], const numtyp4 shape, +ucl_inline void compute_eta_torque(numtyp m[9],numtyp m2[9], const numtyp4 shape, numtyp ans[9]) { numtyp den = m[3]*m[2]*m[7]-m[0]*m[5]*m[7]- diff --git a/lib/gpu/geryon/ucl_nv_kernel.h b/lib/gpu/geryon/ucl_nv_kernel.h index 5a5f739d84..bdba8ff7ae 100644 --- a/lib/gpu/geryon/ucl_nv_kernel.h +++ b/lib/gpu/geryon/ucl_nv_kernel.h @@ -55,10 +55,7 @@ typedef struct _double4 double4; #define __local __shared__ #define __global #define atom_add atomicAdd - -#ifndef __inline -#define __inline static __inline__ __device__ -#endif +#define ucl_inline static __inline__ __device__ #endif diff --git a/lib/gpu/lj.cu b/lib/gpu/lj.cu index 075d020bd7..8d615af54f 100644 --- a/lib/gpu/lj.cu +++ b/lib/gpu/lj.cu @@ -17,7 +17,7 @@ #include "preprocessor.h" texture pos_tex; #ifndef _DOUBLE_DOUBLE -__inline float4 fetch_pos(const int& i, const float4 *pos) +ucl_inline float4 fetch_pos(const int& i, const float4 *pos) { return tex1Dfetch(pos_tex, i); } #endif #endif diff --git a/lib/gpu/lj96.cu b/lib/gpu/lj96.cu index 3e0f1d2e38..a4129e289b 100644 --- a/lib/gpu/lj96.cu +++ b/lib/gpu/lj96.cu @@ -17,7 +17,7 @@ #include "preprocessor.h" texture pos_tex; #ifndef _DOUBLE_DOUBLE -__inline float4 fetch_pos(const int& i, const float4 *pos) +ucl_inline float4 fetch_pos(const int& i, const float4 *pos) { return tex1Dfetch(pos_tex, i); } #endif #endif diff --git a/lib/gpu/lj_class2_long.cu b/lib/gpu/lj_class2_long.cu index 80c4281db8..61e1e02640 100644 --- a/lib/gpu/lj_class2_long.cu +++ b/lib/gpu/lj_class2_long.cu @@ -18,9 +18,9 @@ texture pos_tex; texture q_tex; #ifndef _DOUBLE_DOUBLE -__inline float4 fetch_pos(const int& i, const float4 *pos) +ucl_inline float4 fetch_pos(const int& i, const float4 *pos) { return tex1Dfetch(pos_tex, i); } -__inline float fetch_q(const int& i, const float *q) +ucl_inline float fetch_q(const int& i, const float *q) { return tex1Dfetch(q_tex, i); } #endif #endif diff --git a/lib/gpu/lj_coul.cu b/lib/gpu/lj_coul.cu index 581f280009..24a0997863 100644 --- a/lib/gpu/lj_coul.cu +++ b/lib/gpu/lj_coul.cu @@ -18,9 +18,9 @@ texture pos_tex; texture q_tex; #ifndef _DOUBLE_DOUBLE -__inline float4 fetch_pos(const int& i, const float4 *pos) +ucl_inline float4 fetch_pos(const int& i, const float4 *pos) { return tex1Dfetch(pos_tex, i); } -__inline float fetch_q(const int& i, const float *q) +ucl_inline float fetch_q(const int& i, const float *q) { return tex1Dfetch(q_tex, i); } #endif #endif diff --git a/lib/gpu/lj_coul_long.cu b/lib/gpu/lj_coul_long.cu index eec03fcb28..c9720fdc52 100644 --- a/lib/gpu/lj_coul_long.cu +++ b/lib/gpu/lj_coul_long.cu @@ -18,9 +18,9 @@ texture pos_tex; texture q_tex; #ifndef _DOUBLE_DOUBLE -__inline float4 fetch_pos(const int& i, const float4 *pos) +ucl_inline float4 fetch_pos(const int& i, const float4 *pos) { return tex1Dfetch(pos_tex, i); } -__inline float fetch_q(const int& i, const float *q) +ucl_inline float fetch_q(const int& i, const float *q) { return tex1Dfetch(q_tex, i); } #endif #endif diff --git a/lib/gpu/lj_expand.cu b/lib/gpu/lj_expand.cu index c173d0e8ca..a2a905cf78 100644 --- a/lib/gpu/lj_expand.cu +++ b/lib/gpu/lj_expand.cu @@ -17,7 +17,7 @@ #include "preprocessor.h" texture pos_tex; #ifndef _DOUBLE_DOUBLE -__inline float4 fetch_pos(const int& i, const float4 *pos) +ucl_inline float4 fetch_pos(const int& i, const float4 *pos) { return tex1Dfetch(pos_tex, i); } #endif #endif diff --git a/lib/gpu/morse.cu b/lib/gpu/morse.cu index 3dfa643fa6..a1a42582ad 100644 --- a/lib/gpu/morse.cu +++ b/lib/gpu/morse.cu @@ -17,7 +17,7 @@ #include "preprocessor.h" texture pos_tex; #ifndef _DOUBLE_DOUBLE -__inline float4 fetch_pos(const int& i, const float4 *pos) +ucl_inline float4 fetch_pos(const int& i, const float4 *pos) { return tex1Dfetch(pos_tex, i); } #endif #endif diff --git a/lib/gpu/neighbor_gpu.cu b/lib/gpu/neighbor_gpu.cu index 3142e88ab1..98ae06b9e6 100644 --- a/lib/gpu/neighbor_gpu.cu +++ b/lib/gpu/neighbor_gpu.cu @@ -18,7 +18,7 @@ #include "preprocessor.h" texture neigh_tex; #ifndef _DOUBLE_DOUBLE -__inline float4 fetch_pos(const int& i, const float4 *pos) +ucl_inline float4 fetch_pos(const int& i, const float4 *pos) { return tex1Dfetch(neigh_tex, i); } #endif #endif diff --git a/lib/gpu/pppm.cu b/lib/gpu/pppm.cu index cb93ac8076..bbc3d8e454 100644 --- a/lib/gpu/pppm.cu +++ b/lib/gpu/pppm.cu @@ -18,9 +18,9 @@ texture pos_tex; texture q_tex; #ifndef _DOUBLE_DOUBLE -__inline float4 fetch_pos(const int& i, const float4 *pos) +ucl_inline float4 fetch_pos(const int& i, const float4 *pos) { return tex1Dfetch(pos_tex, i); } -__inline float fetch_q(const int& i, const float *q) +ucl_inline float fetch_q(const int& i, const float *q) { return tex1Dfetch(q_tex, i); } #endif diff --git a/lib/gpu/preprocessor.h b/lib/gpu/preprocessor.h index 78f73cd327..569a34ab79 100644 --- a/lib/gpu/preprocessor.h +++ b/lib/gpu/preprocessor.h @@ -100,8 +100,8 @@ #define MAX_BIO_SHARED_TYPES 128 #ifdef _DOUBLE_DOUBLE -__inline double4 fetch_pos(const int& i, const double4 *pos) { return pos[i]; } -__inline double fetch_q(const int& i, const double *q) { return q[i]; } +ucl_inline double4 fetch_pos(const int& i, const double4 *pos) { return pos[i]; } +ucl_inline double fetch_q(const int& i, const double *q) { return q[i]; } #endif #if (__CUDA_ARCH__ < 200) @@ -134,7 +134,7 @@ typedef struct _double4 double4; #define __local __shared__ #define __global #define atom_add atomicAdd -#define __inline static __inline__ __device__ +#define ucl_inline static __inline__ __device__ #endif @@ -166,7 +166,7 @@ typedef struct _double4 double4; #define MAX_BIO_SHARED_TYPES 128 #define __syncthreads() barrier(CLK_LOCAL_MEM_FENCE) -#define __inline inline +#define ucl_inline inline #define fetch_pos(i,y) x_[i] #define fetch_q(i,y) q_[i] @@ -214,5 +214,5 @@ typedef struct _double4 double4; #define SBBITS 30 #define NEIGHMASK 0x3FFFFFFF -__inline int sbmask(int j) { return j >> SBBITS & 3; } +ucl_inline int sbmask(int j) { return j >> SBBITS & 3; } diff --git a/lib/gpu/re_squared.cu b/lib/gpu/re_squared.cu index fd6c6b0577..87a98d23be 100644 --- a/lib/gpu/re_squared.cu +++ b/lib/gpu/re_squared.cu @@ -17,7 +17,7 @@ #include "ellipsoid_extra.h" #endif -__inline numtyp det_prime(const numtyp m[9], const numtyp m2[9]) +ucl_inline numtyp det_prime(const numtyp m[9], const numtyp m2[9]) { numtyp ans; ans = m2[0]*m[4]*m[8] - m2[0]*m[5]*m[7] -