diff --git a/lib/gpu/Makefile.nvidia b/lib/gpu/Makefile.nvidia index 47e1fefbe4..4bb2c3fef6 100644 --- a/lib/gpu/Makefile.nvidia +++ b/lib/gpu/Makefile.nvidia @@ -26,40 +26,30 @@ CUDA = $(CUDA_CPP) $(CUDA_ARCH) $(CUDA_PREC) CUDA_LIB = $(OBJ_DIR)/libgpu.a # Headers for CUDA Stuff -NVC_H = nvc_macros.h nvc_device.h nvc_timer.h nvc_memory.h +NVC_H = nvc_macros.h nvc_device.h nvc_timer.h nvc_memory.h nvc_traits.h # Headers for Pair Stuff PAIR_H = pair_gpu_texture.h pair_gpu_atom.h pair_gpu_nbor.h +# Dependencies for the Texture Tar +TAR_H = $(NVC_H) $(PAIR_H) pair_gpu_atom.cu lj_gpu_memory.h lj_gpu_memory.cu \ + lj_gpu_kernel.h lj_gpu.cu gb_gpu_memory.h gb_gpu_memory.cu \ + gb_gpu_extra.h gb_gpu_kernel.h gb_gpu.cu ALL_H = $(NVC_H) $(PAIR_H) EXECS = $(BIN_DIR)/nvc_get_devices -OBJS = $(OBJ_DIR)/nvc_device.o $(OBJ_DIR)/gb_gpu.cu_o \ - $(OBJ_DIR)/gb_gpu_memory.cu_o $(OBJ_DIR)/lj_gpu.cu_o \ - $(OBJ_DIR)/lj_gpu_memory.cu_o $(OBJ_DIR)/pair_gpu_atom.cu_o \ - $(OBJ_DIR)/pair_gpu_nbor.cu_o +OBJS = $(OBJ_DIR)/nvc_device.o $(OBJ_DIR)/pair_gpu_nbor.cu_o \ + $(OBJ_DIR)/pair_tex_tar.cu_o all: $(CUDA_LIB) $(EXECS) $(OBJ_DIR)/nvc_device.o: nvc_device.cu $(NVC_H) $(CUDA) -o $@ -c nvc_device.cu -$(OBJ_DIR)/pair_gpu_atom.cu_o: pair_gpu_atom.cu pair_gpu_texture.h pair_gpu_atom.h $(NVC_H) - $(CUDA) -o $@ -c pair_gpu_atom.cu - $(OBJ_DIR)/pair_gpu_nbor.cu_o: pair_gpu_nbor.cu pair_gpu_texture.h pair_gpu_nbor.h $(NVC_H) $(CUDA) -o $@ -c pair_gpu_nbor.cu -$(OBJ_DIR)/lj_gpu_memory.cu_o: lj_gpu_memory.cu lj_gpu_memory.h $(ALL_H) - $(CUDA) -o $@ -c lj_gpu_memory.cu - -$(OBJ_DIR)/lj_gpu.cu_o: lj_gpu.cu $(ALL_H) lj_gpu_memory.h lj_gpu_kernel.h - $(CUDA) -o $@ -c lj_gpu.cu - -$(OBJ_DIR)/gb_gpu_memory.cu_o: gb_gpu_memory.cu gb_gpu_memory.h $(ALL_H) - $(CUDA) -o $@ -c gb_gpu_memory.cu - -$(OBJ_DIR)/gb_gpu.cu_o: gb_gpu.cu $(ALL_H) gb_gpu_memory.h gb_gpu_kernel.h gb_gpu_extra.h - $(CUDA) -o $@ -c gb_gpu.cu +$(OBJ_DIR)/pair_tex_tar.cu_o: $(TAR_H) + $(CUDA) -o $@ -c pair_tex_tar.cu $(BIN_DIR)/nvc_get_devices: nvc_get_devices.cu $(NVC_H) $(OBJ_DIR)/nvc_device.o $(CUDA) -o $@ nvc_get_devices.cu $(CUDALNK) $(OBJ_DIR)/nvc_device.o diff --git a/lib/gpu/gb_gpu.cu b/lib/gpu/gb_gpu.cu index cde56d569a..174161331a 100644 --- a/lib/gpu/gb_gpu.cu +++ b/lib/gpu/gb_gpu.cu @@ -28,7 +28,7 @@ #include "nvc_macros.h" #include "nvc_timer.h" #include "nvc_device.h" -#include "gb_gpu_memory.h" +#include "gb_gpu_memory.cu" #include "gb_gpu_kernel.h" using namespace std; diff --git a/lib/gpu/gb_gpu_extra.h b/lib/gpu/gb_gpu_extra.h index c918e07070..87bcecb3ca 100644 --- a/lib/gpu/gb_gpu_extra.h +++ b/lib/gpu/gb_gpu_extra.h @@ -314,12 +314,12 @@ static __inline__ __device__ void gpu_quat_to_mat_trans(const int qi, numtyp i2 = qi4*qi4; numtyp j2 = qi5*qi5; numtyp k2 = qi6*qi6; - numtyp twoij = 2.0*qi4*qi5; - numtyp twoik = 2.0*qi4*qi6; - numtyp twojk = 2.0*qi5*qi6; - numtyp twoiw = 2.0*qi4*qi3; - numtyp twojw = 2.0*qi5*qi3; - numtyp twokw = 2.0*qi6*qi3; + numtyp twoij = (numtyp)2.0*qi4*qi5; + numtyp twoik = (numtyp)2.0*qi4*qi6; + numtyp twojk = (numtyp)2.0*qi5*qi6; + numtyp twoiw = (numtyp)2.0*qi4*qi3; + numtyp twojw = (numtyp)2.0*qi5*qi3; + numtyp twokw = (numtyp)2.0*qi6*qi3; mat[0] = w2+i2-j2-k2; mat[3] = twoij-twokw; diff --git a/lib/gpu/gb_gpu_kernel.h b/lib/gpu/gb_gpu_kernel.h index 8ca047e0e1..3d74d916f4 100644 --- a/lib/gpu/gb_gpu_kernel.h +++ b/lib/gpu/gb_gpu_kernel.h @@ -153,7 +153,7 @@ __global__ void kernel_gayberne(const numtyp *gum, const numtyp *special_lj, int j=*nbor; if (j < nall) - factor_lj = 1.0; + factor_lj = (numtyp)1.0; else { factor_lj = sp_lj[j/nall]; j %= nall; @@ -443,7 +443,7 @@ __global__ void kernel_sphere_gb(const numtyp *gum, const numtyp *special_lj, int j=*nbor; if (j < nall) - factor_lj = 1.0; + factor_lj = (numtyp)1.0; else { factor_lj = sp_lj[j/nall]; j %= nall; @@ -675,7 +675,7 @@ __global__ void kernel_lj(const numtyp *special_lj, const int *dev_nbor, int j=*list; if (j < nall) - factor_lj = 1.0; + factor_lj = (numtyp)1.0; else { factor_lj = sp_lj[j/nall]; j %= nall; @@ -799,7 +799,7 @@ __global__ void kernel_lj_fast(const numtyp *special_lj, const int *dev_nbor, int j=*list; if (j < nall) - factor_lj = 1.0; + factor_lj = (numtyp)1.0; else { factor_lj = sp_lj[j/nall]; j %= nall; diff --git a/lib/gpu/gb_gpu_memory.cu b/lib/gpu/gb_gpu_memory.cu index 021cd85e96..aab5ea55a5 100644 --- a/lib/gpu/gb_gpu_memory.cu +++ b/lib/gpu/gb_gpu_memory.cu @@ -73,21 +73,21 @@ int* GB_GPU_MemoryT::init(const int ij_size, const int ntypes, host_write[2]=static_cast(mu); gamma_upsilon_mu.copy_from_host(host_write.begin()); - lshape.safe_alloc(ntypes); + lshape.safe_alloc(ntypes,lshape_get_texture()); lshape.cast_copy(host_lshape,host_write); lshape.copy_from_host(host_write.begin()); // Copy shape, well, sigma, epsilon, and cutsq onto GPU - shape.safe_alloc(ntypes,3); + shape.safe_alloc(ntypes,3,shape_get_texture()); shape.cast_copy(host_shape[0],host_write); - well.safe_alloc(ntypes,3); + well.safe_alloc(ntypes,3,well_get_texture()); well.cast_copy(host_well[0],host_write); // Copy LJ data onto GPU int lj_types=ntypes; if (lj_types<=MAX_SHARED_TYPES) lj_types=MAX_SHARED_TYPES; - form.safe_alloc(lj_types,lj_types); + form.safe_alloc(lj_types,lj_types,form_get_texture()); form.copy_2Dfrom_host(host_form[0],ntypes,ntypes); // See if we want fast GB-sphere or sphere-sphere calculations @@ -100,12 +100,6 @@ int* GB_GPU_MemoryT::init(const int ij_size, const int ntypes, // Memory for ilist ordered by particle type host_olist.safe_alloc_rw(this->max_atoms); - // Bind constant data to textures - lshape_bind_texture(lshape); - shape_bind_texture(shape); - well_bind_texture(well); - form_bind_texture(form); - return this->nbor.host_ij.begin(); } @@ -124,9 +118,7 @@ void GB_GPU_MemoryT::clear() { LJ_GPU_MemoryT::clear(); - shape_unbind_texture(); - well_unbind_texture(); - form_unbind_texture(); + lshape.unbind(); shape.clear(); well.clear(); diff --git a/lib/gpu/gb_gpu_memory.h b/lib/gpu/gb_gpu_memory.h index 8886931c10..4d607ed14e 100644 --- a/lib/gpu/gb_gpu_memory.h +++ b/lib/gpu/gb_gpu_memory.h @@ -26,12 +26,11 @@ #define MAX_GPU_THREADS 4 #include "lj_gpu_memory.h" -#define LJ_GPU_MemoryT LJ_GPU_Memory enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE}; template -class GB_GPU_Memory : public LJ_GPU_MemoryT { +class GB_GPU_Memory : public LJ_GPU_Memory { public: GB_GPU_Memory(); ~GB_GPU_Memory(); diff --git a/lib/gpu/lj_gpu.cu b/lib/gpu/lj_gpu.cu index ea5ba440cc..94d826e149 100644 --- a/lib/gpu/lj_gpu.cu +++ b/lib/gpu/lj_gpu.cu @@ -26,7 +26,8 @@ #include "nvc_macros.h" #include "nvc_timer.h" #include "nvc_device.h" -#include "lj_gpu_memory.h" +#include "pair_gpu_texture.h" +#include "lj_gpu_memory.cu" #include "lj_gpu_kernel.h" using namespace std; diff --git a/lib/gpu/lj_gpu_memory.cu b/lib/gpu/lj_gpu_memory.cu index 484e28e86d..5e8bf4cdc6 100644 --- a/lib/gpu/lj_gpu_memory.cu +++ b/lib/gpu/lj_gpu_memory.cu @@ -75,11 +75,11 @@ int* LJ_GPU_MemoryT::init(const int ij_size, const int ntypes, special_lj.cast_copy(host_special_lj,host_write); // Copy sigma, epsilon, and cutsq onto GPU - sigma.safe_alloc(ntypes,ntypes); + sigma.safe_alloc(ntypes,ntypes,sigma_get_texture()); sigma.cast_copy(host_sigma[0],host_write); - epsilon.safe_alloc(ntypes,ntypes); + epsilon.safe_alloc(ntypes,ntypes,epsilon_get_texture()); epsilon.cast_copy(host_epsilon[0],host_write); - cutsq.safe_alloc(ntypes,ntypes); + cutsq.safe_alloc(ntypes,ntypes,cutsq_get_texture()); cutsq.cast_copy(host_cutsq[0],host_write); // If atom type constants fit in shared memory use fast kernel @@ -89,35 +89,27 @@ int* LJ_GPU_MemoryT::init(const int ij_size, const int ntypes, lj_types=MAX_SHARED_TYPES; shared_types=true; } - offset.safe_alloc(lj_types,lj_types); + offset.safe_alloc(lj_types,lj_types,offset_get_texture()); offset.cast_copy2D(host_offset[0],host_write,ntypes,ntypes); double *t1=host_lj1[0]; double *t2=host_lj2[0]; - for (int i=0; i::vec2 *> (host_write.begin()), + lj1.safe_alloc(lj_types,lj_types,lj1_get_texture()); + lj1.copy_2Dfrom_host(reinterpret_cast::vec2 *> (host_write.begin()), ntypes,ntypes); t1=host_lj3[0]; t2=host_lj4[0]; - for (int i=0; i::vec2 *> (host_write.begin()), + lj3.safe_alloc(lj_types,lj_types,lj3_get_texture()); + lj3.copy_2Dfrom_host(reinterpret_cast::vec2 *> (host_write.begin()), ntypes,ntypes); - // Bind constant data to textures - sigma_bind_texture(sigma); - epsilon_bind_texture(epsilon); - cutsq_bind_texture(cutsq); - offset_bind_texture(offset); - lj1_bind_texture::vec2>(lj1); - lj3_bind_texture::vec2>(lj3); - dev_error.safe_alloc(1); dev_error.zero(); @@ -139,13 +131,6 @@ void LJ_GPU_MemoryT::clear() { atom.clear(); nbor.clear(); - sigma_unbind_texture(); - epsilon_unbind_texture(); - cutsq_unbind_texture(); - offset_unbind_texture(); - lj1_unbind_texture::vec2>(); - lj3_unbind_texture::vec2>(); - CUDA_SAFE_CALL(cudaStreamDestroy(pair_stream)); dev_error.clear(); diff --git a/lib/gpu/lj_gpu_memory.h b/lib/gpu/lj_gpu_memory.h index d6f3e1cf13..82a87ea6a8 100644 --- a/lib/gpu/lj_gpu_memory.h +++ b/lib/gpu/lj_gpu_memory.h @@ -25,6 +25,7 @@ #define LJ_GPU_MEMORY_H #include "nvc_device.h" +#include "nvc_traits.h" #include "pair_gpu_atom.h" #include "pair_gpu_nbor.h" @@ -70,7 +71,7 @@ class LJ_GPU_Memory { // --------------- Const Data for Atoms NVC_ConstMatT sigma, epsilon, cutsq, offset; - NVC_ConstMat< typename cu_vec_traits::vec2 > lj1, lj3; + NVC_ConstMat< typename nvc_vec_traits::vec2 > lj1, lj3; NVC_VecT special_lj; size_t max_atoms; diff --git a/lib/gpu/nvc_memory.h b/lib/gpu/nvc_memory.h index a83178985c..0ff5229c05 100644 --- a/lib/gpu/nvc_memory.h +++ b/lib/gpu/nvc_memory.h @@ -25,6 +25,7 @@ #define NVC_MEMORY_H #include +#include "nvc_macros.h" #define NVC_HostT NVC_Host #define NVC_HostD NVC_Host @@ -196,6 +197,10 @@ class NVC_Vec { _end=_array+cols; } + // Row vector on device (allocate and assign texture and bind) + inline void safe_alloc(const size_t cols, textureReference *t) + { safe_alloc(cols); assign_texture(t); bind(); } + /// Free any memory associated with device inline void clear() { if (_cols>0) { _cols=0; CUDA_SAFE_CALL(cudaFree(_array)); } } @@ -242,17 +247,22 @@ class NVC_Vec { copy_from_host(host_write.begin()); } + /// Assign a texture to matrix + inline void assign_texture(textureReference *t) { _tex_ptr=t; } + /// Bind to texture - template - inline void bind_texture(texture &texi, cudaChannelFormatDesc &channel) { - NVC::cuda_gb_get_channel(channel); - texi.addressMode[0] = cudaAddressModeClamp; - texi.addressMode[1] = cudaAddressModeClamp; - texi.filterMode = cudaFilterModePoint; - texi.normalized = false; - CUDA_SAFE_CALL(cudaBindTexture(NULL,&texi,_array,&channel)); + inline void bind() { + NVC::cuda_gb_get_channel(_channel); + (*_tex_ptr).addressMode[0] = cudaAddressModeClamp; + (*_tex_ptr).addressMode[1] = cudaAddressModeClamp; + (*_tex_ptr).filterMode = cudaFilterModePoint; + (*_tex_ptr).normalized = false; + CUDA_SAFE_CALL(cudaBindTexture(NULL,_tex_ptr,_array,&_channel)); } + /// Unbind texture + inline void unbind() { CUDA_SAFE_CALL(cudaUnbindTexture(_tex_ptr)); } + /// Output the vector (debugging) inline void print(std::ostream &out) { print (out, numel()); } @@ -268,6 +278,8 @@ class NVC_Vec { private: numtyp *_array, *_end; size_t _row_bytes, _row_size, _rows, _cols; + cudaChannelFormatDesc _channel; + textureReference *_tex_ptr; }; /// 2D Matrix on device (can have extra column storage to get correct alignment) @@ -340,7 +352,10 @@ class NVC_ConstMat { public: NVC_ConstMat() { _rows=0; } ~NVC_ConstMat() { if (_rows>0) CUDA_SAFE_CALL(cudaFreeArray(_array)); } - + + /// Assign a texture to matrix + inline void assign_texture(textureReference *t) { _tex_ptr=t; } + /// Row major matrix on device inline void safe_alloc(const size_t rows, const size_t cols) { _rows=rows; @@ -350,19 +365,31 @@ class NVC_ConstMat { CUDA_SAFE_CALL(cudaMallocArray(&_array, &_channel, cols, rows)); } + /// Row major matrix on device (Allocate and bind texture) + inline void safe_alloc(const size_t rows, const size_t cols, + textureReference *t) + { safe_alloc(rows,cols); assign_texture(t); bind(); } + /// Bind to texture - template - inline void bind_texture(texture &texi) { - texi.addressMode[0] = cudaAddressModeClamp; - texi.addressMode[1] = cudaAddressModeClamp; - texi.filterMode = cudaFilterModePoint; - texi.normalized = false; - CUDA_SAFE_CALL(cudaBindTextureToArray(&texi,_array,&_channel)); + inline void bind() { + (*_tex_ptr).addressMode[0] = cudaAddressModeClamp; + (*_tex_ptr).addressMode[1] = cudaAddressModeClamp; + (*_tex_ptr).filterMode = cudaFilterModePoint; + (*_tex_ptr).normalized = false; + CUDA_SAFE_CALL(cudaBindTextureToArray(_tex_ptr,_array,&_channel)); } - /// Free any memory associated with device - inline void clear() - { if (_rows>0) { _rows=0; CUDA_SAFE_CALL(cudaFreeArray(_array)); } } + /// Unbind texture + inline void unbind() { CUDA_SAFE_CALL(cudaUnbindTexture(_tex_ptr)); } + + /// Free any memory associated with device and unbind + inline void clear() { + if (_rows>0) { + _rows=0; + CUDA_SAFE_CALL(cudaUnbindTexture(_tex_ptr)); + CUDA_SAFE_CALL(cudaFreeArray(_array)); + } + } inline size_t numel() const { return _cols*_rows; } inline size_t rows() const { return _rows; } @@ -442,6 +469,7 @@ class NVC_ConstMat { size_t _rows, _cols; cudaArray *_array; cudaChannelFormatDesc _channel; + textureReference *_tex_ptr; }; #endif diff --git a/lib/gpu/nvc_traits.h b/lib/gpu/nvc_traits.h new file mode 100644 index 0000000000..eee92b7028 --- /dev/null +++ b/lib/gpu/nvc_traits.h @@ -0,0 +1,31 @@ +/*************************************************************************** + nvc_texture_traits.h + ------------------- + W. Michael Brown + + Tricks for templating textures + + __________________________________________________________________________ + This file is part of the LAMMPS GPU Library + __________________________________________________________________________ + + begin : Tue Jun 23 2009 + copyright : (C) 2009 by W. Michael Brown + email : wmbrown@sandia.gov + ***************************************************************************/ + +/* ----------------------------------------------------------------------- + Copyright (2009) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + ----------------------------------------------------------------------- */ + +#ifndef NVC_TEXTURE_TRAITS_H +#define NVC_TEXTURE_TRAITS_H + +template class nvc_vec_traits; +template <> class nvc_vec_traits { public: typedef float2 vec2; }; +template <> class nvc_vec_traits { public: typedef double2 vec2; }; + +#endif diff --git a/lib/gpu/pair_gpu_atom.cu b/lib/gpu/pair_gpu_atom.cu index 94617a5c11..88b24e741e 100644 --- a/lib/gpu/pair_gpu_atom.cu +++ b/lib/gpu/pair_gpu_atom.cu @@ -21,7 +21,9 @@ the GNU General Public License. ----------------------------------------------------------------------- */ +#include "pair_gpu_texture.h" #include "pair_gpu_atom.h" + #define PairGPUAtomT PairGPUAtom template @@ -41,8 +43,7 @@ void PairGPUAtomT::init(const int max_atoms) { time_answer.init(); // Device matrices for atom and force data - dev_x.safe_alloc(atom_fields(),max_atoms); - x_bind_texture(dev_x); + dev_x.safe_alloc(atom_fields(),max_atoms,x_get_texture()); ans.safe_alloc(ans_fields(),max_atoms); // Get a host write only buffer @@ -59,7 +60,7 @@ void PairGPUAtomT::clear() { return; allocated=false; - x_unbind_texture(); + dev_x.unbind(); ans.clear(); host_write.clear(); host_read.clear(); diff --git a/lib/gpu/pair_gpu_atom.h b/lib/gpu/pair_gpu_atom.h index 18acafea3b..01f450ffcc 100644 --- a/lib/gpu/pair_gpu_atom.h +++ b/lib/gpu/pair_gpu_atom.h @@ -40,12 +40,12 @@ #ifndef PRECISION #define PRECISION float -#define ACC_PRECISION double +#define ACC_PRECISION float #define MAX_ATOMS 65536 #endif #include "nvc_timer.h" -#include "pair_gpu_texture.h" +#include "nvc_memory.h" template class PairGPUAtom { diff --git a/lib/gpu/pair_gpu_texture.h b/lib/gpu/pair_gpu_texture.h index 76bac71367..e647dda8b2 100644 --- a/lib/gpu/pair_gpu_texture.h +++ b/lib/gpu/pair_gpu_texture.h @@ -21,6 +21,7 @@ the GNU General Public License. ----------------------------------------------------------------------- */ +#include "nvc_traits.h" #include "nvc_memory.h" #ifndef PAIR_GPU_TEXTURE_H @@ -34,23 +35,20 @@ #define GB_GPU_DOUBLE #endif -template class cu_vec_traits; -template <> class cu_vec_traits { public: typedef float2 vec2; }; -template <> class cu_vec_traits { public: typedef double2 vec2; }; - // ------------------------------- x ------------------------------------ static texture x_float_tex; static texture x_double_tex; -template inline void x_bind_texture(NVC_ConstMatT &mat) - { mat.bind_texture(x_float_tex); } - -template <> inline void x_bind_texture(NVC_ConstMatD &mat) - { mat.bind_texture(x_double_tex); } -template inline void x_unbind_texture() - { cudaUnbindTexture(x_float_tex); } -template <> inline void x_unbind_texture() - { cudaUnbindTexture(x_double_tex); } +template inline textureReference * x_get_texture() { + const textureReference *ptr; + cudaGetTextureReference(&ptr,"x_float_tex"); + return const_cast(ptr); +} +template <> inline textureReference * x_get_texture() { + const textureReference *ptr; + cudaGetTextureReference(&ptr,"x_double_tex"); + return const_cast(ptr); +} template static __inline__ __device__ numtyp _x_(const int i, const int j) { return tex2D(x_float_tex,i,j); @@ -66,10 +64,11 @@ static __inline__ __device__ double _x_(const int i,const int j) { // ------------------------------- form ------------------------------------ static texture form_tex; -inline void form_bind_texture(NVC_ConstMatI &mat) - { mat.bind_texture(form_tex); } -inline void form_unbind_texture() - { cudaUnbindTexture(form_tex); } +inline textureReference * form_get_texture() { + const textureReference *ptr; + cudaGetTextureReference(&ptr,"form_tex"); + return const_cast(ptr); +} static __inline__ __device__ int _form_(const int i, const int j) { return tex2D(form_tex,i,j); } @@ -78,15 +77,16 @@ static __inline__ __device__ int _form_(const int i, const int j) { static texture lshape_float_tex; static texture lshape_double_tex; -static cudaChannelFormatDesc channel_lshape; -template inline void lshape_bind_texture(NVC_VecT &vec) - { vec.bind_texture(lshape_float_tex,channel_lshape); } -template <> inline void lshape_bind_texture(NVC_VecD &vec) - { vec.bind_texture(lshape_double_tex,channel_lshape); } -template inline void lshape_unbind_texture() - { cudaUnbindTexture(lshape_float_tex); } -template <> inline void lshape_unbind_texture() - { cudaUnbindTexture(lshape_double_tex); } +template inline textureReference * lshape_get_texture() { + const textureReference *ptr; + cudaGetTextureReference(&ptr,"lshape_float_tex"); + return const_cast(ptr); +} +template <> inline textureReference * lshape_get_texture() { + const textureReference *ptr; + cudaGetTextureReference(&ptr,"lshape_double_tex"); + return const_cast(ptr); +} template static __inline__ __device__ numtyp _lshape_(const int i) { return tex1Dfetch(lshape_float_tex,i); } @@ -102,14 +102,16 @@ static __inline__ __device__ double _lshape_(const int i) { static texture shape_float_tex; static texture shape_double_tex; -template inline void shape_bind_texture(NVC_ConstMatT &mat) - { mat.bind_texture(shape_float_tex); } -template <> inline void shape_bind_texture(NVC_ConstMatD &mat) - { mat.bind_texture(shape_double_tex); } -template inline void shape_unbind_texture() - { cudaUnbindTexture(shape_float_tex); } -template <> inline void shape_unbind_texture() - { cudaUnbindTexture(shape_double_tex); } +template inline textureReference * shape_get_texture() { + const textureReference *ptr; + cudaGetTextureReference(&ptr,"shape_float_tex"); + return const_cast(ptr); +} +template <> inline textureReference * shape_get_texture() { + const textureReference *ptr; + cudaGetTextureReference(&ptr,"shape_double_tex"); + return const_cast(ptr); +} template static __inline__ __device__ numtyp _shape_(const int i, const int j) { return tex2D(shape_float_tex,j,i); @@ -126,14 +128,16 @@ static __inline__ __device__ double _shape_(const int i, const int j) { static texture well_float_tex; static texture well_double_tex; -template inline void well_bind_texture(NVC_ConstMatT &mat) - { mat.bind_texture(well_float_tex); } -template <> inline void well_bind_texture(NVC_ConstMatD &mat) - { mat.bind_texture(well_double_tex); } -template inline void well_unbind_texture() - { cudaUnbindTexture(well_float_tex); } -template <> inline void well_unbind_texture() - { cudaUnbindTexture(well_double_tex); } +template inline textureReference * well_get_texture() { + const textureReference *ptr; + cudaGetTextureReference(&ptr,"well_float_tex"); + return const_cast(ptr); +} +template <> inline textureReference * well_get_texture() { + const textureReference *ptr; + cudaGetTextureReference(&ptr,"well_double_tex"); + return const_cast(ptr); +} template static __inline__ __device__ numtyp _well_(const int i, const int j) { return tex2D(well_float_tex,j,i); } @@ -149,14 +153,16 @@ static __inline__ __device__ double _well_(const int i,const int j) { static texture sigma_float_tex; static texture sigma_double_tex; -template inline void sigma_bind_texture(NVC_ConstMatT &mat) - { mat.bind_texture(sigma_float_tex); } -template <> inline void sigma_bind_texture(NVC_ConstMatD &mat) - { mat.bind_texture(sigma_double_tex); } -template inline void sigma_unbind_texture() - { cudaUnbindTexture(sigma_float_tex); } -template <> inline void sigma_unbind_texture() - { cudaUnbindTexture(sigma_double_tex); } +template inline textureReference * sigma_get_texture() { + const textureReference *ptr; + cudaGetTextureReference(&ptr,"sigma_float_tex"); + return const_cast(ptr); +} +template <> inline textureReference * sigma_get_texture() { + const textureReference *ptr; + cudaGetTextureReference(&ptr,"sigma_double_tex"); + return const_cast(ptr); +} template static __inline__ __device__ numtyp _sigma_(const int i, const int j) { return tex2D(sigma_float_tex,j,i); @@ -173,14 +179,16 @@ static __inline__ __device__ double _sigma_(const int i,const int j) { static texture epsilon_float_tex; static texture epsilon_double_tex; -template inline void epsilon_bind_texture(NVC_ConstMatT &mat) - { mat.bind_texture(epsilon_float_tex); } -template <> inline void epsilon_bind_texture(NVC_ConstMatD &mat) - { mat.bind_texture(epsilon_double_tex); } -template inline void epsilon_unbind_texture() - { cudaUnbindTexture(epsilon_float_tex); } -template <> inline void epsilon_unbind_texture() - { cudaUnbindTexture(epsilon_double_tex); } +template inline textureReference * epsilon_get_texture() { + const textureReference *ptr; + cudaGetTextureReference(&ptr,"epsilon_float_tex"); + return const_cast(ptr); +} +template <> inline textureReference * epsilon_get_texture() { + const textureReference *ptr; + cudaGetTextureReference(&ptr,"epsilon_double_tex"); + return const_cast(ptr); +} template static __inline__ __device__ numtyp _epsilon_(const int i, const int j) { return tex2D(epsilon_float_tex,j,i); @@ -197,14 +205,16 @@ static __inline__ __device__ double _epsilon_(const int i,const int j) { static texture cutsq_float_tex; static texture cutsq_double_tex; -template inline void cutsq_bind_texture(NVC_ConstMatT &mat) - { mat.bind_texture(cutsq_float_tex); } -template <> inline void cutsq_bind_texture(NVC_ConstMatD &mat) - { mat.bind_texture(cutsq_double_tex); } -template inline void cutsq_unbind_texture() - { cudaUnbindTexture(cutsq_float_tex); } -template <> inline void cutsq_unbind_texture() - { cudaUnbindTexture(cutsq_double_tex); } +template inline textureReference * cutsq_get_texture() { + const textureReference *ptr; + cudaGetTextureReference(&ptr,"cutsq_float_tex"); + return const_cast(ptr); +} +template <> inline textureReference * cutsq_get_texture() { + const textureReference *ptr; + cudaGetTextureReference(&ptr,"cutsq_double_tex"); + return const_cast(ptr); +} template static __inline__ __device__ numtyp _cutsq_(const int i, const int j) { return tex2D(cutsq_float_tex,j,i); @@ -221,17 +231,19 @@ static __inline__ __device__ double _cutsq_(const int i,const int j) { static texture lj1_float_tex; static texture lj1_double_tex; -template inline void lj1_bind_texture(NVC_ConstMatT &mat) - { mat.bind_texture(lj1_float_tex); } -template <> inline void lj1_bind_texture(NVC_ConstMatD2 &mat) - { mat.bind_texture(lj1_double_tex); } -template inline void lj1_unbind_texture() - { cudaUnbindTexture(lj1_float_tex); } -template <> inline void lj1_unbind_texture() - { cudaUnbindTexture(lj1_double_tex); } +template inline textureReference * lj1_get_texture() { + const textureReference *ptr; + cudaGetTextureReference(&ptr,"lj1_float_tex"); + return const_cast(ptr); +} +template <> inline textureReference * lj1_get_texture() { + const textureReference *ptr; + cudaGetTextureReference(&ptr,"lj1_double_tex"); + return const_cast(ptr); +} template static __inline__ __device__ -typename cu_vec_traits::vec2 _lj1_(const int i, const int j) { +typename nvc_vec_traits::vec2 _lj1_(const int i, const int j) { return tex2D(lj1_float_tex,j,i); } #ifdef GB_GPU_DOUBLE @@ -249,17 +261,19 @@ static __inline__ __device__ double2 _lj1_(const int i,const int j) { static texture lj3_float_tex; static texture lj3_double_tex; -template inline void lj3_bind_texture(NVC_ConstMatT &mat) - { mat.bind_texture(lj3_float_tex); } -template <> inline void lj3_bind_texture(NVC_ConstMatD2 &mat) - { mat.bind_texture(lj3_double_tex); } -template inline void lj3_unbind_texture() - { cudaUnbindTexture(lj3_float_tex); } -template <> inline void lj3_unbind_texture() - { cudaUnbindTexture(lj3_double_tex); } +template inline textureReference * lj3_get_texture() { + const textureReference *ptr; + cudaGetTextureReference(&ptr,"lj3_float_tex"); + return const_cast(ptr); +} +template <> inline textureReference * lj3_get_texture() { + const textureReference *ptr; + cudaGetTextureReference(&ptr,"lj3_double_tex"); + return const_cast(ptr); +} template static __inline__ __device__ -typename cu_vec_traits::vec2 _lj3_(const int i, const int j) { +typename nvc_vec_traits::vec2 _lj3_(const int i, const int j) { return tex2D(lj3_float_tex,j,i); } #ifdef GB_GPU_DOUBLE @@ -277,14 +291,16 @@ static __inline__ __device__ double2 _lj3_(const int i,const int j) { static texture offset_float_tex; static texture offset_double_tex; -template inline void offset_bind_texture(NVC_ConstMatT &mat) - { mat.bind_texture(offset_float_tex); } -template <> inline void offset_bind_texture(NVC_ConstMatD &mat) - { mat.bind_texture(offset_double_tex); } -template inline void offset_unbind_texture() - { cudaUnbindTexture(offset_float_tex); } -template <> inline void offset_unbind_texture() - { cudaUnbindTexture(offset_double_tex); } +template inline textureReference * offset_get_texture() { + const textureReference *ptr; + cudaGetTextureReference(&ptr,"offset_float_tex"); + return const_cast(ptr); +} +template <> inline textureReference * offset_get_texture() { + const textureReference *ptr; + cudaGetTextureReference(&ptr,"offset_double_tex"); + return const_cast(ptr); +} template static __inline__ __device__ numtyp _offset_(const int i, const int j) { return tex2D(offset_float_tex,j,i); diff --git a/lib/gpu/pair_tex_tar.cu b/lib/gpu/pair_tex_tar.cu new file mode 100644 index 0000000000..d0b07177e7 --- /dev/null +++ b/lib/gpu/pair_tex_tar.cu @@ -0,0 +1,28 @@ +/*************************************************************************** + pair_tex_tar.cu + ------------------- + W. Michael Brown + + "Tar" of header and source files that need texture reference definitions + within file scope. + + __________________________________________________________________________ + This file is part of the LAMMPS GPU Library + __________________________________________________________________________ + + begin : Tue Jun 23 2009 + copyright : (C) 2009 by W. Michael Brown + email : wmbrown@sandia.gov + ***************************************************************************/ + +/* ----------------------------------------------------------------------- + Copyright (2009) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + ----------------------------------------------------------------------- */ + +#include "pair_gpu_atom.cu" +#include "lj_gpu.cu" +#include "gb_gpu.cu" +