From 0ecefcd1ba64b9580707de6ba247c32ec26e1cd1 Mon Sep 17 00:00:00 2001 From: "W. Michael Brown" Date: Tue, 10 May 2011 16:38:17 -0400 Subject: [PATCH] Working re_squared. --- lib/gpu/Nvidia.makefile | 32 ++-- lib/gpu/Opencl.makefile | 8 +- lib/gpu/base_ellipsoid.cpp | 13 +- lib/gpu/base_ellipsoid.h | 8 +- lib/gpu/gayberne.cpp | 4 +- lib/gpu/re_squared.cpp | 38 ++-- lib/gpu/re_squared.cu | 28 +-- lib/gpu/re_squared.h | 15 +- lib/gpu/re_squared_ext.cpp | 35 ++-- src/ASPHERE/pair_resquared.h | 2 +- src/GPU/Install.sh | 4 + src/GPU/pair_gayberne_gpu.h | 5 +- src/GPU/pair_resquared_gpu.cpp | 325 +++++++++++++++++++++++++++++++++ src/GPU/pair_resquared_gpu.h | 49 +++++ 14 files changed, 468 insertions(+), 98 deletions(-) create mode 100644 src/GPU/pair_resquared_gpu.cpp create mode 100644 src/GPU/pair_resquared_gpu.h diff --git a/lib/gpu/Nvidia.makefile b/lib/gpu/Nvidia.makefile index aca8d8bba8..96df9c16e2 100644 --- a/lib/gpu/Nvidia.makefile +++ b/lib/gpu/Nvidia.makefile @@ -65,10 +65,10 @@ PTXS = $(OBJ_DIR)/pair_gpu_dev_kernel.ptx \ $(OBJ_DIR)/pair_gpu_build_kernel.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h \ $(OBJ_DIR)/pppm_f_gpu_kernel.ptx $(OBJ_DIR)/pppm_f_gpu_ptx.h \ $(OBJ_DIR)/pppm_d_gpu_kernel.ptx $(OBJ_DIR)/pppm_d_gpu_ptx.h \ - $(OBJ_DIR)/ellipsoid_nbor.ptx $(OBJ_DIR)/gayberne.ptx \ - $(OBJ_DIR)/gayberne_lj.ptx $(OBJ_DIR)/gayberne_ptx.h \ - $(OBJ_DIR)/re_squared.ptx $(OBJ_DIR)/re_squared_lj.ptx \ - $(OBJ_DIR)/re_squared_ptx.h \ + $(OBJ_DIR)/ellipsoid_nbor.ptx $(OBJ_DIR)/ellipsoid_nbor_ptx.h \ + $(OBJ_DIR)/gayberne.ptx $(OBJ_DIR)/gayberne_lj.ptx \ + $(OBJ_DIR)/gayberne_ptx.h $(OBJ_DIR)/re_squared.ptx \ + $(OBJ_DIR)/re_squared_lj.ptx $(OBJ_DIR)/re_squared_ptx.h \ $(OBJ_DIR)/lj_cut_gpu_kernel.ptx $(OBJ_DIR)/lj_cut_gpu_ptx.h \ $(OBJ_DIR)/lj96_cut_gpu_kernel.ptx $(OBJ_DIR)/lj96_cut_gpu_ptx.h \ $(OBJ_DIR)/lj_expand_gpu_kernel.ptx $(OBJ_DIR)/lj_expand_gpu_ptx.h \ @@ -145,8 +145,8 @@ $(OBJ_DIR)/atomic_gpu_memory.o: $(ALL_H) atomic_gpu_memory.h atomic_gpu_memory.c $(OBJ_DIR)/charge_gpu_memory.o: $(ALL_H) charge_gpu_memory.h charge_gpu_memory.cpp $(CUDR) -o $@ -c charge_gpu_memory.cpp -$(OBJ_DIR)/base_ellipsoid.o: $(ALL_H) base_ellipsoid.h base_ellipsoid.cpp - $(CUDR) -o $@ -c base_ellipsoid.cpp +$(OBJ_DIR)/base_ellipsoid.o: $(ALL_H) base_ellipsoid.h base_ellipsoid.cpp $(OBJ_DIR)/ellipsoid_nbor_ptx.h + $(CUDR) -o $@ -c base_ellipsoid.cpp -I$(OBJ_DIR) $(OBJ_DIR)/pppm_f_gpu_kernel.ptx: pppm_gpu_kernel.cu pair_gpu_precision.h $(CUDA) --ptx -DNV_KERNEL -Dgrdtyp=float -Dgrdtyp4=float4 -o $@ pppm_gpu_kernel.cu @@ -166,17 +166,20 @@ $(OBJ_DIR)/pppm_gpu_memory.o: $(ALL_H) pppm_gpu_memory.h pppm_gpu_memory.cpp $(O $(OBJ_DIR)/pppm_l_gpu.o: $(ALL_H) pppm_gpu_memory.h pppm_l_gpu.cpp $(CUDR) -o $@ -c pppm_l_gpu.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/ellipsoid_nbor.ptx: ellipsoid_nbor.cu pair_gpu_precision.h + $(CUDA) --ptx -DNV_KERNEL -o $@ ellipsoid_nbor.cu + +$(OBJ_DIR)/ellipsoid_nbor_ptx.h: $(OBJ_DIR)/ellipsoid_nbor.ptx + $(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/ellipsoid_nbor.ptx $(OBJ_DIR)/ellipsoid_nbor_ptx.h + $(OBJ_DIR)/gayberne.ptx: gayberne.cu pair_gpu_precision.h ellipsoid_extra.h $(CUDA) --ptx -DNV_KERNEL -o $@ gayberne.cu $(OBJ_DIR)/gayberne_lj.ptx: gayberne_lj.cu pair_gpu_precision.h ellipsoid_extra.h $(CUDA) --ptx -DNV_KERNEL -o $@ gayberne_lj.cu -$(OBJ_DIR)/ellipsoid_nbor.ptx: ellipsoid_nbor.cu pair_gpu_precision.h - $(CUDA) --ptx -DNV_KERNEL -o $@ ellipsoid_nbor.cu - -$(OBJ_DIR)/gayberne_ptx.h: $(OBJ_DIR)/ellipsoid_nbor.ptx $(OBJ_DIR)/gayberne.ptx $(OBJ_DIR)/gayberne_lj.ptx - $(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/ellipsoid_nbor.ptx $(OBJ_DIR)/gayberne.ptx $(OBJ_DIR)/gayberne_lj.ptx $(OBJ_DIR)/gayberne_ptx.h +$(OBJ_DIR)/gayberne_ptx.h: $(OBJ_DIR)/gayberne.ptx $(OBJ_DIR)/gayberne_lj.ptx + $(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/gayberne.ptx $(OBJ_DIR)/gayberne_lj.ptx $(OBJ_DIR)/gayberne_ptx.h $(OBJ_DIR)/gayberne.o: $(ALL_H) gayberne.h gayberne.cpp $(OBJ_DIR)/gayberne_ptx.h $(OBJ_DIR)/base_ellipsoid.o $(CUDR) -o $@ -c gayberne.cpp -I$(OBJ_DIR) @@ -190,11 +193,8 @@ $(OBJ_DIR)/re_squared.ptx: re_squared.cu pair_gpu_precision.h ellipsoid_extra.h $(OBJ_DIR)/re_squared_lj.ptx: re_squared_lj.cu pair_gpu_precision.h ellipsoid_extra.h $(CUDA) --ptx -DNV_KERNEL -o $@ re_squared_lj.cu -$(OBJ_DIR)/ellipsoid_nbor.ptx: ellipsoid_nbor.cu pair_gpu_precision.h - $(CUDA) --ptx -DNV_KERNEL -o $@ ellipsoid_nbor.cu - -$(OBJ_DIR)/re_squared_ptx.h: $(OBJ_DIR)/ellipsoid_nbor.ptx $(OBJ_DIR)/re_squared.ptx $(OBJ_DIR)/re_squared_lj.ptx - $(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/ellipsoid_nbor.ptx $(OBJ_DIR)/re_squared.ptx $(OBJ_DIR)/re_squared_lj.ptx $(OBJ_DIR)/re_squared_ptx.h +$(OBJ_DIR)/re_squared_ptx.h: $(OBJ_DIR)/re_squared.ptx $(OBJ_DIR)/re_squared_lj.ptx + $(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/re_squared.ptx $(OBJ_DIR)/re_squared_lj.ptx $(OBJ_DIR)/re_squared_ptx.h $(OBJ_DIR)/re_squared.o: $(ALL_H) re_squared.h re_squared.cpp $(OBJ_DIR)/re_squared_ptx.h $(OBJ_DIR)/base_ellipsoid.o $(CUDR) -o $@ -c re_squared.cpp -I$(OBJ_DIR) diff --git a/lib/gpu/Opencl.makefile b/lib/gpu/Opencl.makefile index 88138be91f..25a0a06656 100644 --- a/lib/gpu/Opencl.makefile +++ b/lib/gpu/Opencl.makefile @@ -91,8 +91,8 @@ $(OBJ_DIR)/atomic_gpu_memory.o: $(OCL_H) atomic_gpu_memory.h atomic_gpu_memory.c $(OBJ_DIR)/charge_gpu_memory.o: $(OCL_H) charge_gpu_memory.h charge_gpu_memory.cpp $(OCL) -o $@ -c charge_gpu_memory.cpp -$(OBJ_DIR)/base_ellipsoid.o: $(OCL_H) base_ellipsoid.h base_ellipsoid.cpp - $(OCL) -o $@ -c base_ellipsoid.cpp +$(OBJ_DIR)/base_ellipsoid.o: $(OCL_H) base_ellipsoid.h base_ellipsoid.cpp $(OBJ_DIR)/ellipsoid_nbor_cl.h + $(OCL) -o $@ -c base_ellipsoid.cpp -I$(OBJ_DIR) $(OBJ_DIR)/pppm_gpu_cl.h: pppm_gpu_kernel.cu $(BSH) ./geryon/file_to_cstr.sh pppm_gpu_kernel.cu $(OBJ_DIR)/pppm_gpu_cl.h; @@ -112,7 +112,7 @@ $(OBJ_DIR)/gayberne_cl.h: gayberne.cu gayberne_lj.cu ellipsoid_extra.h $(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/gayberne.tar $(OBJ_DIR)/gayberne_lj.tar $(OBJ_DIR)/gayberne_cl.h; \ rm -f $(OBJ_DIR)/gayberne.tar $(OBJ_DIR)/gayberne_lj.tar -$(OBJ_DIR)/gayberne.o: $(ALL_H) gayberne.h gayberne.cpp $(OBJ_DIR)/ellipsoid_nbor_cl.h $(OBJ_DIR)/gayberne_cl.h $(OBJ_DIR)/base_ellipsoid.o +$(OBJ_DIR)/gayberne.o: $(ALL_H) gayberne.h gayberne.cpp $(OBJ_DIR)/gayberne_cl.h $(OBJ_DIR)/base_ellipsoid.o $(OCL) -o $@ -c gayberne.cpp -I$(OBJ_DIR) $(OBJ_DIR)/gayberne_ext.o: $(ALL_H) $(OBJ_DIR)/gayberne.o gayberne_ext.cpp @@ -124,7 +124,7 @@ $(OBJ_DIR)/re_squared_cl.h: re_squared.cu re_squared_lj.cu ellipsoid_extra.h $(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/re_squared.tar $(OBJ_DIR)/re_squared_lj.tar $(OBJ_DIR)/re_squared_cl.h; \ rm -f $(OBJ_DIR)/re_squared.tar $(OBJ_DIR)/re_squared_lj.tar -$(OBJ_DIR)/re_squared.o: $(ALL_H) re_squared.h re_squared.cpp $(OBJ_DIR)/ellipsoid_nbor_cl.h $(OBJ_DIR)/re_squared_cl.h $(OBJ_DIR)/base_ellipsoid.o +$(OBJ_DIR)/re_squared.o: $(ALL_H) re_squared.h re_squared.cpp $(OBJ_DIR)/re_squared_cl.h $(OBJ_DIR)/base_ellipsoid.o $(OCL) -o $@ -c re_squared.cpp -I$(OBJ_DIR) $(OBJ_DIR)/re_squared_ext.o: $(ALL_H) $(OBJ_DIR)/re_squared.o re_squared_ext.cpp diff --git a/lib/gpu/base_ellipsoid.cpp b/lib/gpu/base_ellipsoid.cpp index 2f5c2e7aa1..65c5495345 100644 --- a/lib/gpu/base_ellipsoid.cpp +++ b/lib/gpu/base_ellipsoid.cpp @@ -16,6 +16,12 @@ #include "base_ellipsoid.h" using namespace LAMMPS_AL; +#ifdef USE_OPENCL +#include "ellipsoid_nbor_cl.h" +#else +#include "ellipsoid_nbor_ptx.h" +#endif + #define BaseEllipsoidT BaseEllipsoid extern PairGPUDevice pair_gpu_device; @@ -43,7 +49,6 @@ int BaseEllipsoidT::init_base(const int nlocal, const int nall, const int max_nbors, const int maxspecial, const double cell_size, const double gpu_split, FILE *_screen, const int ntypes, int **h_form, - const char *nbor_program, const char *ellipsoid_program, const char *lj_program) { nbor_time_avail=false; @@ -69,7 +74,7 @@ int BaseEllipsoidT::init_base(const int nlocal, const int nall, atom=&device->atom; _block_size=device->pair_block_size(); - compile_kernels(*ucl_device,nbor_program,ellipsoid_program,lj_program); + compile_kernels(*ucl_device,ellipsoid_program,lj_program); // Initialize host-device load balancer hd_balancer.init(device,gpu_nbor,gpu_split); @@ -423,7 +428,7 @@ double BaseEllipsoidT::host_memory_usage_base() const { } template -void BaseEllipsoidT::compile_kernels(UCL_Device &dev, const char *nbor_string, +void BaseEllipsoidT::compile_kernels(UCL_Device &dev, const char *ellipsoid_string, const char *lj_string) { if (_compiled) @@ -433,7 +438,7 @@ void BaseEllipsoidT::compile_kernels(UCL_Device &dev, const char *nbor_string, std::string(OCL_PRECISION_COMPILE); nbor_program=new UCL_Program(dev); - nbor_program->load_string(nbor_string,flags.c_str()); + nbor_program->load_string(ellipsoid_nbor,flags.c_str()); k_nbor_fast.set_function(*nbor_program,"kernel_nbor_fast"); k_nbor.set_function(*nbor_program,"kernel_nbor"); diff --git a/lib/gpu/base_ellipsoid.h b/lib/gpu/base_ellipsoid.h index 2c083175df..2665c6cfdc 100644 --- a/lib/gpu/base_ellipsoid.h +++ b/lib/gpu/base_ellipsoid.h @@ -48,8 +48,8 @@ class BaseEllipsoid { int init_base(const int nlocal, const int nall, const int max_nbors, const int maxspecial, const double cell_size, const double gpu_split, FILE *screen, const int ntypes, - int **h_form, const char *nbor_program, - const char *ellipsoid_program, const char *lj_program); + int **h_form, const char *ellipsoid_program, + const char *lj_program); /// Estimate the overhead for GPU context changes and CPU driver void estimate_gpu_overhead(); @@ -233,8 +233,8 @@ class BaseEllipsoid { int **_host_form; int _last_ellipse, _max_last_ellipse; - void compile_kernels(UCL_Device &dev, const char *nbor_string, - const char *ellipsoid_string, const char *lj_string); + void compile_kernels(UCL_Device &dev, const char *ellipsoid_string, + const char *lj_string); virtual void loop(const bool _eflag, const bool _vflag) = 0; }; diff --git a/lib/gpu/gayberne.cpp b/lib/gpu/gayberne.cpp index 123073e50a..d6299d9f94 100644 --- a/lib/gpu/gayberne.cpp +++ b/lib/gpu/gayberne.cpp @@ -15,7 +15,6 @@ #ifdef USE_OPENCL #include "gayberne_cl.h" -#include "ellipsoid_nbor_cl.h" #else #include "gayberne_ptx.h" #endif @@ -56,8 +55,7 @@ int GayBerneT::init(const int ntypes, const double gamma, const double gpu_split, FILE *_screen) { int success; success=this->init_base(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, - _screen,ntypes,h_form,ellipsoid_nbor, - gayberne,gayberne_lj); + _screen,ntypes,h_form,gayberne,gayberne_lj); if (success!=0) return success; diff --git a/lib/gpu/re_squared.cpp b/lib/gpu/re_squared.cpp index ce718560d4..83946dac98 100644 --- a/lib/gpu/re_squared.cpp +++ b/lib/gpu/re_squared.cpp @@ -15,7 +15,6 @@ #ifdef USE_OPENCL #include "re_squared_cl.h" -#include "ellipsoid_nbor_cl.h" #else #include "re_squared_ptx.h" #endif @@ -43,20 +42,18 @@ int RESquaredT::bytes_per_atom(const int max_nbors) const { } template -int RESquaredT::init(const int ntypes, const double gamma, - const double upsilon, const double mu, - double **host_shape, double **host_well, - double **host_cutsq, double **host_sigma, - double **host_epsilon, double *host_lshape, - int **h_form, double **host_lj1, double **host_lj2, - double **host_lj3, double **host_lj4, - double **host_offset, const double *host_special_lj, - const int nlocal, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, - const double gpu_split, FILE *_screen) { +int RESquaredT::init(const int ntypes, double **host_shape, double **host_well, + double **host_cutsq, double **host_sigma, + double **host_epsilon, double *host_lshape, int **h_form, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **host_offset, + const double *host_special_lj, const int nlocal, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, const double gpu_split, + FILE *_screen) { int success; success=this->init_base(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, - _screen,ntypes,h_form,ellipsoid_nbor, + _screen,ntypes,h_form, re_squared,re_squared_lj); if (success!=0) return success; @@ -99,15 +96,12 @@ int RESquaredT::init(const int ntypes, const double gamma, // Allocate, cast and asynchronous memcpy of constant data // Copy data for bonded interactions - gamma_upsilon_mu.alloc(7,*(this->ucl_device),UCL_READ_ONLY); - host_write[0]=static_cast(gamma); - host_write[1]=static_cast(upsilon); - host_write[2]=static_cast(mu); - host_write[3]=static_cast(host_special_lj[0]); - host_write[4]=static_cast(host_special_lj[1]); - host_write[5]=static_cast(host_special_lj[2]); - host_write[6]=static_cast(host_special_lj[3]); - ucl_copy(gamma_upsilon_mu,host_write,7,false); + gamma_upsilon_mu.alloc(4,*(this->ucl_device),UCL_READ_ONLY); + host_write[0]=static_cast(host_special_lj[0]); + host_write[1]=static_cast(host_special_lj[1]); + host_write[2]=static_cast(host_special_lj[2]); + host_write[3]=static_cast(host_special_lj[3]); + ucl_copy(gamma_upsilon_mu,host_write,4,false); lshape.alloc(ntypes,*(this->ucl_device),UCL_READ_ONLY); UCL_H_Vec d_view; diff --git a/lib/gpu/re_squared.cu b/lib/gpu/re_squared.cu index e4603331d8..3170e8d8d5 100644 --- a/lib/gpu/re_squared.cu +++ b/lib/gpu/re_squared.cu @@ -101,15 +101,15 @@ __kernel void kernel_ellipsoid(__global numtyp4* x_,__global numtyp4 *q, ishape=shape[itype]; numtyp4 ishape2; ishape2.x=ishape.x*ishape.x; - ishape2.y=ishape.x*ishape.y; - ishape2.z=ishape.x*ishape.z; + ishape2.y=ishape.y*ishape.y; + ishape2.z=ishape.z*ishape.z; { numtyp aTs[9]; // A1'*S1^2 gpu_quat_to_mat_trans(q,i,a1); gpu_transpose_times_diag3(a1,well[itype],aTe1); - gpu_transpose_times_diag3(a1,ishape,aTs); - gpu_diag_times3(ishape,a1,sa1); + gpu_transpose_times_diag3(a1,ishape2,aTs); + gpu_diag_times3(ishape2,a1,sa1); gpu_times3(aTs,a1,gamma1); gpu_rotation_generator_x(a1,lA1_0); gpu_rotation_generator_y(a1,lA1_1); @@ -159,14 +159,14 @@ __kernel void kernel_ellipsoid(__global numtyp4* x_,__global numtyp4 *q, jshape=shape[jtype]; numtyp4 jshape2; jshape2.x=jshape.x*jshape.x; - jshape2.y=jshape.x*jshape.y; - jshape2.z=jshape.x*jshape.z; + jshape2.y=jshape.y*jshape.y; + jshape2.z=jshape.z*jshape.z; { numtyp aTs[9]; // A1'*S1^2 gpu_quat_to_mat_trans(q,j,a2); gpu_transpose_times_diag3(a2,well[jtype],aTe2); - gpu_transpose_times_diag3(a2,jshape,aTs); - gpu_diag_times3(jshape,a2,sa2); + gpu_transpose_times_diag3(a2,jshape2,aTs); + gpu_diag_times3(jshape2,a2,sa2); gpu_times3(aTs,a2,gamma2); gpu_rotation_generator_x(a2,lA2_0); gpu_rotation_generator_y(a2,lA2_1); @@ -357,19 +357,19 @@ __kernel void kernel_ellipsoid(__global numtyp4* x_,__global numtyp4 *q, if (i==0) { f.x+=force; if (vflag>0) - virial[0]+=r[0]*force; + virial[0]+=-r[0]*force; } else if (i==1) { f.y+=force; if (vflag>0) { - virial[1]+=r[1]*force; - virial[3]+=r[0]*force; + virial[1]+=-r[1]*force; + virial[3]+=-r[0]*force; } } else { f.z+=force; if (vflag>0) { - virial[2]+=r[2]*force; - virial[4]+=r[0]*force; - virial[5]+=r[1]*force; + virial[2]+=-r[2]*force; + virial[4]+=-r[0]*force; + virial[5]+=-r[1]*force; } } } diff --git a/lib/gpu/re_squared.h b/lib/gpu/re_squared.h index bebc12f204..85fc738d07 100644 --- a/lib/gpu/re_squared.h +++ b/lib/gpu/re_squared.h @@ -40,14 +40,13 @@ class RESquared : public BaseEllipsoid { * - -3 if there is an out of memory error * - -4 if the GPU library was not compiled for GPU * - -5 Double precision is not supported on card **/ - int init(const int ntypes, const double gamma, - const double upsilon, const double mu, double **host_shape, - double **host_well, double **host_cutsq, double **host_sigma, - double **host_epsilon, double *host_lshape, int **h_form, - double **host_lj1, double **host_lj2, double **host_lj3, - double **host_lj4, double **host_offset, - const double *host_special_lj, const int nlocal, const int nall, - const int max_nbors, const int maxspecial, const double cell_size, + int init(const int ntypes, double **host_shape, double **host_well, + double **host_cutsq, double **host_sigma, double **host_epsilon, + double *host_lshape, int **h_form, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **host_offset, const double *host_special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, const double gpu_split, FILE *screen); /// Clear all host and device data diff --git a/lib/gpu/re_squared_ext.cpp b/lib/gpu/re_squared_ext.cpp index f573496004..38a6acfaf4 100644 --- a/lib/gpu/re_squared_ext.cpp +++ b/lib/gpu/re_squared_ext.cpp @@ -27,15 +27,13 @@ static RESquared REMF; // --------------------------------------------------------------------------- // Allocate memory on host and device and copy constants to device // --------------------------------------------------------------------------- -int re_gpu_init(const int ntypes, const double gamma, - const double upsilon, const double mu, double **shape, - double **well, double **cutsq, double **sigma, - double **epsilon, double *host_lshape, int **form, - double **host_lj1, double **host_lj2, double **host_lj3, - double **host_lj4, double **offset, double *special_lj, - const int inum, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, int &gpu_mode, - FILE *screen) { +int re_gpu_init(const int ntypes, double **shape, double **well, double **cutsq, + double **sigma, double **epsilon, double *host_lshape, + int **form, double **host_lj1, double **host_lj2, + double **host_lj3, double **host_lj4, double **offset, + double *special_lj, const int inum, const int nall, + const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen) { REMF.clear(); gpu_mode=REMF.device->gpu_mode(); double gpu_split=REMF.device->particle_split(); @@ -45,7 +43,7 @@ int re_gpu_init(const int ntypes, const double gamma, int gpu_rank=REMF.device->gpu_rank(); int procs_per_gpu=REMF.device->procs_per_gpu(); - REMF.device->init_message(screen,"gayberne",first_gpu,last_gpu); + REMF.device->init_message(screen,"resquared",first_gpu,last_gpu); bool message=false; if (REMF.device->replica_me()==0 && screen) @@ -58,11 +56,10 @@ int re_gpu_init(const int ntypes, const double gamma, int init_ok=0; if (world_me==0) - init_ok=REMF.init(ntypes, gamma, upsilon, mu, shape, well, cutsq, - sigma, epsilon, host_lshape, form, host_lj1, - host_lj2, host_lj3, host_lj4, offset, special_lj, - inum, nall, max_nbors, maxspecial, cell_size, gpu_split, - screen); + init_ok=REMF.init(ntypes, shape, well, cutsq, sigma, epsilon, host_lshape, + form, host_lj1, host_lj2, host_lj3, host_lj4, offset, + special_lj, inum, nall, max_nbors, maxspecial, cell_size, + gpu_split, screen); REMF.device->world_barrier(); if (message) @@ -78,10 +75,10 @@ int re_gpu_init(const int ntypes, const double gamma, fflush(screen); } if (gpu_rank==i && world_me!=0) - init_ok=REMF.init(ntypes, gamma, upsilon, mu, shape, well, cutsq, sigma, - epsilon, host_lshape, form, host_lj1, host_lj2, - host_lj3, host_lj4, offset, special_lj, inum, nall, - max_nbors, maxspecial, cell_size, gpu_split, screen); + init_ok=REMF.init(ntypes, shape, well, cutsq, sigma, epsilon, + host_lshape, form, host_lj1, host_lj2, host_lj3, + host_lj4, offset, special_lj, inum, nall, + max_nbors, maxspecial, cell_size, gpu_split, screen); REMF.device->gpu_barrier(); if (message) diff --git a/src/ASPHERE/pair_resquared.h b/src/ASPHERE/pair_resquared.h index 6aa2273c45..f3e1dcc58f 100755 --- a/src/ASPHERE/pair_resquared.h +++ b/src/ASPHERE/pair_resquared.h @@ -38,7 +38,7 @@ class PairRESquared : public Pair { void write_restart_settings(FILE *); void read_restart_settings(FILE *); - private: + protected: double cut_global; double **cut; diff --git a/src/GPU/Install.sh b/src/GPU/Install.sh index dd5d7add03..202b4c956d 100644 --- a/src/GPU/Install.sh +++ b/src/GPU/Install.sh @@ -26,6 +26,8 @@ if (test $1 = 1) then if (test -e ../pair_gayberne.cpp) then cp pair_gayberne_gpu.cpp .. cp pair_gayberne_gpu.h .. + cp pair_resquared_gpu.cpp .. + cp pair_resquared_gpu.h .. fi if (test -e ../pair_lj_cut_coul_long.cpp) then @@ -83,6 +85,7 @@ elif (test $1 = 0) then rm ../pppm_gpu_single.cpp rm ../pppm_gpu_double.cpp rm ../pair_gayberne_gpu.cpp + rm ../pair_resquared_gpu.cpp rm ../pair_lj_cut_gpu.cpp rm ../pair_morse_gpu.cpp rm ../pair_lj96_cut_gpu.cpp @@ -102,6 +105,7 @@ elif (test $1 = 0) then rm ../pppm_gpu_single.h rm ../pppm_gpu_double.h rm ../pair_gayberne_gpu.h + rm ../pair_resquared_gpu.h rm ../pair_lj_cut_gpu.h rm ../pair_morse_gpu.h rm ../pair_lj96_cut_gpu.h diff --git a/src/GPU/pair_gayberne_gpu.h b/src/GPU/pair_gayberne_gpu.h index 76d3b516a9..9ecae3c939 100644 --- a/src/GPU/pair_gayberne_gpu.h +++ b/src/GPU/pair_gayberne_gpu.h @@ -17,11 +17,10 @@ PairStyle(gayberne/gpu,PairGayBerneGPU) #else -#ifndef LMP_PAIR_GPU_H -#define LMP_PAIR_GPU_H +#ifndef LMP_PAIR_GAYBERNE_GPU_H +#define LMP_PAIR_GAYBERNE_GPU_H #include "pair_gayberne.h" -#define MAX_GPU_THREADS 1 namespace LAMMPS_NS { diff --git a/src/GPU/pair_resquared_gpu.cpp b/src/GPU/pair_resquared_gpu.cpp new file mode 100644 index 0000000000..e8896d6dd9 --- /dev/null +++ b/src/GPU/pair_resquared_gpu.cpp @@ -0,0 +1,325 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mike Brown (SNL) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "pair_resquared_gpu.h" +#include "math_extra.h" +#include "atom.h" +#include "atom_vec.h" +#include "atom_vec_ellipsoid.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "integrate.h" +#include "memory.h" +#include "error.h" +#include "neigh_request.h" +#include "universe.h" +#include "domain.h" +#include "update.h" +#include "string.h" +#include "gpu_extra.h" + +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + +// External functions from cuda library for atom decomposition + +int re_gpu_init(const int ntypes, double **shape, double **well, + double **cutsq, double **sigma, double **epsilon, + double *host_lshape, int **form, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double *special_lj, const int nlocal, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen); +void re_gpu_clear(); +int ** re_gpu_compute_n(const int ago, const int inum, const int nall, + double **host_x, int *host_type, double *sublo, + double *subhi, int *tag, int **nspecial, int **special, + const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, + bool &success, double **host_quat); +int * re_gpu_compute(const int ago, const int inum, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success, double **host_quat); +double re_gpu_bytes(); + +using namespace LAMMPS_NS; + +enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE}; + +/* ---------------------------------------------------------------------- */ + +PairRESquaredGPU::PairRESquaredGPU(LAMMPS *lmp) : PairRESquared(lmp), + gpu_mode(GPU_PAIR) +{ + avec = (AtomVecEllipsoid *) atom->style_match("ellipsoid"); + if (!avec) + error->all("Pair gayberne requires atom style ellipsoid"); + quat_nmax = 0; + quat = NULL; +} + +/* ---------------------------------------------------------------------- + free all arrays +------------------------------------------------------------------------- */ + +PairRESquaredGPU::~PairRESquaredGPU() +{ + re_gpu_clear(); + cpu_time = 0.0; + memory->destroy(quat); +} + +/* ---------------------------------------------------------------------- */ + +void PairRESquaredGPU::compute(int eflag, int vflag) +{ + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + int nall = atom->nlocal + atom->nghost; + int inum, host_start; + + bool success = true; + int *ilist, *numneigh, **firstneigh; + + if (nall > quat_nmax) { + quat_nmax = static_cast(1.1 * nall); + memory->grow(quat, quat_nmax, 4, "pair:quat"); + } + AtomVecEllipsoid::Bonus *bonus = avec->bonus; + int *ellipsoid = atom->ellipsoid; + for (int i=0; i -1) { + quat[i][0] = bonus[qi].quat[0]; + quat[i][1] = bonus[qi].quat[1]; + quat[i][2] = bonus[qi].quat[2]; + quat[i][3] = bonus[qi].quat[3]; + } + } + + if (gpu_mode == GPU_NEIGH) { + inum = atom->nlocal; + firstneigh = re_gpu_compute_n(neighbor->ago, inum, nall, atom->x, + atom->type, domain->sublo, domain->subhi, + atom->tag, atom->nspecial, atom->special, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success, quat); + } else { + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + olist = re_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, + ilist, numneigh, firstneigh, eflag, vflag, + eflag_atom, vflag_atom, host_start, + cpu_time, success, quat); + } + if (!success) + error->one("Out of memory on GPGPU"); + + if (host_start < inum) { + cpu_time = MPI_Wtime(); + cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh); + cpu_time = MPI_Wtime() - cpu_time; + } +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +void PairRESquaredGPU::init_style() +{ + if (force->newton_pair) + error->all("Cannot use newton pair with GPU RESquared pair style"); + if (!atom->ellipsoid_flag) + error->all("Pair resquared requires atom style ellipsoid"); + + // per-type shape precalculations + // require that atom shapes are identical within each type + // if shape = 0 for point particle, set shape = 1 as required by Gay-Berne + + for (int i = 1; i <= atom->ntypes; i++) { + if (!atom->shape_consistency(i,shape1[i][0],shape1[i][1],shape1[i][2])) + error->all("Pair resquared requires atoms with same type have same shape"); + if (shape1[i][0] == 0.0) + shape1[i][0] = shape1[i][1] = shape1[i][2] = 1.0; + shape2[i][0] = shape1[i][0]*shape1[i][0]; + shape2[i][1] = shape1[i][1]*shape1[i][1]; + shape2[i][2] = shape1[i][2]*shape1[i][2]; + lshape[i] = shape1[i][0]*shape1[i][1]*shape1[i][2]; + } + + // Repeat cutsq calculation because done after call to init_style + double maxcut = -1.0; + double cut; + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + cut = init_one(i,j); + cut *= cut; + if (cut > maxcut) + maxcut = cut; + cutsq[i][j] = cutsq[j][i] = cut; + } else + cutsq[i][j] = cutsq[j][i] = 0.0; + } + } + + double cell_size = sqrt(maxcut) + neighbor->skin; + + int maxspecial=0; + if (atom->molecular) + maxspecial=atom->maxspecial; + int success = re_gpu_init(atom->ntypes+1, shape1, well, cutsq, sigma, + epsilon, lshape, form, lj1, lj2, lj3, lj4, offset, + force->special_lj, atom->nlocal, + atom->nlocal+atom->nghost, 300, maxspecial, + cell_size, gpu_mode, screen); + GPU_EXTRA::check_flag(success,error,world); + + if (gpu_mode != GPU_NEIGH) { + int irequest = neighbor->request(this); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + } + quat_nmax = static_cast(1.1 * (atom->nlocal + atom->nghost)); + memory->grow(quat, quat_nmax, 4, "pair:quat"); +} + +/* ---------------------------------------------------------------------- */ + +double PairRESquaredGPU::memory_usage() +{ + double bytes = Pair::memory_usage(); + return bytes + memory->usage(quat,quat_nmax)+re_gpu_bytes(); +} + +/* ---------------------------------------------------------------------- */ + +void PairRESquaredGPU::cpu_compute(int start, int inum, int eflag, int vflag, + int *ilist, int *numneigh, int **firstneigh) +{ + int i,j,ii,jj,jnum,itype,jtype; + double evdwl,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj; + double fforce[3],ttor[3],rtor[3],r12[3]; + int *jlist; + RE2Vars wi,wj; + + evdwl = 0.0; + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + double **x = atom->x; + double **f = atom->f; + double **tor = atom->torque; + int *type = atom->type; + int nlocal = atom->nlocal; + double *special_lj = force->special_lj; + + // loop over neighbors of my atoms + + for (ii = start; ii < inum; ii++) { + i = ilist[ii]; + itype = type[i]; + + // not a LJ sphere + + if (lshape[itype] != 0.0) precompute_i(i,wi); + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + // r12 = center to center vector + + r12[0] = x[j][0]-x[i][0]; + r12[1] = x[j][1]-x[i][1]; + r12[2] = x[j][2]-x[i][2]; + rsq = MathExtra::dot3(r12,r12); + jtype = type[j]; + + // compute if less than cutoff + + if (rsq < cutsq[itype][jtype]) { + switch (form[itype][jtype]) { + + case SPHERE_SPHERE: + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + forcelj *= -r2inv; + if (eflag) one_eng = + r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) - + offset[itype][jtype]; + fforce[0] = r12[0]*forcelj; + fforce[1] = r12[1]*forcelj; + fforce[2] = r12[2]*forcelj; + break; + + case SPHERE_ELLIPSE: + precompute_i(j,wj); + one_eng = resquared_lj(j,i,wj,r12,rsq,fforce,rtor,false); + break; + + case ELLIPSE_SPHERE: + one_eng = resquared_lj(i,j,wi,r12,rsq,fforce,ttor,true); + tor[i][0] += ttor[0]*factor_lj; + tor[i][1] += ttor[1]*factor_lj; + tor[i][2] += ttor[2]*factor_lj; + break; + + default: + precompute_i(j,wj); + one_eng = resquared_analytic(i,j,wi,wj,r12,rsq,fforce,ttor,rtor); + tor[i][0] += ttor[0]*factor_lj; + tor[i][1] += ttor[1]*factor_lj; + tor[i][2] += ttor[2]*factor_lj; + + break; + } + + fforce[0] *= factor_lj; + fforce[1] *= factor_lj; + fforce[2] *= factor_lj; + f[i][0] += fforce[0]; + f[i][1] += fforce[1]; + f[i][2] += fforce[2]; + + if (eflag) evdwl = factor_lj*one_eng; + + if (evflag) ev_tally_xyz_full(i,evdwl,0.0,fforce[0],fforce[1], + fforce[2],-r12[0],-r12[1],-r12[2]); + } + } + } +} diff --git a/src/GPU/pair_resquared_gpu.h b/src/GPU/pair_resquared_gpu.h new file mode 100644 index 0000000000..0fd24ae6dc --- /dev/null +++ b/src/GPU/pair_resquared_gpu.h @@ -0,0 +1,49 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(resquared/gpu,PairRESquaredGPU) + +#else + +#ifndef LMP_PAIR_RESQUARED_GPU_H +#define LMP_PAIR_RESQUARED_GPU_H + +#include "pair_resquared.h" + +namespace LAMMPS_NS { + +class PairRESquaredGPU : public PairRESquared { + public: + PairRESquaredGPU(LAMMPS *lmp); + ~PairRESquaredGPU(); + void cpu_compute(int, int, int, int, int *, int *, int **); + void compute(int, int); + void init_style(); + double memory_usage(); + + enum { GPU_PAIR, GPU_NEIGH }; + + private: + int *olist; + int gpu_mode; + double cpu_time; + int *gpulist; + int quat_nmax; + double **quat; +}; + +} +#endif +#endif