From 0425dd037e7b03a2d61b9ba45321ba2a0e882d4b Mon Sep 17 00:00:00 2001 From: "W. Michael Brown" Date: Tue, 24 May 2011 20:15:04 -0400 Subject: [PATCH] Removing nall from kernel calls. Changing ellipsoid neighbor calls to use a correct block size. --- lib/gpu/base_ellipsoid.cpp | 5 ++--- lib/gpu/cmm_cut_gpu_kernel.cu | 4 ++-- lib/gpu/cmm_cut_gpu_memory.cpp | 6 ++---- lib/gpu/cmmc_long_gpu_kernel.cu | 9 ++++----- lib/gpu/cmmc_long_gpu_memory.cpp | 5 ++--- lib/gpu/cmmc_msm_gpu_kernel.cu | 9 ++++----- lib/gpu/cmmc_msm_gpu_memory.cpp | 8 +++----- lib/gpu/crml_gpu_kernel.cu | 13 ++++++------- lib/gpu/crml_gpu_memory.cpp | 10 ++++------ lib/gpu/ellipsoid_nbor.cu | 14 ++++++++------ lib/gpu/gayberne.cpp | 27 +++++++++++++++------------ lib/gpu/gayberne.cu | 2 +- lib/gpu/gayberne_lj.cu | 6 +++--- lib/gpu/lj96_cut_gpu_kernel.cu | 5 ++--- lib/gpu/lj96_cut_gpu_memory.cpp | 6 ++---- lib/gpu/lj_class2_long.cpp | 9 +++------ lib/gpu/lj_class2_long.cu | 9 ++++----- lib/gpu/lj_cut_gpu_kernel.cu | 5 ++--- lib/gpu/lj_cut_gpu_memory.cpp | 6 ++---- lib/gpu/lj_expand_gpu_kernel.cu | 5 ++--- lib/gpu/lj_expand_gpu_memory.cpp | 6 ++---- lib/gpu/ljc_cut_gpu_kernel.cu | 8 ++++---- lib/gpu/ljc_cut_gpu_memory.cpp | 5 ++--- lib/gpu/ljcl_cut_gpu_kernel.cu | 9 ++++----- lib/gpu/ljcl_cut_gpu_memory.cpp | 12 +++++------- lib/gpu/morse_gpu_kernel.cu | 5 ++--- lib/gpu/morse_gpu_memory.cpp | 6 ++---- lib/gpu/pair_gpu_build_kernel.cu | 2 +- lib/gpu/pair_gpu_nbor.cpp | 2 +- lib/gpu/re_squared.cpp | 31 +++++++++++++++++-------------- lib/gpu/re_squared.cu | 2 +- lib/gpu/re_squared_lj.cu | 9 ++++----- 32 files changed, 118 insertions(+), 142 deletions(-) diff --git a/lib/gpu/base_ellipsoid.cpp b/lib/gpu/base_ellipsoid.cpp index 55a4f26d02..45477662f6 100644 --- a/lib/gpu/base_ellipsoid.cpp +++ b/lib/gpu/base_ellipsoid.cpp @@ -236,17 +236,16 @@ void BaseEllipsoidT::pack_nbors(const int GX, const int BX, const int start, const int form_high, const bool shared_types, int ntypes) { int 
stride=nbor->nbor_pitch(); - int anall=atom->nall(); if (shared_types) { k_nbor_fast.set_size(GX,BX); k_nbor_fast.run(&atom->dev_x.begin(), &cut_form.begin(), &nbor->dev_nbor.begin(), &stride, &start, &inum, - &nbor->dev_packed.begin(), &form_low, &form_high, &anall); + &nbor->dev_packed.begin(), &form_low, &form_high); } else { k_nbor.set_size(GX,BX); k_nbor.run(&atom->dev_x.begin(), &cut_form.begin(), &ntypes, &nbor->dev_nbor.begin(), &stride, &start, &inum, - &nbor->dev_packed.begin(), &form_low, &form_high, &anall); + &nbor->dev_packed.begin(), &form_low, &form_high); } } diff --git a/lib/gpu/cmm_cut_gpu_kernel.cu b/lib/gpu/cmm_cut_gpu_kernel.cu index f99e7f06ac..a5521f516b 100644 --- a/lib/gpu/cmm_cut_gpu_kernel.cu +++ b/lib/gpu/cmm_cut_gpu_kernel.cu @@ -84,7 +84,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1, __global numtyp *sp_lj_in, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, - const int vflag, const int inum, const int nall, + const int vflag, const int inum, const int nbor_pitch, const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); @@ -241,7 +241,7 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in, __global numtyp* sp_lj_in,__global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, - const int vflag, const int inum, const int nall, + const int vflag, const int inum, const int nbor_pitch, const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); diff --git a/lib/gpu/cmm_cut_gpu_memory.cpp b/lib/gpu/cmm_cut_gpu_memory.cpp index 8a5949c9e7..8d18c99d7c 100644 --- a/lib/gpu/cmm_cut_gpu_memory.cpp +++ b/lib/gpu/cmm_cut_gpu_memory.cpp @@ -130,7 +130,6 @@ void CMM_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { (BX/this->_threads_per_atom))); int ainum=this->ans->inum(); - 
int anall=this->atom->nall(); int nbor_pitch=this->nbor->nbor_pitch(); this->time_pair.start(); if (shared_types) { @@ -141,15 +140,14 @@ void CMM_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { &this->_nbor_data->begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &eflag, &vflag, - &ainum, &anall, &nbor_pitch, - &this->_threads_per_atom); + &ainum, &nbor_pitch, &this->_threads_per_atom); } else { this->k_pair.set_size(GX,BX); this->k_pair.run(&this->atom->dev_x.begin(), &lj1.begin(), &lj3.begin(), &_cmm_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(), &this->_nbor_data->begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &eflag, &vflag, &ainum, - &anall, &nbor_pitch, &this->_threads_per_atom); + &nbor_pitch, &this->_threads_per_atom); } this->time_pair.stop(); } diff --git a/lib/gpu/cmmc_long_gpu_kernel.cu b/lib/gpu/cmmc_long_gpu_kernel.cu index a47a9267a1..39ae01cb78 100644 --- a/lib/gpu/cmmc_long_gpu_kernel.cu +++ b/lib/gpu/cmmc_long_gpu_kernel.cu @@ -102,7 +102,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1, __global numtyp *sp_lj_in, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, - const int vflag, const int inum, const int nall, + const int vflag, const int inum, const int nbor_pitch, __global numtyp *q_ , const numtyp cut_coulsq, const numtyp qqrd2e, const numtyp g_ewald, const int t_per_atom) { @@ -295,10 +295,9 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, const int vflag, const int inum, - const int nall, const int nbor_pitch, - __global numtyp *q_ , const numtyp cut_coulsq, - const numtyp qqrd2e, const numtyp g_ewald, - const int t_per_atom) { + const int nbor_pitch, __global numtyp *q_, + const numtyp cut_coulsq, const numtyp qqrd2e, + const numtyp g_ewald, const int 
t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); ii+=tid/t_per_atom; diff --git a/lib/gpu/cmmc_long_gpu_memory.cpp b/lib/gpu/cmmc_long_gpu_memory.cpp index e2f99fceca..c6291982d6 100644 --- a/lib/gpu/cmmc_long_gpu_memory.cpp +++ b/lib/gpu/cmmc_long_gpu_memory.cpp @@ -141,7 +141,6 @@ void CMML_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { (BX/this->_threads_per_atom))); int ainum=this->ans->inum(); - int anall=this->atom->nall(); int nbor_pitch=this->nbor->nbor_pitch(); this->time_pair.start(); if (shared_types) { @@ -152,7 +151,7 @@ void CMML_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { &this->_nbor_data->begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &eflag, &vflag, - &ainum, &anall, &nbor_pitch, + &ainum, &nbor_pitch, &this->atom->dev_q.begin(), &_cut_coulsq, &_qqrd2e, &_g_ewald, &this->_threads_per_atom); } else { @@ -161,7 +160,7 @@ void CMML_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { &_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(), &this->_nbor_data->begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &eflag, &vflag, &ainum, - &anall, &nbor_pitch, &this->atom->dev_q.begin(), + &nbor_pitch, &this->atom->dev_q.begin(), &_cut_coulsq, &_qqrd2e, &_g_ewald, &this->_threads_per_atom); } diff --git a/lib/gpu/cmmc_msm_gpu_kernel.cu b/lib/gpu/cmmc_msm_gpu_kernel.cu index 3ee88d2a3d..09fad801eb 100644 --- a/lib/gpu/cmmc_msm_gpu_kernel.cu +++ b/lib/gpu/cmmc_msm_gpu_kernel.cu @@ -94,7 +94,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1, __global numtyp *sp_lj_in, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, - const int vflag, const int inum, const int nall, + const int vflag, const int inum, const int nbor_pitch, __global numtyp *q_, const numtyp cut_coulsq, const numtyp qqrd2e, const int smooth, const int t_per_atom) { @@ -314,10 +314,9 @@ 
__kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, const int vflag, const int inum, - const int nall, const int nbor_pitch, - __global numtyp *q_ , const numtyp cut_coulsq, - const numtyp qqrd2e, const int smooth, - const int t_per_atom) { + const int nbor_pitch, __global numtyp *q_, + const numtyp cut_coulsq, const numtyp qqrd2e, + const int smooth, const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); ii+=tid/t_per_atom; diff --git a/lib/gpu/cmmc_msm_gpu_memory.cpp b/lib/gpu/cmmc_msm_gpu_memory.cpp index ca051d4803..22d69a33e2 100644 --- a/lib/gpu/cmmc_msm_gpu_memory.cpp +++ b/lib/gpu/cmmc_msm_gpu_memory.cpp @@ -141,7 +141,6 @@ void CMMM_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { (BX/this->_threads_per_atom))); int ainum=this->ans->inum(); - int anall=this->atom->nall(); int nbor_pitch=this->nbor->nbor_pitch(); this->time_pair.start(); if (shared_types) { @@ -152,7 +151,7 @@ void CMMM_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { &this->_nbor_data->begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &eflag, &vflag, - &ainum, &anall, &nbor_pitch, + &ainum, &nbor_pitch, &this->atom->dev_q.begin(), &_cut_coulsq, &_qqrd2e, &_smooth, &this->_threads_per_atom); } else { @@ -161,9 +160,8 @@ void CMMM_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { &_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(), &this->_nbor_data->begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &eflag, &vflag, &ainum, - &anall, &nbor_pitch, &this->atom->dev_q.begin(), - &_cut_coulsq, &_qqrd2e, &_smooth, - &this->_threads_per_atom); + &nbor_pitch, &this->atom->dev_q.begin(), &_cut_coulsq, + &_qqrd2e, &_smooth, &this->_threads_per_atom); } this->time_pair.stop(); } diff --git a/lib/gpu/crml_gpu_kernel.cu b/lib/gpu/crml_gpu_kernel.cu 
index dfdc7af3cd..f2ba74c8b6 100644 --- a/lib/gpu/crml_gpu_kernel.cu +++ b/lib/gpu/crml_gpu_kernel.cu @@ -103,12 +103,11 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, const int vflag, const int inum, - const int nall, const int nbor_pitch, - __global numtyp *q_, const numtyp cut_coulsq, - const numtyp qqrd2e, const numtyp g_ewald, - const numtyp denom_lj, const numtyp cut_bothsq, - const numtyp cut_ljsq, const numtyp cut_lj_innersq, - const int t_per_atom) { + const int nbor_pitch, __global numtyp *q_, + const numtyp cut_coulsq, const numtyp qqrd2e, + const numtyp g_ewald, const numtyp denom_lj, + const numtyp cut_bothsq, const numtyp cut_ljsq, + const numtyp cut_lj_innersq, const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); @@ -300,7 +299,7 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp2 *ljd_in, __global numtyp* sp_lj_in, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, - const int vflag, const int inum, const int nall, + const int vflag, const int inum, const int nbor_pitch, __global numtyp *q_, const numtyp cut_coulsq, const numtyp qqrd2e, const numtyp g_ewald, const numtyp denom_lj, diff --git a/lib/gpu/crml_gpu_memory.cpp b/lib/gpu/crml_gpu_memory.cpp index 6661f67585..9c41cced1a 100644 --- a/lib/gpu/crml_gpu_memory.cpp +++ b/lib/gpu/crml_gpu_memory.cpp @@ -145,7 +145,6 @@ void CRML_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { (BX/this->_threads_per_atom))); int ainum=this->ans->inum(); - int anall=this->atom->nall(); int nbor_pitch=this->nbor->nbor_pitch(); this->time_pair.start(); if (shared_types) { @@ -155,10 +154,9 @@ void CRML_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { &this->_nbor_data->begin(), &this->ans->dev_ans.begin(), 
&this->ans->dev_engv.begin(), &eflag, &vflag, - &ainum, &anall, &nbor_pitch, - &this->atom->dev_q.begin(), &_cut_coulsq, - &_qqrd2e, &_g_ewald, &_denom_lj, &_cut_bothsq, - &_cut_ljsq, &_cut_lj_innersq, + &ainum, &nbor_pitch, &this->atom->dev_q.begin(), + &_cut_coulsq, &_qqrd2e, &_g_ewald, &_denom_lj, + &_cut_bothsq, &_cut_ljsq, &_cut_lj_innersq, &this->_threads_per_atom); } else { this->k_pair.set_size(GX,BX); @@ -166,7 +164,7 @@ void CRML_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { &_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(), &this->_nbor_data->begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &eflag, &vflag, &ainum, - &anall, &nbor_pitch, &this->atom->dev_q.begin(), + &nbor_pitch, &this->atom->dev_q.begin(), &_cut_coulsq, &_qqrd2e, &_g_ewald, &_denom_lj, &_cut_bothsq, &_cut_ljsq, &_cut_lj_innersq, &this->_threads_per_atom); diff --git a/lib/gpu/ellipsoid_nbor.cu b/lib/gpu/ellipsoid_nbor.cu index 67f9e631b4..3a83b65666 100644 --- a/lib/gpu/ellipsoid_nbor.cu +++ b/lib/gpu/ellipsoid_nbor.cu @@ -42,6 +42,10 @@ #define numtyp4 float4 #endif +#define SBBITS 30 +#define NEIGHMASK 0x3FFFFFFF +__inline int sbmask(int j) { return j >> SBBITS & 3; } + // --------------------------------------------------------------------------- // Unpack neighbors from dev_ij array into dev_nbor matrix for coalesced access // -- Only unpack neighbors matching the specified inclusive range of forms @@ -51,7 +55,7 @@ __kernel void kernel_nbor(__global numtyp4 *x_, __global numtyp2 *cut_form, const int ntypes, __global int *dev_nbor, const int nbor_pitch, const int start, const int inum, __global int *dev_ij, const int form_low, - const int form_high, const int nall) { + const int form_high) { // ii indexes the two interacting particles in gi int ii=GLOBAL_ID_X+start; @@ -71,8 +75,7 @@ __kernel void kernel_nbor(__global numtyp4 *x_, __global numtyp2 *cut_form, int newj=0; for ( ; nbor=nall) - j%=nall; + j &= NEIGHMASK; numtyp4 jx=x_[j]; int 
jtype=jx.w; int mtype=itype+jtype; @@ -107,7 +110,7 @@ __kernel void kernel_nbor_fast(__global numtyp4 *x_, __global numtyp2 *cut_form, __global int *dev_nbor, const int nbor_pitch, const int start, const int inum, __global int *dev_ij, const int form_low, - const int form_high, const int nall) { + const int form_high) { int ii=THREAD_ID_X; __local int form[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; @@ -135,8 +138,7 @@ __kernel void kernel_nbor_fast(__global numtyp4 *x_, __global numtyp2 *cut_form, int newj=0; for ( ; nbor=nall) - j%=nall; + j &= NEIGHMASK; numtyp4 jx=x_[j]; int jtype=jx.w; int mtype=itype+jtype; diff --git a/lib/gpu/gayberne.cpp b/lib/gpu/gayberne.cpp index d6299d9f94..643e86cd31 100644 --- a/lib/gpu/gayberne.cpp +++ b/lib/gpu/gayberne.cpp @@ -191,11 +191,9 @@ void GayBerneT::loop(const bool _eflag, const bool _vflag) { else vflag=0; - int GX=static_cast(ceil(static_cast(this->ans->inum())/ - (BX/this->_threads_per_atom))); + int GX, NGX; int stride=this->nbor->nbor_pitch(); int ainum=this->ans->inum(); - int anall=this->atom->nall(); if (this->_multiple_forms) { this->time_nbor1.start(); @@ -203,7 +201,8 @@ void GayBerneT::loop(const bool _eflag, const bool _vflag) { // ------------ ELLIPSE_ELLIPSE and ELLIPSE_SPHERE --------------- GX=static_cast(ceil(static_cast(this->_last_ellipse)/ (BX/this->_threads_per_atom))); - this->pack_nbors(GX,BX, 0, this->_last_ellipse,ELLIPSE_SPHERE, + NGX=static_cast(ceil(static_cast(this->_last_ellipse)/BX)); + this->pack_nbors(NGX,BX, 0, this->_last_ellipse,ELLIPSE_SPHERE, ELLIPSE_ELLIPSE,_shared_types,_lj_types); this->time_nbor1.stop(); @@ -214,7 +213,7 @@ void GayBerneT::loop(const bool _eflag, const bool _vflag) { &this->gamma_upsilon_mu.begin(), &this->sigma_epsilon.begin(), &this->_lj_types, &this->lshape.begin(), &this->nbor->dev_nbor.begin(), &stride, &this->ans->dev_ans.begin(),&ainum,&this->ans->dev_engv.begin(), - &this->dev_error.begin(), &eflag, &vflag, &this->_last_ellipse, &anall, + 
&this->dev_error.begin(), &eflag, &vflag, &this->_last_ellipse, &this->_threads_per_atom); this->time_ellipsoid.stop(); @@ -234,7 +233,9 @@ void GayBerneT::loop(const bool _eflag, const bool _vflag) { GX=static_cast(ceil(static_cast(this->ans->inum()- this->_last_ellipse)/ (BX/this->_threads_per_atom))); - this->pack_nbors(GX,BX,this->_last_ellipse,this->ans->inum(), + NGX=static_cast(ceil(static_cast(this->ans->inum()- + this->_last_ellipse)/BX)); + this->pack_nbors(NGX,BX,this->_last_ellipse,this->ans->inum(), SPHERE_ELLIPSE,SPHERE_ELLIPSE,_shared_types,_lj_types); this->time_nbor2.stop(); @@ -246,7 +247,7 @@ void GayBerneT::loop(const bool _eflag, const bool _vflag) { &this->sigma_epsilon.begin(), &this->_lj_types, &this->lshape.begin(), &this->nbor->dev_nbor.begin(), &stride, &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &this->dev_error.begin(), &eflag, - &vflag, &this->_last_ellipse, &ainum, &anall, &this->_threads_per_atom); + &vflag, &this->_last_ellipse, &ainum, &this->_threads_per_atom); this->time_ellipsoid2.stop(); } else { this->ans->dev_ans.zero(); @@ -269,7 +270,7 @@ void GayBerneT::loop(const bool _eflag, const bool _vflag) { &this->lj3.begin(), &this->gamma_upsilon_mu.begin(), &stride, &this->nbor->dev_packed.begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &this->dev_error.begin(), - &eflag, &vflag, &this->_last_ellipse, &ainum, &anall, + &eflag, &vflag, &this->_last_ellipse, &ainum, &this->_threads_per_atom); } else { this->k_lj.set_size(GX,BX); @@ -277,14 +278,16 @@ void GayBerneT::loop(const bool _eflag, const bool _vflag) { &this->lj3.begin(), &this->_lj_types, &this->gamma_upsilon_mu.begin(), &stride, &this->nbor->dev_packed.begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &this->dev_error.begin(), &eflag, - &vflag, &this->_last_ellipse, &ainum, &anall, - &this->_threads_per_atom); + &vflag, &this->_last_ellipse, &ainum, &this->_threads_per_atom); } } this->time_lj.stop(); } else { + 
GX=static_cast(ceil(static_cast(this->ans->inum())/ + (BX/this->_threads_per_atom))); + NGX=static_cast(ceil(static_cast(this->ans->inum())/BX)); this->time_nbor1.start(); - this->pack_nbors(GX, BX, 0, this->ans->inum(),SPHERE_SPHERE, + this->pack_nbors(NGX, BX, 0, this->ans->inum(),SPHERE_SPHERE, ELLIPSE_ELLIPSE,_shared_types,_lj_types); this->time_nbor1.stop(); this->time_ellipsoid.start(); @@ -295,7 +298,7 @@ void GayBerneT::loop(const bool _eflag, const bool _vflag) { &this->_lj_types, &this->lshape.begin(), &this->nbor->dev_nbor.begin(), &stride, &this->ans->dev_ans.begin(), &ainum, &this->ans->dev_engv.begin(), &this->dev_error.begin(), - &eflag, &vflag, &ainum, &anall, &this->_threads_per_atom); + &eflag, &vflag, &ainum, &this->_threads_per_atom); this->time_ellipsoid.stop(); } } diff --git a/lib/gpu/gayberne.cu b/lib/gpu/gayberne.cu index 70f5ca8672..8964dd284c 100644 --- a/lib/gpu/gayberne.cu +++ b/lib/gpu/gayberne.cu @@ -95,7 +95,7 @@ __kernel void kernel_ellipsoid(__global numtyp4* x_,__global numtyp4 *q, __global acctyp4 *ans, const int astride, __global acctyp *engv, __global int *err_flag, const int eflag, const int vflag, const int inum, - const int nall, const int t_per_atom) { + const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); ii+=tid/t_per_atom; diff --git a/lib/gpu/gayberne_lj.cu b/lib/gpu/gayberne_lj.cu index 0f014cac4c..4bd3a1f82a 100644 --- a/lib/gpu/gayberne_lj.cu +++ b/lib/gpu/gayberne_lj.cu @@ -32,7 +32,7 @@ __kernel void kernel_sphere_ellipsoid(__global numtyp4 *x_,__global numtyp4 *q, __global acctyp4 *ans, __global acctyp *engv, __global int *err_flag, const int eflag, const int vflag,const int start, const int inum, - const int nall, const int t_per_atom) { + const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); ii+=tid/t_per_atom+start; @@ -306,7 +306,7 @@ __kernel void kernel_lj(__global numtyp4 *x_, __global numtyp4 *lj1, 
__global acctyp4 *ans, __global acctyp *engv, __global int *err_flag, const int eflag, const int vflag, const int start, const int inum, - const int nall, const int t_per_atom) { + const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); ii+=tid/t_per_atom+start; @@ -448,7 +448,7 @@ __kernel void kernel_lj_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in, __global acctyp4 *ans, __global acctyp *engv, __global int *err_flag, const int eflag, const int vflag, const int start, const int inum, - const int nall, const int t_per_atom) { + const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); ii+=tid/t_per_atom+start; diff --git a/lib/gpu/lj96_cut_gpu_kernel.cu b/lib/gpu/lj96_cut_gpu_kernel.cu index 1de9a8a7bf..6442089193 100644 --- a/lib/gpu/lj96_cut_gpu_kernel.cu +++ b/lib/gpu/lj96_cut_gpu_kernel.cu @@ -84,7 +84,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1, __global numtyp *sp_lj_in, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, - const int vflag, const int inum, const int nall, + const int vflag, const int inum, const int nbor_pitch, const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); @@ -234,8 +234,7 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, const int vflag, const int inum, - const int nall, const int nbor_pitch, - const int t_per_atom) { + const int nbor_pitch, const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); ii+=tid/t_per_atom; diff --git a/lib/gpu/lj96_cut_gpu_memory.cpp b/lib/gpu/lj96_cut_gpu_memory.cpp index 0b066c0973..4cc61a5d03 100644 --- a/lib/gpu/lj96_cut_gpu_memory.cpp +++ 
b/lib/gpu/lj96_cut_gpu_memory.cpp @@ -130,7 +130,6 @@ void LJ96_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { (BX/this->_threads_per_atom))); int ainum=this->ans->inum(); - int anall=this->atom->nall(); int nbor_pitch=this->nbor->nbor_pitch(); this->time_pair.start(); if (shared_types) { @@ -141,15 +140,14 @@ void LJ96_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { &this->_nbor_data->begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &eflag, &vflag, - &ainum, &anall, &nbor_pitch, - &this->_threads_per_atom); + &ainum, &nbor_pitch, &this->_threads_per_atom); } else { this->k_pair.set_size(GX,BX); this->k_pair.run(&this->atom->dev_x.begin(), &lj1.begin(), &lj3.begin(), &_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(), &this->_nbor_data->begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &eflag, &vflag, &ainum, - &anall, &nbor_pitch, &this->_threads_per_atom); + &nbor_pitch, &this->_threads_per_atom); } this->time_pair.stop(); } diff --git a/lib/gpu/lj_class2_long.cpp b/lib/gpu/lj_class2_long.cpp index f70945690d..b7c0210e9f 100644 --- a/lib/gpu/lj_class2_long.cpp +++ b/lib/gpu/lj_class2_long.cpp @@ -139,7 +139,6 @@ void LJClass2LongT::loop(const bool _eflag, const bool _vflag) { (BX/this->_threads_per_atom))); int ainum=this->ans->inum(); - int anall=this->atom->nall(); int nbor_pitch=this->nbor->nbor_pitch(); this->time_pair.start(); if (shared_types) { @@ -150,8 +149,7 @@ void LJClass2LongT::loop(const bool _eflag, const bool _vflag) { &this->_nbor_data->begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &eflag, &vflag, - &ainum, &anall, &nbor_pitch, - &this->atom->dev_q.begin(), &_cut_coulsq, + &ainum, &nbor_pitch, &this->atom->dev_q.begin(), &_cut_coulsq, &_qqrd2e, &_g_ewald, &this->_threads_per_atom); } else { this->k_pair.set_size(GX,BX); @@ -159,9 +157,8 @@ void LJClass2LongT::loop(const bool _eflag, const bool _vflag) { &_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(), 
&this->_nbor_data->begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &eflag, &vflag, &ainum, - &anall, &nbor_pitch, &this->atom->dev_q.begin(), - &_cut_coulsq, &_qqrd2e, &_g_ewald, - &this->_threads_per_atom); + &nbor_pitch, &this->atom->dev_q.begin(), &_cut_coulsq, + &_qqrd2e, &_g_ewald, &this->_threads_per_atom); } this->time_pair.stop(); } diff --git a/lib/gpu/lj_class2_long.cu b/lib/gpu/lj_class2_long.cu index 2b113c23af..8225ec1a9e 100644 --- a/lib/gpu/lj_class2_long.cu +++ b/lib/gpu/lj_class2_long.cu @@ -100,7 +100,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1, __global numtyp *sp_lj_in, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, - const int vflag, const int inum, const int nall, + const int vflag, const int inum, const int nbor_pitch, __global numtyp *q_, const numtyp cut_coulsq, const numtyp qqrd2e, const numtyp g_ewald, const int t_per_atom) { @@ -284,10 +284,9 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, const int vflag, const int inum, - const int nall, const int nbor_pitch, - __global numtyp *q_ , const numtyp cut_coulsq, - const numtyp qqrd2e, const numtyp g_ewald, - const int t_per_atom) { + const int nbor_pitch, __global numtyp *q_, + const numtyp cut_coulsq, const numtyp qqrd2e, + const numtyp g_ewald, const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); ii+=tid/t_per_atom; diff --git a/lib/gpu/lj_cut_gpu_kernel.cu b/lib/gpu/lj_cut_gpu_kernel.cu index 9ef698cd09..42073f983d 100644 --- a/lib/gpu/lj_cut_gpu_kernel.cu +++ b/lib/gpu/lj_cut_gpu_kernel.cu @@ -84,7 +84,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1, __global numtyp *sp_lj_in, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, 
__global acctyp *engv, const int eflag, - const int vflag, const int inum, const int nall, + const int vflag, const int inum, const int nbor_pitch, const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); @@ -233,8 +233,7 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, const int vflag, const int inum, - const int nall, const int nbor_pitch, - const int t_per_atom) { + const int nbor_pitch, const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); ii+=tid/t_per_atom; diff --git a/lib/gpu/lj_cut_gpu_memory.cpp b/lib/gpu/lj_cut_gpu_memory.cpp index a294eb647f..40622c576c 100644 --- a/lib/gpu/lj_cut_gpu_memory.cpp +++ b/lib/gpu/lj_cut_gpu_memory.cpp @@ -130,7 +130,6 @@ void LJL_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { (BX/this->_threads_per_atom))); int ainum=this->ans->inum(); - int anall=this->atom->nall(); int nbor_pitch=this->nbor->nbor_pitch(); this->time_pair.start(); if (shared_types) { @@ -141,15 +140,14 @@ void LJL_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { &this->_nbor_data->begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &eflag, &vflag, - &ainum, &anall, &nbor_pitch, - &this->_threads_per_atom); + &ainum, &nbor_pitch, &this->_threads_per_atom); } else { this->k_pair.set_size(GX,BX); this->k_pair.run(&this->atom->dev_x.begin(), &lj1.begin(), &lj3.begin(), &_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(), &this->_nbor_data->begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &eflag, &vflag, &ainum, - &anall, &nbor_pitch, &this->_threads_per_atom); + &nbor_pitch, &this->_threads_per_atom); } this->time_pair.stop(); } diff --git a/lib/gpu/lj_expand_gpu_kernel.cu b/lib/gpu/lj_expand_gpu_kernel.cu index 26fbefacf8..1a63ef803c 100644 --- 
a/lib/gpu/lj_expand_gpu_kernel.cu +++ b/lib/gpu/lj_expand_gpu_kernel.cu @@ -84,7 +84,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1, __global numtyp *sp_lj_in, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, - const int vflag, const int inum, const int nall, + const int vflag, const int inum, const int nbor_pitch, const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); @@ -236,8 +236,7 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, const int vflag, const int inum, - const int nall, const int nbor_pitch, - const int t_per_atom) { + const int nbor_pitch, const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); ii+=tid/t_per_atom; diff --git a/lib/gpu/lj_expand_gpu_memory.cpp b/lib/gpu/lj_expand_gpu_memory.cpp index fe5bf0b513..714aa7c77f 100644 --- a/lib/gpu/lj_expand_gpu_memory.cpp +++ b/lib/gpu/lj_expand_gpu_memory.cpp @@ -130,7 +130,6 @@ void LJE_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { (BX/this->_threads_per_atom))); int ainum=this->ans->inum(); - int anall=this->atom->nall(); int nbor_pitch=this->nbor->nbor_pitch(); this->time_pair.start(); if (shared_types) { @@ -141,15 +140,14 @@ void LJE_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { &this->_nbor_data->begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &eflag, &vflag, - &ainum, &anall, &nbor_pitch, - &this->_threads_per_atom); + &ainum, &nbor_pitch, &this->_threads_per_atom); } else { this->k_pair.set_size(GX,BX); this->k_pair.run(&this->atom->dev_x.begin(), &lj1.begin(), &lj3.begin(), &_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(), &this->_nbor_data->begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), 
&eflag, &vflag, &ainum, - &anall, &nbor_pitch, &this->_threads_per_atom); + &nbor_pitch, &this->_threads_per_atom); } this->time_pair.stop(); } diff --git a/lib/gpu/ljc_cut_gpu_kernel.cu b/lib/gpu/ljc_cut_gpu_kernel.cu index ad1e530712..ca37e1235f 100644 --- a/lib/gpu/ljc_cut_gpu_kernel.cu +++ b/lib/gpu/ljc_cut_gpu_kernel.cu @@ -94,7 +94,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1, __global numtyp *sp_lj_in, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, - const int vflag, const int inum, const int nall, + const int vflag, const int inum, const int nbor_pitch, __global numtyp *q_ , __global numtyp *cutsq, const numtyp qqrd2e, const int t_per_atom) { @@ -270,9 +270,9 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, const int vflag, const int inum, - const int nall, const int nbor_pitch, - __global numtyp *q_ , __global numtyp *_cutsq, - const numtyp qqrd2e, const int t_per_atom) { + const int nbor_pitch, __global numtyp *q_, + __global numtyp *_cutsq, const numtyp qqrd2e, + const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); ii+=tid/t_per_atom; diff --git a/lib/gpu/ljc_cut_gpu_memory.cpp b/lib/gpu/ljc_cut_gpu_memory.cpp index 642ff6ecc7..df089a626a 100644 --- a/lib/gpu/ljc_cut_gpu_memory.cpp +++ b/lib/gpu/ljc_cut_gpu_memory.cpp @@ -142,7 +142,6 @@ void LJC_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { (BX/this->_threads_per_atom))); int ainum=this->ans->inum(); - int anall=this->atom->nall(); int nbor_pitch=this->nbor->nbor_pitch(); this->time_pair.start(); if (shared_types) { @@ -153,7 +152,7 @@ void LJC_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { &this->_nbor_data->begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &eflag, 
&vflag, - &ainum, &anall, &nbor_pitch, + &ainum, &nbor_pitch, &this->atom->dev_q.begin(), &cutsq.begin(), &_qqrd2e, &this->_threads_per_atom); } else { @@ -162,7 +161,7 @@ void LJC_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { &_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(), &this->_nbor_data->begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &eflag, &vflag, &ainum, - &anall, &nbor_pitch, &this->atom->dev_q.begin(), + &nbor_pitch, &this->atom->dev_q.begin(), &cutsq.begin(), &_qqrd2e, &this->_threads_per_atom); } this->time_pair.stop(); diff --git a/lib/gpu/ljcl_cut_gpu_kernel.cu b/lib/gpu/ljcl_cut_gpu_kernel.cu index ddde1dec32..e177f1a0fd 100644 --- a/lib/gpu/ljcl_cut_gpu_kernel.cu +++ b/lib/gpu/ljcl_cut_gpu_kernel.cu @@ -102,7 +102,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1, __global numtyp *sp_lj_in, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, - const int vflag, const int inum, const int nall, + const int vflag, const int inum, const int nbor_pitch, __global numtyp *q_, const numtyp cut_coulsq, const numtyp qqrd2e, const numtyp g_ewald, const int t_per_atom) { @@ -286,10 +286,9 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, const int vflag, const int inum, - const int nall, const int nbor_pitch, - __global numtyp *q_ , const numtyp cut_coulsq, - const numtyp qqrd2e, const numtyp g_ewald, - const int t_per_atom) { + const int nbor_pitch, __global numtyp *q_, + const numtyp cut_coulsq, const numtyp qqrd2e, + const numtyp g_ewald, const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); ii+=tid/t_per_atom; diff --git a/lib/gpu/ljcl_cut_gpu_memory.cpp b/lib/gpu/ljcl_cut_gpu_memory.cpp index f37e6b1857..f36d2a68c2 100644 --- 
a/lib/gpu/ljcl_cut_gpu_memory.cpp +++ b/lib/gpu/ljcl_cut_gpu_memory.cpp @@ -140,7 +140,6 @@ void LJCL_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { (BX/this->_threads_per_atom))); int ainum=this->ans->inum(); - int anall=this->atom->nall(); int nbor_pitch=this->nbor->nbor_pitch(); this->time_pair.start(); if (shared_types) { @@ -151,18 +150,17 @@ void LJCL_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { &this->_nbor_data->begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &eflag, &vflag, - &ainum, &anall, &nbor_pitch, - &this->atom->dev_q.begin(), &_cut_coulsq, - &_qqrd2e, &_g_ewald, &this->_threads_per_atom); + &ainum, &nbor_pitch, &this->atom->dev_q.begin(), + &_cut_coulsq, &_qqrd2e, &_g_ewald, + &this->_threads_per_atom); } else { this->k_pair.set_size(GX,BX); this->k_pair.run(&this->atom->dev_x.begin(), &lj1.begin(), &lj3.begin(), &_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(), &this->_nbor_data->begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &eflag, &vflag, &ainum, - &anall, &nbor_pitch, &this->atom->dev_q.begin(), - &_cut_coulsq, &_qqrd2e, &_g_ewald, - &this->_threads_per_atom); + &nbor_pitch, &this->atom->dev_q.begin(), &_cut_coulsq, + &_qqrd2e, &_g_ewald, &this->_threads_per_atom); } this->time_pair.stop(); } diff --git a/lib/gpu/morse_gpu_kernel.cu b/lib/gpu/morse_gpu_kernel.cu index 8832f58c64..d4cf1c325c 100644 --- a/lib/gpu/morse_gpu_kernel.cu +++ b/lib/gpu/morse_gpu_kernel.cu @@ -84,7 +84,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *mor1, __global numtyp *sp_lj_in, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, - const int vflag, const int inum, const int nall, + const int vflag, const int inum, const int nbor_pitch, const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); @@ -234,8 +234,7 @@ __kernel void kernel_pair_fast(__global numtyp4 
*x_, __global numtyp4 *mor1_in, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, const int vflag, const int inum, - const int nall, const int nbor_pitch, - const int t_per_atom) { + const int nbor_pitch, const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); ii+=tid/t_per_atom; diff --git a/lib/gpu/morse_gpu_memory.cpp b/lib/gpu/morse_gpu_memory.cpp index f146b39215..d188683915 100644 --- a/lib/gpu/morse_gpu_memory.cpp +++ b/lib/gpu/morse_gpu_memory.cpp @@ -129,7 +129,6 @@ void MOR_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { (BX/this->_threads_per_atom))); int ainum=this->ans->inum(); - int anall=this->atom->nall(); int nbor_pitch=this->nbor->nbor_pitch(); this->time_pair.start(); if (shared_types) { @@ -140,15 +139,14 @@ void MOR_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) { &this->_nbor_data->begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &eflag, &vflag, - &ainum, &anall, &nbor_pitch, - &this->_threads_per_atom); + &ainum, &nbor_pitch, &this->_threads_per_atom); } else { this->k_pair.set_size(GX,BX); this->k_pair.run(&this->atom->dev_x.begin(), &mor1.begin(), &mor2.begin(), &_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(), &this->_nbor_data->begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &eflag, &vflag, &ainum, - &anall, &nbor_pitch, &this->_threads_per_atom); + &nbor_pitch, &this->_threads_per_atom); } this->time_pair.stop(); } diff --git a/lib/gpu/pair_gpu_build_kernel.cu b/lib/gpu/pair_gpu_build_kernel.cu index 33742a4cba..d1ed9fd743 100644 --- a/lib/gpu/pair_gpu_build_kernel.cu +++ b/lib/gpu/pair_gpu_build_kernel.cu @@ -253,7 +253,7 @@ __kernel void kernel_special(__global int *dev_nbor, __global int *host_nbor_list, __global int *host_numj, __global int *tag, __global int *nspecial, __global int *special, - int inum, int nt, int nall, int max_nbors) { + int inum, int nt, 
int max_nbors) { // ii indexes the two interacting particles in gi int ii=GLOBAL_ID_X; diff --git a/lib/gpu/pair_gpu_nbor.cpp b/lib/gpu/pair_gpu_nbor.cpp index df138a7eff..499f3c132c 100644 --- a/lib/gpu/pair_gpu_nbor.cpp +++ b/lib/gpu/pair_gpu_nbor.cpp @@ -389,7 +389,7 @@ void PairGPUNbor::build_nbor_list(const int inum, const int host_inum, _shared->k_special.run(&dev_nbor.begin(), &dev_host_nbor.begin(), &dev_host_numj.begin(), &atom.dev_tag.begin(), &dev_nspecial.begin(), &dev_special.begin(), - &inum, &nt, &nall, &_max_nbors); + &inum, &nt, &_max_nbors); } time_kernel.stop(); diff --git a/lib/gpu/re_squared.cpp b/lib/gpu/re_squared.cpp index 19dd077fa7..b27b6944ec 100644 --- a/lib/gpu/re_squared.cpp +++ b/lib/gpu/re_squared.cpp @@ -179,11 +179,9 @@ void RESquaredT::loop(const bool _eflag, const bool _vflag) { else vflag=0; - int GX=static_cast(ceil(static_cast(this->ans->inum())/ - (BX/this->_threads_per_atom))); + int GX, NGX; int stride=this->nbor->nbor_pitch(); int ainum=this->ans->inum(); - int anall=this->atom->nall(); if (this->_multiple_forms) { if (this->_last_ellipse>0) { @@ -191,7 +189,8 @@ void RESquaredT::loop(const bool _eflag, const bool _vflag) { this->time_nbor1.start(); GX=static_cast(ceil(static_cast(this->_last_ellipse)/ (BX/this->_threads_per_atom))); - this->pack_nbors(GX,BX, 0, this->_last_ellipse,ELLIPSE_ELLIPSE, + NGX=static_cast(ceil(static_cast(this->_last_ellipse)/BX)); + this->pack_nbors(NGX,BX, 0, this->_last_ellipse,ELLIPSE_ELLIPSE, ELLIPSE_ELLIPSE,_shared_types,_lj_types); this->time_nbor1.stop(); @@ -202,13 +201,13 @@ void RESquaredT::loop(const bool _eflag, const bool _vflag) { &this->special_lj.begin(), &this->sigma_epsilon.begin(), &this->_lj_types, &this->nbor->dev_nbor.begin(), &stride, &this->ans->dev_ans.begin(),&ainum,&this->ans->dev_engv.begin(), - &this->dev_error.begin(), &eflag, &vflag, &this->_last_ellipse, &anall, + &this->dev_error.begin(), &eflag, &vflag, &this->_last_ellipse, &this->_threads_per_atom); 
this->time_ellipsoid.stop(); // ------------ ELLIPSE_SPHERE --------------- this->time_nbor2.start(); - this->pack_nbors(GX,BX, 0, this->_last_ellipse,ELLIPSE_SPHERE, + this->pack_nbors(NGX,BX, 0, this->_last_ellipse,ELLIPSE_SPHERE, ELLIPSE_SPHERE,_shared_types,_lj_types); this->time_nbor2.stop(); @@ -219,7 +218,7 @@ void RESquaredT::loop(const bool _eflag, const bool _vflag) { &this->special_lj.begin(), &this->sigma_epsilon.begin(), &this->_lj_types, &this->nbor->dev_nbor.begin(), &stride, &this->ans->dev_ans.begin(),&ainum,&this->ans->dev_engv.begin(), - &this->dev_error.begin(), &eflag, &vflag, &this->_last_ellipse, &anall, + &this->dev_error.begin(), &eflag, &vflag, &this->_last_ellipse, &this->_threads_per_atom); this->time_ellipsoid2.stop(); @@ -236,7 +235,9 @@ void RESquaredT::loop(const bool _eflag, const bool _vflag) { GX=static_cast(ceil(static_cast(this->ans->inum()- this->_last_ellipse)/ (BX/this->_threads_per_atom))); - this->pack_nbors(GX,BX,this->_last_ellipse,this->ans->inum(), + NGX=static_cast(ceil(static_cast(this->ans->inum()- + this->_last_ellipse)/BX)); + this->pack_nbors(NGX,BX,this->_last_ellipse,this->ans->inum(), SPHERE_ELLIPSE,SPHERE_ELLIPSE,_shared_types,_lj_types); this->time_nbor3.stop(); @@ -248,7 +249,7 @@ void RESquaredT::loop(const bool _eflag, const bool _vflag) { &this->sigma_epsilon.begin(), &this->_lj_types, &this->nbor->dev_nbor.begin(), &stride, &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &this->dev_error.begin(), &eflag, - &vflag, &this->_last_ellipse, &ainum, &anall, &this->_threads_per_atom); + &vflag, &this->_last_ellipse, &ainum, &this->_threads_per_atom); this->time_ellipsoid3.stop(); } else { this->ans->dev_ans.zero(); @@ -270,7 +271,7 @@ void RESquaredT::loop(const bool _eflag, const bool _vflag) { &this->lj3.begin(), &this->special_lj.begin(), &stride, &this->nbor->dev_packed.begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &this->dev_error.begin(), - &eflag, &vflag, 
&this->_last_ellipse, &ainum, &anall, + &eflag, &vflag, &this->_last_ellipse, &ainum, &this->_threads_per_atom); } else { this->k_lj.set_size(GX,BX); @@ -278,14 +279,16 @@ void RESquaredT::loop(const bool _eflag, const bool _vflag) { &this->lj3.begin(), &this->_lj_types, &this->special_lj.begin(), &stride, &this->nbor->dev_packed.begin(), &this->ans->dev_ans.begin(), &this->ans->dev_engv.begin(), &this->dev_error.begin(), &eflag, - &vflag, &this->_last_ellipse, &ainum, &anall, - &this->_threads_per_atom); + &vflag, &this->_last_ellipse, &ainum, &this->_threads_per_atom); } } this->time_lj.stop(); } else { + GX=static_cast(ceil(static_cast(this->ans->inum())/ + (BX/this->_threads_per_atom))); + NGX=static_cast(ceil(static_cast(this->ans->inum())/BX)); this->time_nbor1.start(); - this->pack_nbors(GX, BX, 0, this->ans->inum(),SPHERE_SPHERE, + this->pack_nbors(NGX, BX, 0, this->ans->inum(),SPHERE_SPHERE, ELLIPSE_ELLIPSE,_shared_types,_lj_types); this->time_nbor1.stop(); this->time_ellipsoid.start(); @@ -295,7 +298,7 @@ void RESquaredT::loop(const bool _eflag, const bool _vflag) { &this->special_lj.begin(), &this->sigma_epsilon.begin(), &this->_lj_types, &this->nbor->dev_nbor.begin(), &stride, &this->ans->dev_ans.begin(), &ainum, &this->ans->dev_engv.begin(), - &this->dev_error.begin(), &eflag, &vflag, &ainum, &anall, + &this->dev_error.begin(), &eflag, &vflag, &ainum, &this->_threads_per_atom); this->time_ellipsoid.stop(); } diff --git a/lib/gpu/re_squared.cu b/lib/gpu/re_squared.cu index a9484a9141..d91a04f5a2 100644 --- a/lib/gpu/re_squared.cu +++ b/lib/gpu/re_squared.cu @@ -47,7 +47,7 @@ __kernel void kernel_ellipsoid(__global numtyp4* x_,__global numtyp4 *q, const int astride, __global acctyp *engv, __global int *err_flag, const int eflag, const int vflag, const int inum, - const int nall, const int t_per_atom) { + const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); ii+=tid/t_per_atom; diff --git 
a/lib/gpu/re_squared_lj.cu b/lib/gpu/re_squared_lj.cu index 97045aa723..784dbe63e7 100644 --- a/lib/gpu/re_squared_lj.cu +++ b/lib/gpu/re_squared_lj.cu @@ -32,7 +32,7 @@ __kernel void kernel_ellipsoid_sphere(__global numtyp4* x_,__global numtyp4 *q, __global acctyp4 *ans, const int astride, __global acctyp *engv, __global int *err_flag, const int eflag, const int vflag, const int inum, - const int nall, const int t_per_atom) { + const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); ii+=tid/t_per_atom; @@ -353,8 +353,7 @@ __kernel void kernel_sphere_ellipsoid(__global numtyp4 *x_,__global numtyp4 *q, const int stride, __global acctyp4 *ans, __global acctyp *engv, __global int *err_flag, const int eflag, const int vflag,const int start, - const int inum, const int nall, - const int t_per_atom) { + const int inum, const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); ii+=tid/t_per_atom+start; @@ -605,7 +604,7 @@ __kernel void kernel_lj(__global numtyp4 *x_, __global numtyp4 *lj1, __global acctyp4 *ans, __global acctyp *engv, __global int *err_flag, const int eflag, const int vflag, const int start, const int inum, - const int nall, const int t_per_atom) { + const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); ii+=tid/t_per_atom+start; @@ -747,7 +746,7 @@ __kernel void kernel_lj_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in, __global acctyp4 *ans, __global acctyp *engv, __global int *err_flag, const int eflag, const int vflag, const int start, const int inum, - const int nall, const int t_per_atom) { + const int t_per_atom) { int tid=THREAD_ID_X; int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); ii+=tid/t_per_atom+start;