From a71f5a0c20f7e23c3d5cfe2e73eff38aa4dcb5c3 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Sat, 22 Jul 2017 22:57:37 -0500 Subject: [PATCH] Re-enabled neigh no with tpa > 1 for 3-body GPU styles for backward compatibility; this could be slower than neigh no with tpa 1 in many cases --- lib/gpu/lal_base_three.cpp | 1 - lib/gpu/lal_sw.cu | 92 ++++++++++++++++---------- lib/gpu/lal_tersoff.cu | 131 ++++++++++++++++++++++--------------- lib/gpu/lal_tersoff_mod.cu | 130 ++++++++++++++++++++++-------------- lib/gpu/lal_tersoff_zbl.cu | 130 ++++++++++++++++++++++-------------- lib/gpu/lal_vashishta.cu | 82 ++++++++++++++--------- 6 files changed, 349 insertions(+), 217 deletions(-) diff --git a/lib/gpu/lal_base_three.cpp b/lib/gpu/lal_base_three.cpp index 5f3c57337e..aa77a48c66 100644 --- a/lib/gpu/lal_base_three.cpp +++ b/lib/gpu/lal_base_three.cpp @@ -73,7 +73,6 @@ int BaseThreeT::init_three(const int nlocal, const int nall, if (_threads_per_atom>1 && gpu_nbor==0) { // neigh no and tpa > 1 nbor->packing(true); _nbor_data=&(nbor->dev_packed); - _threads_per_atom = 1; // enforce tpa = 1 for now } else // neigh yes or tpa == 1 _nbor_data=&(nbor->dev_nbor); if (_threads_per_atom*_threads_per_atom>device->warp_size()) diff --git a/lib/gpu/lal_sw.cu b/lib/gpu/lal_sw.cu index 7dea52898e..a5c9f49d08 100644 --- a/lib/gpu/lal_sw.cu +++ b/lib/gpu/lal_sw.cu @@ -167,7 +167,6 @@ __kernel void k_sw_short_nbor(const __global numtyp4 *restrict x_, numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j]; int jtype=jx.w; jtype=map[jtype]; - int ijparam=elem2param[itype*nelements*nelements+jtype*nelements+jtype]; // Compute r12 @@ -217,8 +216,8 @@ __kernel void k_sw(const __global numtyp4 *restrict x_, __syncthreads(); if (ii0) energy += (param3_bigh*reta+vc2-vc3-param3_bigw*r6inv-r*param3_dvrc+param3_c0); @@ -435,7 +436,7 @@ __kernel void k_vashishta_three_center(const __global numtyp4 *restrict x_, if (ii