added acceleration of lj/smooth on gpu
This commit is contained in:
@ -16,7 +16,7 @@
|
||||
#if defined(USE_OPENCL)
|
||||
#include "lj_smooth_cl.h"
|
||||
#elif defined(USE_CUDART)
|
||||
const char *lj=0;
|
||||
const char *lj_smooth=0;
|
||||
#else
|
||||
#include "lj_smooth_cubin.h"
|
||||
#endif
|
||||
@ -51,7 +51,7 @@ int LJSMOOTHT::init(const int ntypes,
|
||||
const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *_screen,
|
||||
double **host_ljsw1, double **host_ljsw2, double **host_ljsw3,
|
||||
double **host_ljsw0, double **host_ljsw1, double **host_ljsw2, double **host_ljsw3,
|
||||
double **host_ljsw4,
|
||||
double **cut_inner, double **cut_inner_sq) {
|
||||
int success;
|
||||
@ -89,13 +89,17 @@ int LJSMOOTHT::init(const int ntypes,
|
||||
this->atom->type_pack4(ntypes,lj_types,ljsw,host_write,host_ljsw1,host_ljsw2,
|
||||
host_ljsw3,host_ljsw4);
|
||||
|
||||
ljsw0.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
this->atom->type_pack4(ntypes,lj_types,ljsw0,host_write,host_ljsw0,cut_inner,host_ljsw2,
|
||||
host_ljsw3);
|
||||
|
||||
UCL_H_Vec<double> dview;
|
||||
sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
|
||||
dview.view(host_special_lj,4,*(this->ucl_device));
|
||||
ucl_copy(sp_lj,dview,false);
|
||||
|
||||
_allocated=true;
|
||||
this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+ljsw.row_bytes()+sp_lj.row_bytes();
|
||||
this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+ljsw.row_bytes()+ljsw0.row_bytes()+sp_lj.row_bytes();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -103,7 +107,7 @@ template <class numtyp, class acctyp>
|
||||
void LJSMOOTHT::reinit(const int ntypes, double **host_cutsq, double **host_lj1,
|
||||
double **host_lj2, double **host_lj3,
|
||||
double **host_lj4, double **host_offset,
|
||||
double **host_ljsw1, double **host_ljsw2, double **host_ljsw3,
|
||||
double **host_ljsw0, double **host_ljsw1, double **host_ljsw2, double **host_ljsw3,
|
||||
double **host_ljsw4,
|
||||
double **cut_inner, double **cut_inner_sq) {
|
||||
// Allocate a host write buffer for data initialization
|
||||
@ -119,6 +123,8 @@ void LJSMOOTHT::reinit(const int ntypes, double **host_cutsq, double **host_lj1,
|
||||
host_offset, cut_inner);
|
||||
this->atom->type_pack4(ntypes,_lj_types,ljsw,host_write,host_ljsw1,host_ljsw2,
|
||||
host_ljsw3,host_ljsw4);
|
||||
this->atom->type_pack4(ntypes,_lj_types,ljsw0,host_write,host_ljsw0, cut_inner, host_ljsw2,
|
||||
host_ljsw3);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
@ -130,6 +136,7 @@ void LJSMOOTHT::clear() {
|
||||
lj1.clear();
|
||||
lj3.clear();
|
||||
ljsw.clear();
|
||||
ljsw0.clear();
|
||||
sp_lj.clear();
|
||||
this->clear_atomic();
|
||||
}
|
||||
@ -165,14 +172,14 @@ void LJSMOOTHT::loop(const bool _eflag, const bool _vflag) {
|
||||
this->time_pair.start();
|
||||
if (shared_types) {
|
||||
this->k_pair_fast.set_size(GX,BX);
|
||||
this->k_pair_fast.run(&this->atom->x, &lj1, &lj3, &ljsw, &sp_lj,
|
||||
this->k_pair_fast.run(&this->atom->x, &lj1, &lj3, &ljsw, &ljsw0, &sp_lj,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->ans->force, &this->ans->engv, &eflag,
|
||||
&vflag, &ainum, &nbor_pitch,
|
||||
&this->_threads_per_atom);
|
||||
} else {
|
||||
this->k_pair.set_size(GX,BX);
|
||||
this->k_pair.run(&this->atom->x, &lj1, &lj3, &ljsw, &_lj_types, &sp_lj,
|
||||
this->k_pair.run(&this->atom->x, &lj1, &lj3, &ljsw, &ljsw0, &_lj_types, &sp_lj,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->ans->force, &this->ans->engv, &eflag, &vflag,
|
||||
&ainum, &nbor_pitch, &this->_threads_per_atom);
|
||||
|
||||
@ -28,6 +28,7 @@ __kernel void k_lj_smooth(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
const __global numtyp4 *restrict ljsw,
|
||||
const __global numtyp4 *restrict ljsw0,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj,
|
||||
const __global int * dev_nbor,
|
||||
@ -82,7 +83,7 @@ __kernel void k_lj_smooth(const __global numtyp4 *restrict x_,
|
||||
}
|
||||
else {
|
||||
r = sqrt(rsq);
|
||||
t = r - lj3[mtype].w;
|
||||
t = r - ljsw0[mtype].y;
|
||||
tsq = t*t;
|
||||
fskin = ljsw[mtype].x + ljsw[mtype].y*t +
|
||||
ljsw[mtype].z*tsq + ljsw[mtype].w*tsq*t;
|
||||
@ -98,7 +99,7 @@ __kernel void k_lj_smooth(const __global numtyp4 *restrict x_,
|
||||
if (rsq < lj1[mtype].w)
|
||||
e = r6inv * (lj3[mtype].x*r6inv - lj3[mtype].y) - lj3[mtype].z;
|
||||
else
|
||||
e = ljsw[mtype].x - ljsw[mtype].x*t -
|
||||
e = ljsw0[mtype].x - ljsw[mtype].x*t -
|
||||
ljsw[mtype].y*tsq/2.0 - ljsw[mtype].z*tsq*t/3.0 -
|
||||
ljsw[mtype].z*tsq*tsq/4.0 - lj3[mtype].z;
|
||||
|
||||
@ -125,6 +126,7 @@ __kernel void k_lj_smooth_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1_in,
|
||||
const __global numtyp4 *restrict lj3_in,
|
||||
const __global numtyp4 *restrict ljsw,
|
||||
const __global numtyp4 *restrict ljsw0,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
@ -191,7 +193,7 @@ __kernel void k_lj_smooth_fast(const __global numtyp4 *restrict x_,
|
||||
}
|
||||
else {
|
||||
r = sqrt(rsq);
|
||||
t = r - lj3[mtype].w;
|
||||
t = r - ljsw0[mtype].y; //?
|
||||
//printf("%f\n", r - lj3[mtype].w);
|
||||
tsq = t*t;
|
||||
fskin = ljsw[mtype].x + ljsw[mtype].y*t +
|
||||
@ -208,7 +210,7 @@ __kernel void k_lj_smooth_fast(const __global numtyp4 *restrict x_,
|
||||
if (rsq < lj1[mtype].w)
|
||||
e = r6inv * (lj3[mtype].x*r6inv - lj3[mtype].y) - lj3[mtype].z;
|
||||
else
|
||||
e = ljsw[mtype].x - ljsw[mtype].x*t -
|
||||
e = ljsw0[mtype].x - ljsw[mtype].x*t -
|
||||
ljsw[mtype].y*tsq/2.0 - ljsw[mtype].z*tsq*t/3.0 -
|
||||
ljsw[mtype].z*tsq*tsq/4.0 - lj3[mtype].z; //???
|
||||
|
||||
|
||||
@ -43,16 +43,16 @@ class LJSMOOTH : public BaseAtomic<numtyp, acctyp> {
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *screen,
|
||||
double **host_ljsw1, double **host_ljsw2, double **host_ljsw3,
|
||||
double **host_ljsw4,
|
||||
double **host_ljsw0, double **host_ljsw1, double **host_ljsw2,
|
||||
double **host_ljsw3, double **host_ljsw4,
|
||||
double **cut_inner, double **cut_inner_sq);
|
||||
|
||||
/// Send updated coeffs from host to device (to be compatible with fix adapt)
|
||||
void reinit(const int ntypes, double **host_cutsq,
|
||||
double **host_lj1, double **host_lj2, double **host_lj3,
|
||||
double **host_lj4, double **host_offset,
|
||||
double **host_ljsw1, double **host_ljsw2, double **host_ljsw3,
|
||||
double **host_ljsw4,
|
||||
double **host_ljsw0, double **host_ljsw1, double **host_ljsw2,
|
||||
double **host_ljsw3, double **host_ljsw4,
|
||||
double **cut_inner, double **cut_inner_sq);
|
||||
|
||||
/// Clear all host and device data
|
||||
@ -73,6 +73,8 @@ class LJSMOOTH : public BaseAtomic<numtyp, acctyp> {
|
||||
UCL_D_Vec<numtyp4> lj3;
|
||||
/// ljsw.x = ljsw1, ljsw.y = ljsw2, ljsw.z = ljsw3, ljsw.w = ljsw4
|
||||
UCL_D_Vec<numtyp4> ljsw;
|
||||
/// ljsw0
|
||||
UCL_D_Vec<numtyp4> ljsw0;
|
||||
/// Special LJ values
|
||||
UCL_D_Vec<numtyp> sp_lj;
|
||||
|
||||
|
||||
@ -32,7 +32,7 @@ int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
double **offset, double *special_lj, const int inum,
|
||||
const int nall, const int max_nbors, const int maxspecial,
|
||||
const double cell_size, int &gpu_mode, FILE *screen,
|
||||
double **host_ljsw1, double **host_ljsw2, double **host_ljsw3,
|
||||
double **host_ljsw0, double **host_ljsw1, double **host_ljsw2, double **host_ljsw3,
|
||||
double **host_ljsw4, double **cut_inner, double **cut_inner_sq) {
|
||||
LJSMTMF.clear();
|
||||
gpu_mode=LJSMTMF.device->gpu_mode();
|
||||
@ -59,7 +59,7 @@ int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
init_ok=LJSMTMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3,
|
||||
host_lj4, offset, special_lj, inum, nall, 300,
|
||||
maxspecial, cell_size, gpu_split, screen,
|
||||
host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq);
|
||||
host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq);
|
||||
|
||||
LJSMTMF.device->world_barrier();
|
||||
if (message)
|
||||
@ -77,7 +77,7 @@ int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
init_ok=LJSMTMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
|
||||
offset, special_lj, inum, nall, 300, maxspecial,
|
||||
cell_size, gpu_split, screen, host_ljsw1, host_ljsw2, host_ljsw3,
|
||||
cell_size, gpu_split, screen, host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3,
|
||||
host_ljsw4, cut_inner, cut_inner_sq);
|
||||
|
||||
LJSMTMF.device->gpu_barrier();
|
||||
@ -97,19 +97,19 @@ int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
// ---------------------------------------------------------------------------
|
||||
void ljsmt_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
|
||||
double **host_lj2, double **host_lj3, double **host_lj4,
|
||||
double **offset, double **host_ljsw1, double **host_ljsw2, double **host_ljsw3,
|
||||
double **offset, double **host_ljsw0, double **host_ljsw1, double **host_ljsw2, double **host_ljsw3,
|
||||
double **host_ljsw4, double **cut_inner, double **cut_inner_sq) {
|
||||
int world_me=LJSMTMF.device->world_me();
|
||||
int gpu_rank=LJSMTMF.device->gpu_rank();
|
||||
int procs_per_gpu=LJSMTMF.device->procs_per_gpu();
|
||||
|
||||
if (world_me==0)
|
||||
LJSMTMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq);
|
||||
LJSMTMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset, host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq);
|
||||
LJSMTMF.device->world_barrier();
|
||||
|
||||
for (int i=0; i<procs_per_gpu; i++) {
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
LJSMTMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq);
|
||||
LJSMTMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset, host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq);
|
||||
LJSMTMF.device->gpu_barrier();
|
||||
}
|
||||
}
|
||||
|
||||
@ -45,15 +45,15 @@ int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
double **offset, double *special_lj, const int nlocal,
|
||||
const int nall, const int max_nbors, const int maxspecial,
|
||||
const double cell_size, int &gpu_mode, FILE *screen,
|
||||
double **host_ljsw1, double **host_ljsw2, double **host_ljsw3,
|
||||
double **host_ljsw4,
|
||||
double **host_ljsw0, double **host_ljsw1, double **host_ljsw2,
|
||||
double **host_ljsw3, double **host_ljsw4,
|
||||
double **cut_inner, double **cut_innersq);
|
||||
|
||||
void ljsmt_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
|
||||
double **host_lj2, double **host_lj3, double **host_lj4,
|
||||
double **offset,
|
||||
double **host_ljsw1, double **host_ljsw2, double **host_ljsw3,
|
||||
double **host_ljsw4,
|
||||
double **host_ljsw0, double **host_ljsw1, double **host_ljsw2,
|
||||
double **host_ljsw3, double **host_ljsw4,
|
||||
double **cut_inner, double **cut_innersq);
|
||||
|
||||
void ljsmt_gpu_clear();
|
||||
@ -174,7 +174,7 @@ void PairLJSmoothGPU::init_style()
|
||||
int success = ljsmt_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
|
||||
offset, force->special_lj, atom->nlocal,
|
||||
atom->nlocal+atom->nghost, 300, maxspecial,
|
||||
cell_size, gpu_mode, screen, ljsw1, ljsw2,
|
||||
cell_size, gpu_mode, screen, ljsw0, ljsw1, ljsw2,
|
||||
ljsw3, ljsw4, cut_inner, cut_inner_sq);
|
||||
GPU_EXTRA::check_flag(success,error,world);
|
||||
|
||||
@ -191,7 +191,7 @@ void PairLJSmoothGPU::reinit()
|
||||
{
|
||||
Pair::reinit();
|
||||
|
||||
ljsmt_gpu_reinit(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset, ljsw1, ljsw2, ljsw3, ljsw4, cut_inner, cut_inner_sq);
|
||||
ljsmt_gpu_reinit(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset, ljsw0, ljsw1, ljsw2, ljsw3, ljsw4, cut_inner, cut_inner_sq);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
Reference in New Issue
Block a user