Feb2021 GPU Package Update - GPU Package Files
This commit is contained in:
@ -14,6 +14,9 @@
|
||||
***************************************************************************/
|
||||
|
||||
#include "lal_answer.h"
|
||||
#if (LAL_USE_OMP == 1)
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
namespace LAMMPS_AL {
|
||||
#define AnswerT Answer<numtyp,acctyp>
|
||||
@ -56,7 +59,7 @@ bool AnswerT::alloc(const int inum) {
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
bool AnswerT::init(const int inum, const bool charge, const bool rot,
|
||||
UCL_Device &devi) {
|
||||
UCL_Device &devi) {
|
||||
clear();
|
||||
|
||||
bool success=true;
|
||||
@ -81,6 +84,10 @@ bool AnswerT::init(const int inum, const bool charge, const bool rot,
|
||||
_time_cast=0.0;
|
||||
_time_cpu_idle=0.0;
|
||||
|
||||
success=success && (error_flag.alloc(1,*dev,UCL_READ_WRITE,
|
||||
UCL_WRITE_ONLY)==UCL_SUCCESS);
|
||||
if (success) error_flag.zero();
|
||||
|
||||
return success && alloc(ef_inum);
|
||||
}
|
||||
|
||||
@ -111,6 +118,7 @@ bool AnswerT::add_fields(const bool charge, const bool rot) {
|
||||
template <class numtyp, class acctyp>
|
||||
void AnswerT::clear() {
|
||||
_gpu_bytes=0;
|
||||
error_flag.clear();
|
||||
if (!_allocated)
|
||||
return;
|
||||
_allocated=false;
|
||||
@ -138,12 +146,21 @@ double AnswerT::host_memory_usage() const {
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void AnswerT::copy_answers(const bool eflag, const bool vflag,
|
||||
const bool ef_atom, const bool vf_atom) {
|
||||
const bool ef_atom, const bool vf_atom,
|
||||
const int red_blocks) {
|
||||
time_answer.start();
|
||||
_eflag=eflag;
|
||||
_vflag=vflag;
|
||||
_ef_atom=ef_atom;
|
||||
_vf_atom=vf_atom;
|
||||
#ifdef LAL_NO_BLOCK_REDUCE
|
||||
_ev_stride=_inum;
|
||||
#else
|
||||
if (ef_atom || vf_atom)
|
||||
_ev_stride=_inum;
|
||||
else
|
||||
_ev_stride=red_blocks;
|
||||
#endif
|
||||
|
||||
int csize=_ev_fields;
|
||||
if (!eflag)
|
||||
@ -152,20 +169,24 @@ void AnswerT::copy_answers(const bool eflag, const bool vflag,
|
||||
csize-=6;
|
||||
|
||||
if (csize>0)
|
||||
engv.update_host(_inum*csize,true);
|
||||
engv.update_host(_ev_stride*csize,true);
|
||||
if (_rot)
|
||||
force.update_host(_inum*4*2,true);
|
||||
else
|
||||
force.update_host(_inum*4,true);
|
||||
time_answer.stop();
|
||||
|
||||
#ifndef GERYON_OCL_FLUSH
|
||||
force.flush();
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void AnswerT::copy_answers(const bool eflag, const bool vflag,
|
||||
const bool ef_atom, const bool vf_atom,
|
||||
int *ilist) {
|
||||
const bool ef_atom, const bool vf_atom,
|
||||
int *ilist, const int red_blocks) {
|
||||
_ilist=ilist;
|
||||
copy_answers(eflag,vflag,ef_atom,vf_atom);
|
||||
copy_answers(eflag,vflag,ef_atom,vf_atom,red_blocks);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
@ -177,21 +198,24 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
|
||||
double evdwl=0.0;
|
||||
int vstart=0;
|
||||
if (_eflag) {
|
||||
for (int i=0; i<_inum; i++)
|
||||
#if (LAL_USE_OMP_SIMD == 1)
|
||||
#pragma omp simd reduction(+:evdwl)
|
||||
#endif
|
||||
for (int i=0; i<_ev_stride; i++)
|
||||
evdwl+=engv[i];
|
||||
if (_ef_atom) {
|
||||
if (_ilist==nullptr) {
|
||||
for (int i=0; i<_inum; i++)
|
||||
for (int i=0; i<_ev_stride; i++)
|
||||
eatom[i]+=engv[i];
|
||||
} else {
|
||||
for (int i=0; i<_inum; i++)
|
||||
for (int i=0; i<_ev_stride; i++)
|
||||
eatom[_ilist[i]]+=engv[i];
|
||||
}
|
||||
}
|
||||
vstart=_inum;
|
||||
vstart=_ev_stride;
|
||||
}
|
||||
if (_vflag) {
|
||||
int iend=vstart+_inum;
|
||||
int iend=vstart+_ev_stride;
|
||||
for (int j=0; j<6; j++) {
|
||||
for (int i=vstart; i<iend; i++)
|
||||
virial[j]+=engv[i];
|
||||
@ -206,8 +230,8 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
|
||||
vatom[_ilist[ii++]][j]+=engv[i];
|
||||
}
|
||||
}
|
||||
vstart+=_inum;
|
||||
iend+=_inum;
|
||||
vstart+=_ev_stride;
|
||||
iend+=_ev_stride;
|
||||
}
|
||||
}
|
||||
|
||||
@ -224,28 +248,36 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
|
||||
return energy_virial(eatom,vatom,virial);
|
||||
|
||||
double evdwl=0.0;
|
||||
int ii, vstart=0, iend=_inum;
|
||||
int ii, vstart=0, iend=_ev_stride;
|
||||
if (_eflag) {
|
||||
iend=_inum*2;
|
||||
for (int i=0; i<_inum; i++)
|
||||
iend=_ev_stride*2;
|
||||
#if (LAL_USE_OMP_SIMD == 1)
|
||||
#pragma omp simd reduction(+:evdwl)
|
||||
#endif
|
||||
for (int i=0; i<_ev_stride; i++)
|
||||
evdwl+=engv[i];
|
||||
for (int i=_inum; i<iend; i++)
|
||||
ecoul+=engv[i];
|
||||
double ecv=0.0;
|
||||
#if (LAL_USE_OMP_SIMD == 1)
|
||||
#pragma omp simd reduction(+:ecv)
|
||||
#endif
|
||||
for (int i=_ev_stride; i<iend; i++)
|
||||
ecv+=engv[i];
|
||||
ecoul+=ecv;
|
||||
if (_ef_atom) {
|
||||
if (_ilist==nullptr) {
|
||||
for (int i=0; i<_inum; i++)
|
||||
for (int i=0; i<_ev_stride; i++)
|
||||
eatom[i]+=engv[i];
|
||||
for (int i=_inum; i<iend; i++)
|
||||
for (int i=_ev_stride; i<iend; i++)
|
||||
eatom[i]+=engv[i];
|
||||
} else {
|
||||
for (int i=0, ii=0; i<_inum; i++)
|
||||
for (int i=0, ii=0; i<_ev_stride; i++)
|
||||
eatom[_ilist[ii++]]+=engv[i];
|
||||
for (int i=_inum, ii=0; i<iend; i++)
|
||||
for (int i=_ev_stride, ii=0; i<iend; i++)
|
||||
eatom[_ilist[ii++]]+=engv[i];
|
||||
}
|
||||
}
|
||||
vstart=iend;
|
||||
iend+=_inum;
|
||||
iend+=_ev_stride;
|
||||
}
|
||||
if (_vflag) {
|
||||
for (int j=0; j<6; j++) {
|
||||
@ -260,8 +292,8 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
|
||||
vatom[_ilist[ii++]][j]+=engv[i];
|
||||
}
|
||||
}
|
||||
vstart+=_inum;
|
||||
iend+=_inum;
|
||||
vstart+=_ev_stride;
|
||||
iend+=_ev_stride;
|
||||
}
|
||||
}
|
||||
|
||||
@ -270,38 +302,79 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void AnswerT::get_answers(double **f, double **tor) {
|
||||
int fl=0;
|
||||
if (_ilist==nullptr) {
|
||||
for (int i=0; i<_inum; i++) {
|
||||
f[i][0]+=force[fl];
|
||||
f[i][1]+=force[fl+1];
|
||||
f[i][2]+=force[fl+2];
|
||||
fl+=4;
|
||||
}
|
||||
if (_rot) {
|
||||
for (int i=0; i<_inum; i++) {
|
||||
tor[i][0]+=force[fl];
|
||||
tor[i][1]+=force[fl+1];
|
||||
tor[i][2]+=force[fl+2];
|
||||
fl+=4;
|
||||
typedef struct { double x,y,z; } vec3d;
|
||||
typedef struct { acctyp x,y,z,w; } vec4d_t;
|
||||
vec3d *fp=reinterpret_cast<vec3d*>(&(f[0][0]));
|
||||
vec4d_t *forcep=reinterpret_cast<vec4d_t*>(&(force[0]));
|
||||
|
||||
#if (LAL_USE_OMP == 1)
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
#if (LAL_USE_OMP == 1)
|
||||
const int nthreads = omp_get_num_threads();
|
||||
const int tid = omp_get_thread_num();
|
||||
const int idelta = _inum / nthreads + 1;
|
||||
const int ifrom = tid * idelta;
|
||||
const int ito = std::min(ifrom + idelta, _inum);
|
||||
#else
|
||||
const int tid = 0;
|
||||
const int ifrom = 0;
|
||||
const int ito = _inum;
|
||||
#endif
|
||||
|
||||
for (int i=ifrom; i<ito; i++) {
|
||||
fp[i].x+=forcep[i].x;
|
||||
fp[i].y+=forcep[i].y;
|
||||
fp[i].z+=forcep[i].z;
|
||||
}
|
||||
if (_rot) {
|
||||
vec3d *torp=reinterpret_cast<vec3d*>(&(tor[0][0]));
|
||||
forcep=reinterpret_cast<vec4d_t*>(&(force[_inum*4]));
|
||||
for (int i=ifrom; i<ito; i++) {
|
||||
torp[i].x+=forcep[i].x;
|
||||
torp[i].y+=forcep[i].y;
|
||||
torp[i].z+=forcep[i].z;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int i=0; i<_inum; i++) {
|
||||
int ii=_ilist[i];
|
||||
f[ii][0]+=force[fl];
|
||||
f[ii][1]+=force[fl+1];
|
||||
f[ii][2]+=force[fl+2];
|
||||
fl+=4;
|
||||
}
|
||||
if (_rot) {
|
||||
for (int i=0; i<_inum; i++) {
|
||||
#if (LAL_USE_OMP == 1)
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
#if (LAL_USE_OMP == 1)
|
||||
const int nthreads = omp_get_num_threads();
|
||||
const int tid = omp_get_thread_num();
|
||||
const int idelta = _inum / nthreads + 1;
|
||||
const int ifrom = tid * idelta;
|
||||
const int ito = std::min(ifrom + idelta, _inum);
|
||||
int fl=ifrom*4;
|
||||
#else
|
||||
const int tid = 0;
|
||||
const int ifrom = 0;
|
||||
const int ito = _inum;
|
||||
int fl=0;
|
||||
#endif
|
||||
|
||||
for (int i=ifrom; i<ito; i++) {
|
||||
int ii=_ilist[i];
|
||||
tor[ii][0]+=force[fl];
|
||||
tor[ii][1]+=force[fl+1];
|
||||
tor[ii][2]+=force[fl+2];
|
||||
f[ii][0]+=force[fl];
|
||||
f[ii][1]+=force[fl+1];
|
||||
f[ii][2]+=force[fl+2];
|
||||
fl+=4;
|
||||
}
|
||||
if (_rot) {
|
||||
fl=_inum*4 + ifrom*4;
|
||||
for (int i=ifrom; i<ito; i++) {
|
||||
int ii=_ilist[i];
|
||||
tor[ii][0]+=force[fl];
|
||||
tor[ii][1]+=force[fl+1];
|
||||
tor[ii][2]+=force[fl+2];
|
||||
fl+=4;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user