Feb2021 GPU Package Update - GPU Package Files

This commit is contained in:
Michael Brown
2021-02-15 08:20:50 -08:00
parent 16004e8f45
commit e7e2d2323b
345 changed files with 13424 additions and 7708 deletions

View File

@ -14,6 +14,9 @@
***************************************************************************/
#include "lal_answer.h"
#if (LAL_USE_OMP == 1)
#include <omp.h>
#endif
namespace LAMMPS_AL {
#define AnswerT Answer<numtyp,acctyp>
@ -56,7 +59,7 @@ bool AnswerT::alloc(const int inum) {
template <class numtyp, class acctyp>
bool AnswerT::init(const int inum, const bool charge, const bool rot,
UCL_Device &devi) {
UCL_Device &devi) {
clear();
bool success=true;
@ -81,6 +84,10 @@ bool AnswerT::init(const int inum, const bool charge, const bool rot,
_time_cast=0.0;
_time_cpu_idle=0.0;
success=success && (error_flag.alloc(1,*dev,UCL_READ_WRITE,
UCL_WRITE_ONLY)==UCL_SUCCESS);
if (success) error_flag.zero();
return success && alloc(ef_inum);
}
@ -111,6 +118,7 @@ bool AnswerT::add_fields(const bool charge, const bool rot) {
template <class numtyp, class acctyp>
void AnswerT::clear() {
_gpu_bytes=0;
error_flag.clear();
if (!_allocated)
return;
_allocated=false;
@ -138,12 +146,21 @@ double AnswerT::host_memory_usage() const {
template <class numtyp, class acctyp>
void AnswerT::copy_answers(const bool eflag, const bool vflag,
const bool ef_atom, const bool vf_atom) {
const bool ef_atom, const bool vf_atom,
const int red_blocks) {
time_answer.start();
_eflag=eflag;
_vflag=vflag;
_ef_atom=ef_atom;
_vf_atom=vf_atom;
#ifdef LAL_NO_BLOCK_REDUCE
_ev_stride=_inum;
#else
if (ef_atom || vf_atom)
_ev_stride=_inum;
else
_ev_stride=red_blocks;
#endif
int csize=_ev_fields;
if (!eflag)
@ -152,20 +169,24 @@ void AnswerT::copy_answers(const bool eflag, const bool vflag,
csize-=6;
if (csize>0)
engv.update_host(_inum*csize,true);
engv.update_host(_ev_stride*csize,true);
if (_rot)
force.update_host(_inum*4*2,true);
else
force.update_host(_inum*4,true);
time_answer.stop();
#ifndef GERYON_OCL_FLUSH
force.flush();
#endif
}
template <class numtyp, class acctyp>
void AnswerT::copy_answers(const bool eflag, const bool vflag,
const bool ef_atom, const bool vf_atom,
int *ilist) {
const bool ef_atom, const bool vf_atom,
int *ilist, const int red_blocks) {
_ilist=ilist;
copy_answers(eflag,vflag,ef_atom,vf_atom);
copy_answers(eflag,vflag,ef_atom,vf_atom,red_blocks);
}
template <class numtyp, class acctyp>
@ -177,21 +198,24 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
double evdwl=0.0;
int vstart=0;
if (_eflag) {
for (int i=0; i<_inum; i++)
#if (LAL_USE_OMP_SIMD == 1)
#pragma omp simd reduction(+:evdwl)
#endif
for (int i=0; i<_ev_stride; i++)
evdwl+=engv[i];
if (_ef_atom) {
if (_ilist==nullptr) {
for (int i=0; i<_inum; i++)
for (int i=0; i<_ev_stride; i++)
eatom[i]+=engv[i];
} else {
for (int i=0; i<_inum; i++)
for (int i=0; i<_ev_stride; i++)
eatom[_ilist[i]]+=engv[i];
}
}
vstart=_inum;
vstart=_ev_stride;
}
if (_vflag) {
int iend=vstart+_inum;
int iend=vstart+_ev_stride;
for (int j=0; j<6; j++) {
for (int i=vstart; i<iend; i++)
virial[j]+=engv[i];
@ -206,8 +230,8 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
vatom[_ilist[ii++]][j]+=engv[i];
}
}
vstart+=_inum;
iend+=_inum;
vstart+=_ev_stride;
iend+=_ev_stride;
}
}
@ -224,28 +248,36 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
return energy_virial(eatom,vatom,virial);
double evdwl=0.0;
int ii, vstart=0, iend=_inum;
int ii, vstart=0, iend=_ev_stride;
if (_eflag) {
iend=_inum*2;
for (int i=0; i<_inum; i++)
iend=_ev_stride*2;
#if (LAL_USE_OMP_SIMD == 1)
#pragma omp simd reduction(+:evdwl)
#endif
for (int i=0; i<_ev_stride; i++)
evdwl+=engv[i];
for (int i=_inum; i<iend; i++)
ecoul+=engv[i];
double ecv=0.0;
#if (LAL_USE_OMP_SIMD == 1)
#pragma omp simd reduction(+:ecv)
#endif
for (int i=_ev_stride; i<iend; i++)
ecv+=engv[i];
ecoul+=ecv;
if (_ef_atom) {
if (_ilist==nullptr) {
for (int i=0; i<_inum; i++)
for (int i=0; i<_ev_stride; i++)
eatom[i]+=engv[i];
for (int i=_inum; i<iend; i++)
for (int i=_ev_stride; i<iend; i++)
eatom[i]+=engv[i];
} else {
for (int i=0, ii=0; i<_inum; i++)
for (int i=0, ii=0; i<_ev_stride; i++)
eatom[_ilist[ii++]]+=engv[i];
for (int i=_inum, ii=0; i<iend; i++)
for (int i=_ev_stride, ii=0; i<iend; i++)
eatom[_ilist[ii++]]+=engv[i];
}
}
vstart=iend;
iend+=_inum;
iend+=_ev_stride;
}
if (_vflag) {
for (int j=0; j<6; j++) {
@ -260,8 +292,8 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
vatom[_ilist[ii++]][j]+=engv[i];
}
}
vstart+=_inum;
iend+=_inum;
vstart+=_ev_stride;
iend+=_ev_stride;
}
}
@ -270,38 +302,79 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
template <class numtyp, class acctyp>
void AnswerT::get_answers(double **f, double **tor) {
int fl=0;
if (_ilist==nullptr) {
for (int i=0; i<_inum; i++) {
f[i][0]+=force[fl];
f[i][1]+=force[fl+1];
f[i][2]+=force[fl+2];
fl+=4;
}
if (_rot) {
for (int i=0; i<_inum; i++) {
tor[i][0]+=force[fl];
tor[i][1]+=force[fl+1];
tor[i][2]+=force[fl+2];
fl+=4;
typedef struct { double x,y,z; } vec3d;
typedef struct { acctyp x,y,z,w; } vec4d_t;
vec3d *fp=reinterpret_cast<vec3d*>(&(f[0][0]));
vec4d_t *forcep=reinterpret_cast<vec4d_t*>(&(force[0]));
#if (LAL_USE_OMP == 1)
#pragma omp parallel
#endif
{
#if (LAL_USE_OMP == 1)
const int nthreads = omp_get_num_threads();
const int tid = omp_get_thread_num();
const int idelta = _inum / nthreads + 1;
const int ifrom = tid * idelta;
const int ito = std::min(ifrom + idelta, _inum);
#else
const int tid = 0;
const int ifrom = 0;
const int ito = _inum;
#endif
for (int i=ifrom; i<ito; i++) {
fp[i].x+=forcep[i].x;
fp[i].y+=forcep[i].y;
fp[i].z+=forcep[i].z;
}
if (_rot) {
vec3d *torp=reinterpret_cast<vec3d*>(&(tor[0][0]));
forcep=reinterpret_cast<vec4d_t*>(&(force[_inum*4]));
for (int i=ifrom; i<ito; i++) {
torp[i].x+=forcep[i].x;
torp[i].y+=forcep[i].y;
torp[i].z+=forcep[i].z;
}
}
}
} else {
for (int i=0; i<_inum; i++) {
int ii=_ilist[i];
f[ii][0]+=force[fl];
f[ii][1]+=force[fl+1];
f[ii][2]+=force[fl+2];
fl+=4;
}
if (_rot) {
for (int i=0; i<_inum; i++) {
#if (LAL_USE_OMP == 1)
#pragma omp parallel
#endif
{
#if (LAL_USE_OMP == 1)
const int nthreads = omp_get_num_threads();
const int tid = omp_get_thread_num();
const int idelta = _inum / nthreads + 1;
const int ifrom = tid * idelta;
const int ito = std::min(ifrom + idelta, _inum);
int fl=ifrom*4;
#else
const int tid = 0;
const int ifrom = 0;
const int ito = _inum;
int fl=0;
#endif
for (int i=ifrom; i<ito; i++) {
int ii=_ilist[i];
tor[ii][0]+=force[fl];
tor[ii][1]+=force[fl+1];
tor[ii][2]+=force[fl+2];
f[ii][0]+=force[fl];
f[ii][1]+=force[fl+1];
f[ii][2]+=force[fl+2];
fl+=4;
}
if (_rot) {
fl=_inum*4 + ifrom*4;
for (int i=ifrom; i<ito; i++) {
int ii=_ilist[i];
tor[ii][0]+=force[fl];
tor[ii][1]+=force[fl+1];
tor[ii][2]+=force[fl+2];
fl+=4;
}
}
}
}
}