git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12632 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2014-10-10 15:36:26 +00:00
parent a9906d1023
commit 04a786241d
4 changed files with 54 additions and 157 deletions

View File

@ -226,6 +226,7 @@ void IntelBuffers<flt_t, acc_t>::free_local()
{
if (_off_map_maxlocal > 0) {
int * cnumneigh = _cnumneigh;
int * atombin = _atombin;
#ifdef _LMP_INTEL_OFFLOAD
if (_off_map_ilist != NULL) {
const int * ilist = _off_map_ilist;
@ -233,11 +234,12 @@ void IntelBuffers<flt_t, acc_t>::free_local()
_off_map_ilist = NULL;
if (numneigh != 0 && ilist != 0) {
#pragma offload_transfer target(mic:_cop) \
nocopy(ilist,numneigh,cnumneigh:alloc_if(0) free_if(1))
nocopy(ilist,numneigh,cnumneigh,atombin:alloc_if(0) free_if(1))
}
}
#endif
lmp->memory->destroy(cnumneigh);
lmp->memory->destroy(atombin);
_off_map_maxlocal = 0;
}
}
@ -251,6 +253,7 @@ void IntelBuffers<flt_t, acc_t>::_grow_local(NeighList *list,
free_local();
int size = list->get_maxlocal();
lmp->memory->create(_cnumneigh, size, "_cnumneigh");
lmp->memory->create(_atombin, size, "_atombin");
_off_map_maxlocal = size;
#ifdef _LMP_INTEL_OFFLOAD
@ -258,11 +261,13 @@ void IntelBuffers<flt_t, acc_t>::_grow_local(NeighList *list,
int * numneigh = list->numneigh;
int * ilist = list->ilist;
int * cnumneigh = _cnumneigh;
if (cnumneigh != 0) {
int * atombin = _atombin;
if (cnumneigh != 0 && atombin != 0) {
#pragma offload_transfer target(mic:_cop) \
nocopy(ilist:length(size) alloc_if(1) free_if(0)) \
nocopy(numneigh:length(size) alloc_if(1) free_if(0)) \
nocopy(cnumneigh:length(size) alloc_if(1) free_if(0))
nocopy(cnumneigh:length(size) alloc_if(1) free_if(0)) \
nocopy(atombin:length(size) alloc_if(1) free_if(0))
}
_off_map_ilist = ilist;
_off_map_numneigh = numneigh;
@ -420,7 +425,7 @@ double IntelBuffers<flt_t, acc_t>::memory_usage(const int nthreads)
if (_off_f) tmem += fstride*_off_threads * sizeof(vec3_acc_t);
#endif
tmem += _off_map_maxlocal * sizeof(int);
tmem += _off_map_maxlocal * sizeof(int) * 2;
tmem += (_list_alloc_atoms + _off_threads) * get_max_nbors() * sizeof(int);
tmem += _ntypes * _ntypes * sizeof(int);

View File

@ -126,6 +126,7 @@ class IntelBuffers {
inline int * firstneigh(const NeighList *list) { return _list_alloc; }
inline int * cnumneigh(const NeighList *list) { return _cnumneigh; }
inline int * get_atombin() { return _atombin; }
inline atom_t * get_x(const int offload = 1) {
#ifdef _LMP_INTEL_OFFLOAD
if (_separate_buffers && offload == 0) return _host_x;
@ -249,6 +250,7 @@ class IntelBuffers {
int _list_alloc_atoms;
int * _list_alloc;
int * _cnumneigh;
int * _atombin;
flt_t **_cutneighsq;
int _ntypes;

View File

@ -32,47 +32,6 @@ using namespace LAMMPS_NS;
#pragma offload_attribute(push,target(mic))
#endif
template <class flt_t>
inline int mcoord2bin(const flt_t x0, const flt_t x1, const flt_t x2,
const flt_t bboxlo0, const flt_t bboxlo1,
const flt_t bboxlo2, const flt_t bboxhi0,
const flt_t bboxhi1, const flt_t bboxhi2,
const flt_t bininvx, const flt_t bininvy,
const flt_t bininvz, const int nbinx, const int nbiny,
const int nbinz, const int mbinx, const int mbiny,
const int mbinz, const int mbinxlo, const int mbinylo,
const int mbinzlo)
{
int ix, iy, iz;
if (x0 >= bboxhi0)
ix = static_cast<int> ((x0 - bboxhi0) * bininvx) + nbinx;
else if (x0 >= bboxlo0) {
ix = static_cast<int> ((x0 - bboxlo0) * bininvx);
ix = MIN(ix, nbinx-1);
} else
ix = static_cast<int> ((x0 - bboxlo0) * bininvx) - 1;
if (x1 >= bboxhi1)
iy = static_cast<int> ((x1 - bboxhi1) * bininvy) + nbiny;
else if (x1 >= bboxlo1) {
iy = static_cast<int> ((x1 - bboxlo1) * bininvy);
iy = MIN(iy, nbiny-1);
} else
iy = static_cast<int> ((x1 - bboxlo1) * bininvy) - 1;
if (x2 >= bboxhi2)
iz = static_cast<int> ((x2 - bboxhi2) * bininvz) + nbinz;
else if (x2 >= bboxlo2) {
iz = static_cast<int> ((x2 - bboxlo2) * bininvz);
iz = MIN(iz, nbinz - 1);
} else
iz = static_cast<int> ((x2 - bboxlo2) * bininvz) - 1;
return (iz - mbinzlo) * mbiny * mbinx + (iy - mbinylo) * mbinx +
(ix - mbinxlo);
}
#define ofind_special(which, special, nspecial, i, tag, special_flag) \
{ \
which = 0; \
@ -104,20 +63,14 @@ inline int mcoord2bin(const flt_t x0, const flt_t x1, const flt_t x2,
#endif
template <class flt_t, class acc_t>
void Neighbor::bin_atoms(void * xin) {
void Neighbor::bin_atoms(void * xin, int * _noalias const atombin) {
const ATOM_T * _noalias const x = (const ATOM_T * _noalias const)xin;
int nlocal = atom->nlocal;
const int nall = nlocal + atom->nghost;
const flt_t bininvx = this->bininvx;
const flt_t bininvy = this->bininvy;
const flt_t bininvz = this->bininvz;
const flt_t bboxlo0 = this->bboxlo[0];
const flt_t bboxlo1 = this->bboxlo[1];
const flt_t bboxlo2 = this->bboxlo[2];
const flt_t bboxhi0 = this->bboxhi[0];
const flt_t bboxhi1 = this->bboxhi[1];
const flt_t bboxhi2 = this->bboxhi[2];
const double sboxlo0 = bboxlo[0] + mbinxlo/bininvx;
const double sboxlo1 = bboxlo[1] + mbinylo/bininvy;
const double sboxlo2 = bboxlo[2] + mbinzlo/bininvz;
int i, ibin;
@ -129,25 +82,26 @@ void Neighbor::bin_atoms(void * xin) {
int bitmask = group->bitmask[includegroup];
for (i = nall-1; i >= nlocal; i--) {
if (mask[i] & bitmask) {
ibin = mcoord2bin(x[i].x, x[i].y, x[i].z, bboxlo0, bboxlo1, bboxlo2,
bboxhi0, bboxhi1, bboxhi2, bininvx, bininvy, bininvz, nbinx, nbiny,
nbinz, mbinx, mbiny, mbinz, mbinxlo, mbinylo, mbinzlo);
ibin = coord2bin(atom->x[i]);
bins[i] = binhead[ibin];
binhead[ibin] = i;
}
}
for (i = atom->nfirst-1; i >= 0; i--) {
ibin = mcoord2bin(x[i].x, x[i].y, x[i].z, bboxlo0, bboxlo1, bboxlo2,
bboxhi0, bboxhi1, bboxhi2, bininvx, bininvy, bininvz, nbinx, nbiny,
nbinz, mbinx, mbiny, mbinz, mbinxlo, mbinylo, mbinzlo);
ibin = coord2bin(atom->x[i]);
atombin[i] = ibin;
bins[i] = binhead[ibin];
binhead[ibin] = i;
}
} else {
for (i = nall-1; i >= 0; i--) {
ibin = mcoord2bin(x[i].x, x[i].y, x[i].z, bboxlo0, bboxlo1, bboxlo2,
bboxhi0, bboxhi1, bboxhi2, bininvx, bininvy, bininvz, nbinx, nbiny,
nbinz, mbinx, mbiny, mbinz, mbinxlo, mbinylo, mbinzlo);
for (i = nall-1; i >= nlocal; i--) {
ibin = coord2bin(atom->x[i]);
bins[i] = binhead[ibin];
binhead[ibin] = i;
}
for (i = nlocal-1; i >= 0; i--) {
ibin = coord2bin(atom->x[i]);
atombin[i]=ibin;
bins[i] = binhead[ibin];
binhead[ibin] = i;
}
@ -228,7 +182,7 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
fix->stop_watch(TIME_PACK);
fix->start_watch(TIME_HOST_NEIGHBOR);
bin_atoms<flt_t,acc_t>(buffers->get_x());
bin_atoms<flt_t,acc_t>(buffers->get_x(), buffers->get_atombin());
if (INTEL_MIC_NBOR_PAD > 1)
pad = INTEL_MIC_NBOR_PAD * sizeof(float) / sizeof(flt_t);
} else {
@ -295,22 +249,13 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
}
const int nthreads = tnum;
const int maxnbors = buffers->get_max_nbors();
const flt_t bboxlo0 = this->bboxlo[0];
const flt_t bboxlo1 = this->bboxlo[1];
const flt_t bboxlo2 = this->bboxlo[2];
const flt_t bboxhi0 = this->bboxhi[0];
const flt_t bboxhi1 = this->bboxhi[1];
const flt_t bboxhi2 = this->bboxhi[2];
const flt_t bininvx = this->bininvx;
const flt_t bininvy = this->bininvy;
const flt_t bininvz = this->bininvz;
int * _noalias const atombin = buffers->get_atombin();
// Make sure dummy coordinates to eliminate loop remainder not within cutoff
{
const flt_t dx = (INTEL_BIGP - bboxhi0);
const flt_t dy = (INTEL_BIGP - bboxhi1);
const flt_t dz = (INTEL_BIGP - bboxhi2);
const flt_t dx = (INTEL_BIGP - bboxhi[0]);
const flt_t dy = (INTEL_BIGP - bboxhi[1]);
const flt_t dz = (INTEL_BIGP - bboxhi[2]);
if (dx * dx + dy * dy + dz * dz < static_cast<flt_t>(cutneighmaxsq))
error->one(FLERR,
"Intel package expects no atoms within cutoff of {1e15,1e15,1e15}.");
@ -319,15 +264,6 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
#ifdef _LMP_INTEL_OFFLOAD
const int * _noalias const binhead = this->binhead;
const int * _noalias const special_flag = this->special_flag;
const int nbinx = this->nbinx;
const int nbiny = this->nbiny;
const int nbinz = this->nbinz;
const int mbinxlo = this->mbinxlo;
const int mbinylo = this->mbinylo;
const int mbinzlo = this->mbinzlo;
const int mbinx = this->mbinx;
const int mbiny = this->mbiny;
const int mbinz = this->mbinz;
const int * _noalias const bins = this->bins;
const int cop = fix->coprocessor_number();
const int separate_buffers = fix->separate_buffers();
@ -343,12 +279,11 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
out(numneigh:length(0) alloc_if(0) free_if(0)) \
in(ilist:length(0) alloc_if(0) free_if(0)) \
in(atombin:length(aend) alloc_if(0) free_if(0)) \
in(stencil:length(nstencil) alloc_if(0) free_if(0)) \
in(special_flag:length(0) alloc_if(0) free_if(0)) \
in(maxnbors,nthreads,maxspecial,nstencil,nbinx,nbiny,nbinz) \
in(mbinxlo,mbinylo,mbinzlo,mbinx,mbiny,mbinz,pad_width,offload) \
in(bininvx,bininvy,bininvz,bboxlo0,bboxlo1,bboxlo2,separate_buffers) \
in(bboxhi0, bboxhi1, bboxhi2, astart, aend, nlocal, molecular, ntypes) \
in(maxnbors,nthreads,maxspecial,nstencil,pad_width,offload) \
in(separate_buffers, astart, aend, nlocal, molecular, ntypes) \
out(overflow:length(5) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
signal(numneigh)
@ -402,10 +337,7 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
// stores own/own pairs only once
// stores own/ghost pairs on both procs
ibin = mcoord2bin(x[i].x, x[i].y, x[i].z, bboxlo0, bboxlo1, bboxlo2,
bboxhi0, bboxhi1, bboxhi2, bininvx, bininvy, bininvz,
nbinx, nbiny, nbinz, mbinx, mbiny, mbinz,
mbinxlo, mbinylo, mbinzlo);
ibin = atombin[i];
for (k = 0; k < nstencil; k++) {
for (j = binhead[ibin + stencil[k]]; j >= 0; j = bins[j]) {
@ -648,7 +580,7 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
fix->stop_watch(TIME_PACK);
fix->start_watch(TIME_HOST_NEIGHBOR);
bin_atoms<flt_t,acc_t>(buffers->get_x());
bin_atoms<flt_t,acc_t>(buffers->get_x(), buffers->get_atombin());
if (INTEL_MIC_NBOR_PAD > 1)
pad = INTEL_MIC_NBOR_PAD * sizeof(float) / sizeof(flt_t);
} else {
@ -718,21 +650,13 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
}
const int nthreads = tnum;
const int maxnbors = buffers->get_max_nbors();
int * _noalias const atombin = buffers->get_atombin();
const flt_t bboxlo0 = this->bboxlo[0];
const flt_t bboxlo1 = this->bboxlo[1];
const flt_t bboxlo2 = this->bboxlo[2];
const flt_t bboxhi0 = this->bboxhi[0];
const flt_t bboxhi1 = this->bboxhi[1];
const flt_t bboxhi2 = this->bboxhi[2];
const flt_t bininvx = this->bininvx;
const flt_t bininvy = this->bininvy;
const flt_t bininvz = this->bininvz;
// Make sure dummy coordinates to eliminate loop remainder not within cutoff
{
const flt_t dx = (INTEL_BIGP - bboxhi0);
const flt_t dy = (INTEL_BIGP - bboxhi1);
const flt_t dz = (INTEL_BIGP - bboxhi2);
const flt_t dx = (INTEL_BIGP - bboxhi[0]);
const flt_t dy = (INTEL_BIGP - bboxhi[1]);
const flt_t dz = (INTEL_BIGP - bboxhi[2]);
if (dx * dx + dy * dy + dz * dz < static_cast<flt_t>(cutneighmaxsq))
error->one(FLERR,
"Intel package expects no atoms within cutoff of {1e15,1e15,1e15}.");
@ -741,15 +665,6 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
#ifdef _LMP_INTEL_OFFLOAD
const int * _noalias const binhead = this->binhead;
const int * _noalias const special_flag = this->special_flag;
const int nbinx = this->nbinx;
const int nbiny = this->nbiny;
const int nbinz = this->nbinz;
const int mbinxlo = this->mbinxlo;
const int mbinylo = this->mbinylo;
const int mbinzlo = this->mbinzlo;
const int mbinx = this->mbinx;
const int mbiny = this->mbiny;
const int mbinz = this->mbinz;
const int * _noalias const bins = this->bins;
const int cop = fix->coprocessor_number();
const int separate_buffers = fix->separate_buffers();
@ -765,12 +680,11 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
out(numneigh:length(0) alloc_if(0) free_if(0)) \
in(ilist:length(0) alloc_if(0) free_if(0)) \
in(atombin:length(aend) alloc_if(0) free_if(0)) \
in(stencil:length(nstencil) alloc_if(0) free_if(0)) \
in(special_flag:length(0) alloc_if(0) free_if(0)) \
in(maxnbors,nthreads,maxspecial,nstencil,nbinx,nbiny,nbinz,e_nall,offload)\
in(mbinxlo,mbinylo,mbinzlo,mbinx,mbiny,mbinz,pad_width,offload_end) \
in(bininvx,bininvy,bininvz,bboxlo0,bboxlo1,bboxlo2,separate_buffers) \
in(bboxhi0, bboxhi1, bboxhi2, astart, aend, nlocal, molecular, ntypes) \
in(maxnbors,nthreads,maxspecial,nstencil,e_nall,offload,pad_width) \
in(offload_end,separate_buffers,astart, aend, nlocal, molecular, ntypes) \
out(overflow:length(5) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
signal(numneigh)
@ -861,10 +775,7 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
}
// loop over all atoms in other bins in stencil, store every pair
ibin = mcoord2bin(x[i].x, x[i].y, x[i].z, bboxlo0, bboxlo1, bboxlo2,
bboxhi0, bboxhi1, bboxhi2, bininvx, bininvy, bininvz,
nbinx, nbiny, nbinz, mbinx, mbiny, mbinz,
mbinxlo, mbinylo, mbinzlo);
ibin = atombin[i];
for (k = 0; k < nstencil; k++) {
for (j = binhead[ibin + stencil[k]]; j >= 0; j = bins[j]) {
@ -1114,7 +1025,7 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
fix->stop_watch(TIME_PACK);
fix->start_watch(TIME_HOST_NEIGHBOR);
bin_atoms<flt_t,acc_t>(buffers->get_x());
bin_atoms<flt_t,acc_t>(buffers->get_x(), buffers->get_atombin());
if (INTEL_MIC_NBOR_PAD > 1)
pad = INTEL_MIC_NBOR_PAD * sizeof(float) / sizeof(flt_t);
} else {
@ -1184,21 +1095,13 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
}
const int nthreads = tnum;
const int maxnbors = buffers->get_max_nbors();
int * _noalias const atombin = buffers->get_atombin();
const flt_t bboxlo0 = this->bboxlo[0];
const flt_t bboxlo1 = this->bboxlo[1];
const flt_t bboxlo2 = this->bboxlo[2];
const flt_t bboxhi0 = this->bboxhi[0];
const flt_t bboxhi1 = this->bboxhi[1];
const flt_t bboxhi2 = this->bboxhi[2];
const flt_t bininvx = this->bininvx;
const flt_t bininvy = this->bininvy;
const flt_t bininvz = this->bininvz;
// Make sure dummy coordinates to eliminate loop remainder not within cutoff
{
const flt_t dx = (INTEL_BIGP - bboxhi0);
const flt_t dy = (INTEL_BIGP - bboxhi1);
const flt_t dz = (INTEL_BIGP - bboxhi2);
const flt_t dx = (INTEL_BIGP - bboxhi[0]);
const flt_t dy = (INTEL_BIGP - bboxhi[1]);
const flt_t dz = (INTEL_BIGP - bboxhi[2]);
if (dx * dx + dy * dy + dz * dz < static_cast<flt_t>(cutneighmaxsq))
error->one(FLERR,
"Intel package expects no atoms within cutoff of {1e15,1e15,1e15}.");
@ -1207,15 +1110,6 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
#ifdef _LMP_INTEL_OFFLOAD
const int * _noalias const binhead = this->binhead;
const int * _noalias const special_flag = this->special_flag;
const int nbinx = this->nbinx;
const int nbiny = this->nbiny;
const int nbinz = this->nbinz;
const int mbinxlo = this->mbinxlo;
const int mbinylo = this->mbinylo;
const int mbinzlo = this->mbinzlo;
const int mbinx = this->mbinx;
const int mbiny = this->mbiny;
const int mbinz = this->mbinz;
const int * _noalias const bins = this->bins;
const int cop = fix->coprocessor_number();
const int separate_buffers = fix->separate_buffers();
@ -1231,12 +1125,11 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
out(numneigh:length(0) alloc_if(0) free_if(0)) \
in(ilist:length(0) alloc_if(0) free_if(0)) \
in(atombin:length(aend) alloc_if(0) free_if(0)) \
in(stencil:length(nstencil) alloc_if(0) free_if(0)) \
in(special_flag:length(0) alloc_if(0) free_if(0)) \
in(maxnbors,nthreads,maxspecial,nstencil,nbinx,nbiny,nbinz,offload_end) \
in(mbinxlo,mbinylo,mbinzlo,mbinx,mbiny,mbinz,pad_width,e_nall,offload) \
in(bininvx,bininvy,bininvz,bboxlo0,bboxlo1,bboxlo2,separate_buffers) \
in(bboxhi0, bboxhi1, bboxhi2, astart, aend, nlocal, molecular, ntypes) \
in(maxnbors,nthreads,maxspecial,nstencil,offload_end,pad_width,e_nall) \
in(offload,separate_buffers, astart, aend, nlocal, molecular, ntypes) \
out(overflow:length(5) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
signal(numneigh)
@ -1291,10 +1184,7 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
// (equal zyx and j <= i)
// latter excludes self-self interaction but allows superposed atoms
ibin = mcoord2bin(x[i].x, x[i].y, x[i].z, bboxlo0, bboxlo1, bboxlo2,
bboxhi0, bboxhi1, bboxhi2, bininvx, bininvy, bininvz,
nbinx, nbiny, nbinz, mbinx, mbiny, mbinz,
mbinxlo, mbinylo, mbinzlo);
ibin = atombin[i];
for (k = 0; k < nstencil; k++) {
for (j = binhead[ibin + stencil[k]]; j >= 0; j = bins[j]) {

View File

@ -32,7 +32,7 @@
void *fix_intel;
template <class flt_t, class acc_t>
void bin_atoms(void *);
void bin_atoms(void *, int *);
template <class flt_t, class acc_t, int>
void hbni(const int, NeighList *, void *, const int, const int, void *,