Merge branch 'lammps-icms' of bitbucket.org:akohlmey/lammps-icms into lammps-icms

Resolved Conflicts:
	doc/Manual.html
	doc/Manual.txt
	doc/package.txt
This commit is contained in:
Axel Kohlmeyer
2014-08-19 09:49:46 -04:00
31 changed files with 290 additions and 342 deletions

View File

@ -1,7 +1,7 @@
<HTML>
<HEAD>
<TITLE>LAMMPS-ICMS Users Manual</TITLE>
<META NAME="docnumber" CONTENT="14 Aug 2014 version">
<META NAME="docnumber" CONTENT="15 Aug 2014 version">
<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
</HEAD>
@ -22,7 +22,7 @@
<CENTER><H3>LAMMPS-ICMS Documentation
</H3></CENTER>
<CENTER><H4>14 Aug 2014 version
<CENTER><H4>15 Aug 2014 version
</H4></CENTER>
<H4>Version info:
</H4>

View File

@ -1,6 +1,6 @@
<HEAD>
<TITLE>LAMMPS-ICMS Users Manual</TITLE>
<META NAME="docnumber" CONTENT="14 Aug 2014 version">
<META NAME="docnumber" CONTENT="15 Aug 2014 version">
<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
</HEAD>
@ -18,7 +18,7 @@
<H1></H1>
LAMMPS-ICMS Documentation :c,h3
14 Aug 2014 version :c,h4
15 Aug 2014 version :c,h4
Version info: :h4

View File

@ -109,7 +109,7 @@ it to LAMMPS.
<LI> open-source distribution
<LI> highly portable C++
<LI> optional libraries used: MPI and single-processor FFT
<LI> Intel Xeon Phi, GPU (CUDA and OpenCL), and OpenMP support for many code features
<LI> GPU (CUDA and OpenCL), Intel Xeon Phi, and OpenMP support for many code features
<LI> easy to extend with new features and functionality
<LI> runs from an input script
<LI> syntax for defining and using variables and formulas

View File

@ -105,7 +105,7 @@ General features :h4
open-source distribution
highly portable C++
optional libraries used: MPI and single-processor FFT
Intel Xeon Phi, GPU (CUDA and OpenCL), and OpenMP support for many code features
GPU (CUDA and OpenCL), Intel Xeon Phi, and OpenMP support for many code features
easy to extend with new features and functionality
runs from an input script
syntax for defining and using variables and formulas

View File

@ -39,7 +39,7 @@ args = arguments specific to the style :l
{cellsize} value = dist
dist = length (distance units) in each dimension for neighbor bins
{device} value = device_type
device_type = {kepler} or {fermi} or {cypress} or {phi} or {generic}
device_type = {kepler} or {fermi} or {cypress} or {phi} or {intel} or {generic}
{intel} args = Nthreads precision keyword value ...
Nthreads = # of OpenMP threads to associate with each MPI process on host
precision = {single} or {mixed} or {double}

View File

@ -3,7 +3,9 @@
# shape: 2 1.5 1
# cutoff 4.0 with skin 0.8
# NPT, T=2.4, P=8.0
package intel * mixed balance $b
package omp *
suffix $s
processors * * * grid numa

View File

@ -1,5 +1,7 @@
# Rhodopsin model
package intel * mixed balance $b
package omp *
suffix $s
variable x index 4

View File

@ -407,9 +407,11 @@ void VerletSplit::run(int n)
// all output
if (master) {
timer->stamp();
if (n_post_force) modify->post_force(vflag);
modify->final_integrate();
if (n_end_of_step) modify->end_of_step();
timer->stamp(Timer::MODIFY);
if (ntimestep == output->next) {
timer->stamp();

View File

@ -67,41 +67,3 @@ elif (test $mode = 0) then
touch ../accelerator_intel.h
fi
# step 3: map omp styles that are not in the intel package to intel suffix
#if (test $mode = 0) then
#
# rm -f ../*ompinto_intel*
#
#else
#
# echo " The 'intel' suffix will use the USER-OMP package for all"
# echo " angle, bond, dihedral, kspace, and improper styles:"
# stylelist="pair fix angle bond dihedral improper"
# for header in $stylelist; do
# HEADER=`echo $header | sed 's/\(.*\)/\U\1/'`
# outfile=../$header"_ompinto_intel.h"
# echo " Creating $header style map: $outfile"
# echo -n "// -- Header to map USER-OMP " > $outfile
# echo "styles to the intel suffix" >> $outfile
# echo >> $outfile
# echo "#ifdef "$HEADER"_CLASS" >> $outfile
# grep -h 'Style(' ../$header*_omp.h | grep -v 'charmm/coul/long' | \
# grep -v 'lj/cut' | grep -v 'gayberne' | \
# sed 's/\/omp/\/intel/g' >> $outfile
# echo "#endif" >> $outfile
# done
#
# header="kspace"
# HEADER="KSPACE"
# outfile=../$header"_ompinto_intel.h"
# echo " Creating $header style map: $outfile"
# echo -n "// -- Header to map USER-OMP " > $outfile
# echo "styles to the intel suffix" >> $outfile
# echo >> $outfile
# echo "#ifdef "$HEADER"_CLASS" >> $outfile
# grep -h 'KSpaceStyle(' ../*_omp.h | sed 's/\/omp/\/intel/g' >> $outfile
# echo "#endif" >> $outfile
#
#fi

View File

@ -128,7 +128,7 @@ class FixIntel : public Fix {
protected:
int _overflow_flag[5];
__declspec(align(64)) int _off_overflow_flag[5];
_alignvar(int _off_overflow_flag[5],64);
int _allow_separate_buffers, _offload_ghost;
#ifdef _LMP_INTEL_OFFLOAD
double _balance_pair_time, _balance_other_time;
@ -155,18 +155,18 @@ class FixIntel : public Fix {
double _offload_balance, _balance_neighbor, _balance_pair, _balance_fixed;
double _timers[NUM_ITIMERS];
double _stopwatch[NUM_ITIMERS];
__declspec(align(64)) double _stopwatch_offload_neighbor[1];
__declspec(align(64)) double _stopwatch_offload_pair[1];
_alignvar(double _stopwatch_offload_neighbor[1],64);
_alignvar(double _stopwatch_offload_pair[1],64);
template <class ft, class acc_t>
inline void add_results(const ft * restrict const f_in,
const acc_t * restrict const ev_global,
inline void add_results(const ft * _noalias const f_in,
const acc_t * _noalias const ev_global,
const int eatom, const int vatom,
const int offload);
template <class ft, class acc_t>
inline void add_oresults(const ft * restrict const f_in,
const acc_t * restrict const ev_global,
inline void add_oresults(const ft * _noalias const f_in,
const acc_t * _noalias const ev_global,
const int eatom, const int vatom,
const int out_offset, const int nall);
@ -176,8 +176,8 @@ class FixIntel : public Fix {
int _im_real_space_task;
MPI_Comm _real_space_comm;
template <class ft, class acc_t>
inline void add_off_results(const ft * restrict const f_in,
const acc_t * restrict const ev_global);
inline void add_off_results(const ft * _noalias const f_in,
const acc_t * _noalias const ev_global);
#endif
};
@ -284,8 +284,8 @@ void FixIntel::add_result_array(IntelBuffers<float,float>::vec3_acc_t *f_in,
/* ---------------------------------------------------------------------- */
template <class ft, class acc_t>
void FixIntel::add_results(const ft * restrict const f_in,
const acc_t * restrict const ev_global,
void FixIntel::add_results(const ft * _noalias const f_in,
const acc_t * _noalias const ev_global,
const int eatom, const int vatom,
const int offload) {
start_watch(TIME_PACK);
@ -295,7 +295,7 @@ void FixIntel::add_results(const ft * restrict const f_in,
if (offload) {
add_oresults(f_in, ev_global, eatom, vatom, 0, _offload_nlocal);
if (force->newton_pair) {
const acc_t * restrict const enull = 0;
const acc_t * _noalias const enull = 0;
int offset = _offload_nlocal;
if (atom->torque) offset *= 2;
add_oresults(f_in + offset, enull, eatom, vatom,
@ -305,7 +305,7 @@ void FixIntel::add_results(const ft * restrict const f_in,
add_oresults(f_in, ev_global, eatom, vatom,
_host_min_local, _host_used_local);
if (force->newton_pair) {
const acc_t * restrict const enull = 0;
const acc_t * _noalias const enull = 0;
int offset = _host_used_local;
if (atom->torque) offset *= 2;
add_oresults(f_in + offset, enull, eatom,
@ -333,11 +333,11 @@ void FixIntel::add_results(const ft * restrict const f_in,
/* ---------------------------------------------------------------------- */
template <class ft, class acc_t>
void FixIntel::add_oresults(const ft * restrict const f_in,
const acc_t * restrict const ev_global,
void FixIntel::add_oresults(const ft * _noalias const f_in,
const acc_t * _noalias const ev_global,
const int eatom, const int vatom,
const int out_offset, const int nall) {
lmp_ft * restrict const f = (lmp_ft *) lmp->atom->f[0] + out_offset;
lmp_ft * _noalias const f = (lmp_ft *) lmp->atom->f[0] + out_offset;
if (atom->torque) {
if (f_in[1].w)
if (f_in[1].w == 1)
@ -356,7 +356,7 @@ void FixIntel::add_oresults(const ft * restrict const f_in,
IP_PRE_omp_range_align(ifrom, ito, tid, nall, _nthreads, sizeof(acc_t));
if (atom->torque) {
int ii = ifrom * 2;
lmp_ft * restrict const tor = (lmp_ft *) lmp->atom->torque[0] +
lmp_ft * _noalias const tor = (lmp_ft *) lmp->atom->torque[0] +
out_offset;
if (eatom) {
for (int i = ifrom; i < ito; i++) {
@ -464,8 +464,8 @@ void FixIntel::set_neighbor_host_sizes() {
/* ---------------------------------------------------------------------- */
template <class ft, class acc_t>
void FixIntel::add_off_results(const ft * restrict const f_in,
const acc_t * restrict const ev_global) {
void FixIntel::add_off_results(const ft * _noalias const f_in,
const acc_t * _noalias const ev_global) {
if (_offload_balance < 0.0)
_balance_other_time = MPI_Wtime() - _balance_other_time;

View File

@ -22,8 +22,8 @@ using namespace LAMMPS_NS;
template <class flt_t, class acc_t>
IntelBuffers<flt_t, acc_t>::IntelBuffers(class LAMMPS *lmp_in) :
lmp(lmp_in), _x(0), _q(0), _quat(0), _f(0), _buf_size(0),
_buf_local_size(0), _off_threads(0) {
lmp(lmp_in), _x(0), _q(0), _quat(0), _f(0), _off_threads(0),
_buf_size(0), _buf_local_size(0) {
_list_alloc_atoms = 0;
_ntypes = 0;
_off_map_maxlocal = 0;
@ -423,6 +423,8 @@ double IntelBuffers<flt_t, acc_t>::memory_usage(const int nthreads)
tmem += _off_map_maxlocal * sizeof(int);
tmem += (_list_alloc_atoms + _off_threads) * get_max_nbors() * sizeof(int);
tmem += _ntypes * _ntypes * sizeof(int);
return tmem;
}
/* ---------------------------------------------------------------------- */

View File

@ -266,8 +266,8 @@ class IntelBuffers {
#endif
int _buf_size, _buf_local_size;
__declspec(align(64)) acc_t _ev_global[8];
__declspec(align(64)) acc_t _ev_global_host[8];
_alignvar(acc_t _ev_global[8],64);
_alignvar(acc_t _ev_global_host[8],64);
void _grow(const int nall, const int nlocal, const int nthreads,
const int offload_end);

View File

@ -351,7 +351,7 @@ inline double MIC_Wtime() {
for (int t = 1; t < nthreads; t++) { \
_Pragma("vector nontemporal") \
for (int n = iifrom; n < iito; n++) { \
f_start[n].x += f_start[n + t_off].x; \
f_start[n].x += f_start[n + t_off].x; \
f_start[n].y += f_start[n + t_off].y; \
f_start[n].z += f_start[n + t_off].z; \
f_start[n].w += f_start[n + t_off].w; \
@ -361,7 +361,7 @@ inline double MIC_Wtime() {
} else { \
for (int t = 1; t < nthreads; t++) { \
_Pragma("vector nontemporal") \
for (int n = iifrom; n < iito; n++) { \
for (int n = iifrom; n < iito; n++) { \
f_start[n].x += f_start[n + t_off].x; \
f_start[n].y += f_start[n + t_off].y; \
f_start[n].z += f_start[n + t_off].z; \
@ -372,7 +372,7 @@ inline double MIC_Wtime() {
\
if (evflag) { \
if (vflag == 2) { \
const ATOM_T * restrict const xo = x + minlocal; \
const ATOM_T * _noalias const xo = x + minlocal; \
_Pragma("vector nontemporal") \
for (int n = iifrom; n < iito; n++) { \
ov0 += f_start[n].x * xo[n].x; \

View File

@ -105,7 +105,7 @@ inline int mcoord2bin(const flt_t x0, const flt_t x1, const flt_t x2,
template <class flt_t, class acc_t>
void Neighbor::bin_atoms(void * xin) {
const ATOM_T * restrict const x = (const ATOM_T * restrict const)xin;
const ATOM_T * _noalias const x = (const ATOM_T * _noalias const)xin;
int nlocal = atom->nlocal;
const int nall = nlocal + atom->nghost;
@ -243,8 +243,8 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
return;
}
const ATOM_T * restrict const x = buffers->get_x();
int * restrict const firstneigh = buffers->firstneigh(list);
const ATOM_T * _noalias const x = buffers->get_x();
int * _noalias const firstneigh = buffers->firstneigh(list);
const int molecular = atom->molecular;
int *ns = NULL, *s = NULL;
@ -260,17 +260,17 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
tag_size = 0;
special_size = 0;
}
const int * restrict const special = s;
const int * restrict const nspecial = ns;
const int * _noalias const special = s;
const int * _noalias const nspecial = ns;
const int maxspecial = atom->maxspecial;
const int * restrict const tag = atom->tag;
const int * _noalias const tag = atom->tag;
int * restrict const ilist = list->ilist;
int * restrict numneigh = list->numneigh;
int * restrict const cnumneigh = buffers->cnumneigh(list);
int * _noalias const ilist = list->ilist;
int * _noalias numneigh = list->numneigh;
int * _noalias const cnumneigh = buffers->cnumneigh(list);
const int nstencil = list->nstencil;
const int * restrict const stencil = list->stencil;
const flt_t * restrict const cutneighsq = buffers->get_cutneighsq()[0];
const int * _noalias const stencil = list->stencil;
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
const int ntypes = atom->ntypes + 1;
const int nlocal = atom->nlocal;
@ -316,8 +316,8 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
}
#ifdef _LMP_INTEL_OFFLOAD
const int * restrict const binhead = this->binhead;
const int * restrict const special_flag = this->special_flag;
const int * _noalias const binhead = this->binhead;
const int * _noalias const special_flag = this->special_flag;
const int nbinx = this->nbinx;
const int nbiny = this->nbiny;
const int nbinz = this->nbinz;
@ -327,7 +327,7 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
const int mbinx = this->mbinx;
const int mbiny = this->mbiny;
const int mbinz = this->mbinz;
const int * restrict const bins = this->bins;
const int * _noalias const bins = this->bins;
const int cop = fix->coprocessor_number();
const int separate_buffers = fix->separate_buffers();
#pragma offload target(mic:cop) if(offload) \
@ -486,7 +486,7 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
if (molecular) {
for (int i = ifrom; i < ito; ++i) {
int * restrict jlist = firstneigh + cnumneigh[i];
int * _noalias jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
for (int jj = 0; jj < jnum; jj++) {
const int j = jlist[jj];
@ -507,7 +507,7 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
#ifdef _LMP_INTEL_OFFLOAD
else if (separate_buffers) {
for (int i = ifrom; i < ito; ++i) {
int * restrict jlist = firstneigh + cnumneigh[i];
int * _noalias jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
int jj = 0;
for (jj = 0; jj < jnum; jj++)
@ -662,8 +662,8 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
return;
}
const ATOM_T * restrict const x = buffers->get_x();
int * restrict const firstneigh = buffers->firstneigh(list);
const ATOM_T * _noalias const x = buffers->get_x();
int * _noalias const firstneigh = buffers->firstneigh(list);
int nall_t = nall;
if (offload_noghost && offload) nall_t = atom->nlocal;
const int e_nall = nall_t;
@ -682,17 +682,17 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
tag_size = 0;
special_size = 0;
}
const int * restrict const special = s;
const int * restrict const nspecial = ns;
const int * _noalias const special = s;
const int * _noalias const nspecial = ns;
const int maxspecial = atom->maxspecial;
const int * restrict const tag = atom->tag;
const int * _noalias const tag = atom->tag;
int * restrict const ilist = list->ilist;
int * restrict numneigh = list->numneigh;
int * restrict const cnumneigh = buffers->cnumneigh(list);
int * _noalias const ilist = list->ilist;
int * _noalias numneigh = list->numneigh;
int * _noalias const cnumneigh = buffers->cnumneigh(list);
const int nstencil = list->nstencil;
const int * restrict const stencil = list->stencil;
const flt_t * restrict const cutneighsq = buffers->get_cutneighsq()[0];
const int * _noalias const stencil = list->stencil;
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
const int ntypes = atom->ntypes + 1;
const int nlocal = atom->nlocal;
@ -737,8 +737,8 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
}
#ifdef _LMP_INTEL_OFFLOAD
const int * restrict const binhead = this->binhead;
const int * restrict const special_flag = this->special_flag;
const int * _noalias const binhead = this->binhead;
const int * _noalias const special_flag = this->special_flag;
const int nbinx = this->nbinx;
const int nbiny = this->nbiny;
const int nbinz = this->nbinz;
@ -748,7 +748,7 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
const int mbinx = this->mbinx;
const int mbiny = this->mbiny;
const int mbinz = this->mbinz;
const int * restrict const bins = this->bins;
const int * _noalias const bins = this->bins;
const int cop = fix->coprocessor_number();
const int separate_buffers = fix->separate_buffers();
#pragma offload target(mic:cop) if(offload) \
@ -948,7 +948,7 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
if (molecular) {
for (int i = ifrom; i < ito; ++i) {
int * restrict jlist = firstneigh + cnumneigh[i];
int * _noalias jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
for (int jj = 0; jj < jnum; jj++) {
const int j = jlist[jj];
@ -970,7 +970,7 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
#ifdef _LMP_INTEL_OFFLOAD
else if (separate_buffers) {
for (int i = ifrom; i < ito; ++i) {
int * restrict jlist = firstneigh + cnumneigh[i];
int * _noalias jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
int jj = 0;
for (jj = 0; jj < jnum; jj++)
@ -1127,8 +1127,8 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
return;
}
const ATOM_T * restrict const x = buffers->get_x();
int * restrict const firstneigh = buffers->firstneigh(list);
const ATOM_T * _noalias const x = buffers->get_x();
int * _noalias const firstneigh = buffers->firstneigh(list);
int nall_t = nall;
if (offload_noghost && offload) nall_t = atom->nlocal;
const int e_nall = nall_t;
@ -1147,17 +1147,17 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
tag_size = 0;
special_size = 0;
}
const int * restrict const special = s;
const int * restrict const nspecial = ns;
const int * _noalias const special = s;
const int * _noalias const nspecial = ns;
const int maxspecial = atom->maxspecial;
const int * restrict const tag = atom->tag;
const int * _noalias const tag = atom->tag;
int * restrict const ilist = list->ilist;
int * restrict numneigh = list->numneigh;
int * restrict const cnumneigh = buffers->cnumneigh(list);
int * _noalias const ilist = list->ilist;
int * _noalias numneigh = list->numneigh;
int * _noalias const cnumneigh = buffers->cnumneigh(list);
const int nstencil = list->nstencil;
const int * restrict const stencil = list->stencil;
const flt_t * restrict const cutneighsq = buffers->get_cutneighsq()[0];
const int * _noalias const stencil = list->stencil;
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
const int ntypes = atom->ntypes + 1;
const int nlocal = atom->nlocal;
@ -1202,8 +1202,8 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
}
#ifdef _LMP_INTEL_OFFLOAD
const int * restrict const binhead = this->binhead;
const int * restrict const special_flag = this->special_flag;
const int * _noalias const binhead = this->binhead;
const int * _noalias const special_flag = this->special_flag;
const int nbinx = this->nbinx;
const int nbiny = this->nbiny;
const int nbinz = this->nbinz;
@ -1213,7 +1213,7 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
const int mbinx = this->mbinx;
const int mbiny = this->mbiny;
const int mbinz = this->mbinz;
const int * restrict const bins = this->bins;
const int * _noalias const bins = this->bins;
const int cop = fix->coprocessor_number();
const int separate_buffers = fix->separate_buffers();
#pragma offload target(mic:cop) if(offload) \
@ -1386,7 +1386,7 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
if (molecular) {
for (int i = ifrom; i < ito; ++i) {
int * restrict jlist = firstneigh + cnumneigh[i];
int * _noalias jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
for (int jj = 0; jj < jnum; jj++) {
const int j = jlist[jj];
@ -1407,7 +1407,7 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
#ifdef _LMP_INTEL_OFFLOAD
else if (separate_buffers) {
for (int i = ifrom; i < ito; ++i) {
int * restrict jlist = firstneigh + cnumneigh[i];
int * _noalias jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
int jj = 0;
for (jj = 0; jj < jnum; jj++)

View File

@ -79,7 +79,7 @@ void PairGayBerneIntel::compute(int eflag, int vflag,
fix->start_watch(TIME_PACK);
const AtomVecEllipsoid::Bonus * const bonus = avec->bonus;
const int * const ellipsoid = atom->ellipsoid;
QUAT_T * restrict const quat = buffers->get_quat();
QUAT_T * _noalias const quat = buffers->get_quat();
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
#endif
@ -150,8 +150,8 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
fix->get_buffern(offload, nlocal, nall, minlocal);
const int ago = neighbor->ago;
ATOM_T * restrict const x = buffers->get_x(offload);
QUAT_T * restrict const quat = buffers->get_quat(offload);
ATOM_T * _noalias const x = buffers->get_x(offload);
QUAT_T * _noalias const quat = buffers->get_quat(offload);
const AtomVecEllipsoid::Bonus *bonus = avec->bonus;
const int *ellipsoid = atom->ellipsoid;
@ -225,15 +225,15 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
}
#endif
// const int * restrict const ilist = list->ilist;
const int * restrict const numneigh = list->numneigh;
const int * restrict const cnumneigh = buffers->cnumneigh(list);
const int * restrict const firstneigh = buffers->firstneigh(list);
const flt_t * restrict const special_lj = fc.special_lj;
// const int * _noalias const ilist = list->ilist;
const int * _noalias const numneigh = list->numneigh;
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
const int * _noalias const firstneigh = buffers->firstneigh(list);
const flt_t * _noalias const special_lj = fc.special_lj;
const FC_PACKED1_T * restrict const ijc = fc.ijc[0];
const FC_PACKED2_T * restrict const lj34 = fc.lj34[0];
const FC_PACKED3_T * restrict const ic = fc.ic;
const FC_PACKED1_T * _noalias const ijc = fc.ijc[0];
const FC_PACKED2_T * _noalias const lj34 = fc.lj34[0];
const FC_PACKED3_T * _noalias const ic = fc.ic;
const flt_t mu = fc.mu;
const flt_t gamma = fc.gamma;
const flt_t upsilon = fc.upsilon;
@ -255,8 +255,8 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
x_size, q_size, ev_size, f_stride);
int tc;
FORCE_T * restrict f_start;
acc_t * restrict ev_global;
FORCE_T * _noalias f_start;
acc_t * _noalias ev_global;
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
const int max_nbors = _max_nbors;
const int nthreads = tc;
@ -351,25 +351,25 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
iifrom += astart;
iito += astart;
FORCE_T * restrict const f = f_start - minlocal * 2 + (tid * f_stride);
FORCE_T * _noalias const f = f_start - minlocal * 2 + (tid * f_stride);
memset(f + minlocal * 2, 0, f_stride * sizeof(FORCE_T));
flt_t * restrict const rsq_form = rsq_formi + tid * max_nbors;
flt_t * restrict const delx_form = delx_formi + tid * max_nbors;
flt_t * restrict const dely_form = dely_formi + tid * max_nbors;
flt_t * restrict const delz_form = delz_formi + tid * max_nbors;
int * restrict const jtype_form = jtype_formi + tid * max_nbors;
int * restrict const jlist_form = jlist_formi + tid * max_nbors;
flt_t * _noalias const rsq_form = rsq_formi + tid * max_nbors;
flt_t * _noalias const delx_form = delx_formi + tid * max_nbors;
flt_t * _noalias const dely_form = dely_formi + tid * max_nbors;
flt_t * _noalias const delz_form = delz_formi + tid * max_nbors;
int * _noalias const jtype_form = jtype_formi + tid * max_nbors;
int * _noalias const jlist_form = jlist_formi + tid * max_nbors;
int ierror = 0;
for (int i = iifrom; i < iito; ++i) {
// const int i = ilist[ii];
const int itype = x[i].w;
const int ptr_off = itype * ntypes;
const FC_PACKED1_T * restrict const ijci = ijc + ptr_off;
const FC_PACKED2_T * restrict const lj34i = lj34 + ptr_off;
const FC_PACKED1_T * _noalias const ijci = ijc + ptr_off;
const FC_PACKED2_T * _noalias const lj34i = lj34 + ptr_off;
const int * restrict const jlist = firstneigh + cnumneigh[i];
const int * _noalias const jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
const flt_t xtmp = x[i].x;
@ -819,7 +819,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
if (EVFLAG) {
if (vflag==2) {
const ATOM_T * restrict const xo = x + minlocal;
const ATOM_T * _noalias const xo = x + minlocal;
#pragma vector nontemporal
for (int n = iifrom; n < iito; n++) {
const int nt2 = n * 2;

View File

@ -62,7 +62,10 @@ class PairGayBerneIntel : public PairGayBerne {
typedef struct { flt_t lj3, lj4; } fc_packed2;
typedef struct { flt_t shape2[4], well[4]; } fc_packed3;
__declspec(align(64)) flt_t special_lj[4], gamma, upsilon, mu;
_alignvar(flt_t special_lj[4],64);
_alignvar(flt_t gamma,64);
_alignvar(flt_t upsilon,64);
_alignvar(flt_t mu,64);
fc_packed1 **ijc;
fc_packed2 **lj34;
fc_packed3 *ic;

View File

@ -143,25 +143,25 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
const int ago = neighbor->ago;
IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
ATOM_T * restrict const x = buffers->get_x(offload);
flt_t * restrict const q = buffers->get_q(offload);
ATOM_T * _noalias const x = buffers->get_x(offload);
flt_t * _noalias const q = buffers->get_q(offload);
const int * restrict const numneigh = list->numneigh;
const int * restrict const cnumneigh = buffers->cnumneigh(list);
const int * restrict const firstneigh = buffers->firstneigh(list);
const int * _noalias const numneigh = list->numneigh;
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
const int * _noalias const firstneigh = buffers->firstneigh(list);
const flt_t * restrict const special_coul = fc.special_coul;
const flt_t * restrict const special_lj = fc.special_lj;
const flt_t * _noalias const special_coul = fc.special_coul;
const flt_t * _noalias const special_lj = fc.special_lj;
const flt_t qqrd2e = force->qqrd2e;
const flt_t inv_denom_lj = (flt_t)1.0/denom_lj;
const flt_t * restrict const cutsq = fc.cutsq[0];
const LJ_T * restrict const lj = fc.lj[0];
const TABLE_T * restrict const table = fc.table;
const flt_t * restrict const etable = fc.etable;
const flt_t * restrict const detable = fc.detable;
const flt_t * restrict const ctable = fc.ctable;
const flt_t * restrict const dctable = fc.dctable;
const flt_t * _noalias const cutsq = fc.cutsq[0];
const LJ_T * _noalias const lj = fc.lj[0];
const TABLE_T * _noalias const table = fc.table;
const flt_t * _noalias const etable = fc.etable;
const flt_t * _noalias const detable = fc.detable;
const flt_t * _noalias const ctable = fc.ctable;
const flt_t * _noalias const dctable = fc.dctable;
const flt_t cut_ljsq = fc.cut_ljsq;
const flt_t cut_lj_innersq = fc.cut_lj_innersq;
const flt_t cut_coulsq = fc.cut_coulsq;
@ -178,8 +178,8 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
x_size, q_size, ev_size, f_stride);
int tc;
FORCE_T * restrict f_start;
acc_t * restrict ev_global;
FORCE_T * _noalias f_start;
acc_t * _noalias ev_global;
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
const int nthreads = tc;
@ -242,7 +242,7 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
iifrom += astart;
iito += astart;
FORCE_T * restrict const f = f_start - minlocal + (tid * f_stride);
FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride);
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
flt_t cutboth = cut_coulsq;
@ -251,10 +251,10 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
const int itype = x[i].w;
const int ptr_off = itype * ntypes;
const flt_t * restrict const cutsqi = cutsq + ptr_off;
const LJ_T * restrict const lji = lj + ptr_off;
const flt_t * _noalias const cutsqi = cutsq + ptr_off;
const LJ_T * _noalias const lji = lj + ptr_off;
const int * restrict const jlist = firstneigh + cnumneigh[i];
const int * _noalias const jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
acc_t fxtmp,fytmp,fztmp,fwtmp;

View File

@ -62,8 +62,8 @@ class PairLJCharmmCoulLongIntel : public PairLJCharmmCoulLong {
class ForceConst {
public:
typedef struct { flt_t r, dr, f, df; } table_t;
__declspec(align(64)) flt_t special_coul[4];
__declspec(align(64)) flt_t special_lj[4];
_alignvar(flt_t special_coul[4],64);
_alignvar(flt_t special_lj[4],64);
flt_t **cutsq, g_ewald, tabinnersq;
flt_t cut_coulsq, cut_ljsq;
flt_t cut_lj_innersq;

View File

@ -143,24 +143,24 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
const int ago = neighbor->ago;
IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
ATOM_T * restrict const x = buffers->get_x(offload);
flt_t * restrict const q = buffers->get_q(offload);
ATOM_T * _noalias const x = buffers->get_x(offload);
flt_t * _noalias const q = buffers->get_q(offload);
const int * restrict const numneigh = list->numneigh;
const int * restrict const cnumneigh = buffers->cnumneigh(list);
const int * restrict const firstneigh = buffers->firstneigh(list);
const int * _noalias const numneigh = list->numneigh;
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
const int * _noalias const firstneigh = buffers->firstneigh(list);
const flt_t * restrict const special_coul = fc.special_coul;
const flt_t * restrict const special_lj = fc.special_lj;
const flt_t * _noalias const special_coul = fc.special_coul;
const flt_t * _noalias const special_lj = fc.special_lj;
const flt_t qqrd2e = force->qqrd2e;
const C_FORCE_T * restrict const c_force = fc.c_force[0];
const C_ENERGY_T * restrict const c_energy = fc.c_energy[0];
const TABLE_T * restrict const table = fc.table;
const flt_t * restrict const etable = fc.etable;
const flt_t * restrict const detable = fc.detable;
const flt_t * restrict const ctable = fc.ctable;
const flt_t * restrict const dctable = fc.dctable;
const C_FORCE_T * _noalias const c_force = fc.c_force[0];
const C_ENERGY_T * _noalias const c_energy = fc.c_energy[0];
const TABLE_T * _noalias const table = fc.table;
const flt_t * _noalias const etable = fc.etable;
const flt_t * _noalias const detable = fc.detable;
const flt_t * _noalias const ctable = fc.ctable;
const flt_t * _noalias const dctable = fc.dctable;
const flt_t g_ewald = fc.g_ewald;
const flt_t tabinnersq = fc.tabinnersq;
@ -174,8 +174,8 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
x_size, q_size, ev_size, f_stride);
int tc;
FORCE_T * restrict f_start;
acc_t * restrict ev_global;
FORCE_T * _noalias f_start;
acc_t * _noalias ev_global;
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
const int nthreads = tc;
@ -237,17 +237,17 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
iifrom += astart;
iito += astart;
FORCE_T * restrict const f = f_start - minlocal + (tid * f_stride);
FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride);
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
for (int i = iifrom; i < iito; ++i) {
const int itype = x[i].w;
const int ptr_off = itype * ntypes;
const C_FORCE_T * restrict const c_forcei = c_force + ptr_off;
const C_ENERGY_T * restrict const c_energyi = c_energy + ptr_off;
const C_FORCE_T * _noalias const c_forcei = c_force + ptr_off;
const C_ENERGY_T * _noalias const c_energyi = c_energy + ptr_off;
const int * restrict const jlist = firstneigh + cnumneigh[i];
const int * _noalias const jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
acc_t fxtmp,fytmp,fztmp,fwtmp;

View File

@ -64,8 +64,8 @@ class PairLJCutCoulLongIntel : public PairLJCutCoulLong {
typedef struct { flt_t cutsq, cut_ljsq, lj1, lj2; } c_force_t;
typedef struct { flt_t lj3, lj4, offset, pad; } c_energy_t;
typedef struct { flt_t r, dr, f, df; } table_t;
__declspec(align(64)) flt_t special_coul[4];
__declspec(align(64)) flt_t special_lj[4];
_alignvar(flt_t special_coul[4],64);
_alignvar(flt_t special_lj[4],64);
flt_t g_ewald, tabinnersq;
c_force_t **c_force;
c_energy_t **c_energy;

View File

@ -134,14 +134,14 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
const int ago = neighbor->ago;
IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
ATOM_T * restrict const x = buffers->get_x(offload);
ATOM_T * _noalias const x = buffers->get_x(offload);
const int * restrict const numneigh = list->numneigh;
const int * restrict const cnumneigh = buffers->cnumneigh(list);
const int * restrict const firstneigh = buffers->firstneigh(list);
const flt_t * restrict const special_lj = fc.special_lj;
const FC_PACKED1_T * restrict const ljc12o = fc.ljc12o[0];
const FC_PACKED2_T * restrict const lj34 = fc.lj34[0];
const int * _noalias const numneigh = list->numneigh;
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
const int * _noalias const firstneigh = buffers->firstneigh(list);
const flt_t * _noalias const special_lj = fc.special_lj;
const FC_PACKED1_T * _noalias const ljc12o = fc.ljc12o[0];
const FC_PACKED2_T * _noalias const lj34 = fc.lj34[0];
const int ntypes = atom->ntypes + 1;
const int eatom = this->eflag_atom;
@ -153,8 +153,8 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
x_size, q_size, ev_size, f_stride);
int tc;
FORCE_T * restrict f_start;
acc_t * restrict ev_global;
FORCE_T * _noalias f_start;
acc_t * _noalias ev_global;
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
const int nthreads = tc;
int *overflow = fix->get_off_overflow_flag();
@ -184,17 +184,17 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
iifrom += astart;
iito += astart;
FORCE_T * restrict const f = f_start - minlocal + (tid * f_stride);
FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride);
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
for (int i = iifrom; i < iito; ++i) {
const int itype = x[i].w;
const int ptr_off = itype * ntypes;
const FC_PACKED1_T * restrict const ljc12oi = ljc12o + ptr_off;
const FC_PACKED2_T * restrict const lj34i = lj34 + ptr_off;
const FC_PACKED1_T * _noalias const ljc12oi = ljc12o + ptr_off;
const FC_PACKED2_T * _noalias const lj34i = lj34 + ptr_off;
const int * restrict const jlist = firstneigh + cnumneigh[i];
const int * _noalias const jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
acc_t fxtmp, fytmp, fztmp, fwtmp;

View File

@ -62,7 +62,7 @@ class PairLJCutIntel : public PairLJCut {
typedef struct { flt_t cutsq, lj1, lj2, offset; } fc_packed1;
typedef struct { flt_t lj3, lj4; } fc_packed2;
__declspec(align(64)) flt_t special_lj[4];
_alignvar(flt_t special_lj[4],64);
fc_packed1 **ljc12o;
fc_packed2 **lj34;

View File

@ -17,6 +17,7 @@
#include "domain.h"
#include "comm.h"
#include "atom.h"
#include "atom_vec.h"
#include "force.h"
#include "pair.h"
#include "bond.h"
@ -81,14 +82,9 @@ void VerletIntel::init()
// set flags for what arrays to clear in force_clear()
// need to clear additionals arrays if they exist
torqueflag = 0;
torqueflag = extraflag = 0;
if (atom->torque_flag) torqueflag = 1;
erforceflag = 0;
if (atom->erforce_flag) erforceflag = 1;
e_flag = 0;
if (atom->e_flag) e_flag = 1;
rho_flag = 0;
if (atom->rho_flag) rho_flag = 1;
if (atom->avec->forceclearflag) extraflag = 1;
// orthogonal vs triclinic simulation box
@ -276,8 +272,10 @@ void VerletIntel::run(int n)
// initial time integration
timer->stamp();
modify->initial_integrate(vflag);
if (n_post_integrate) modify->post_integrate();
timer->stamp(Timer::MODIFY);
// regular communication vs neighbor list rebuild
@ -286,9 +284,13 @@ void VerletIntel::run(int n)
if (nflag == 0) {
timer->stamp();
comm->forward_comm();
timer->stamp(TIME_COMM);
timer->stamp(Timer::COMM);
} else {
if (n_pre_exchange) modify->pre_exchange();
if (n_pre_exchange) {
timer->stamp();
modify->pre_exchange();
timer->stamp(Timer::MODIFY);
}
if (triclinic) domain->x2lamda(atom->nlocal);
domain->pbc();
if (domain->box_change) {
@ -301,10 +303,13 @@ void VerletIntel::run(int n)
if (sortflag && ntimestep >= atom->nextsort) atom->sort();
comm->borders();
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
timer->stamp(TIME_COMM);
if (n_pre_neighbor) modify->pre_neighbor();
timer->stamp(Timer::COMM);
if (n_pre_neighbor) {
modify->pre_neighbor();
timer->stamp(Timer::MODIFY);
}
neighbor->build();
timer->stamp(TIME_NEIGHBOR);
timer->stamp(Timer::NEIGH);
}
// force computations
@ -313,13 +318,18 @@ void VerletIntel::run(int n)
// and Pair:ev_tally() needs to be called before any tallying
force_clear();
if (n_pre_force) modify->pre_force(vflag);
timer->stamp();
if (n_pre_force) {
modify->pre_force(vflag);
timer->stamp(Timer::MODIFY);
}
if (pair_compute_flag) {
force->pair->compute(eflag,vflag);
timer->stamp(TIME_PAIR);
timer->stamp(Timer::PAIR);
}
if (atom->molecular) {
@ -327,18 +337,18 @@ void VerletIntel::run(int n)
if (force->angle) force->angle->compute(eflag,vflag);
if (force->dihedral) force->dihedral->compute(eflag,vflag);
if (force->improper) force->improper->compute(eflag,vflag);
timer->stamp(TIME_BOND);
timer->stamp(Timer::BOND);
}
if (kspace_compute_flag) {
force->kspace->compute(eflag,vflag);
timer->stamp(TIME_KSPACE);
timer->stamp(Timer::KSPACE);
}
#ifdef _LMP_INTEL_OFFLOAD
if (sync_mode == 1) {
fix_intel->sync_coprocessor();
timer->stamp(TIME_PAIR);
timer->stamp(Timer::PAIR);
}
#endif
@ -346,13 +356,13 @@ void VerletIntel::run(int n)
if (force->newton) {
comm->reverse_comm();
timer->stamp(TIME_COMM);
timer->stamp(Timer::COMM);
}
#ifdef _LMP_INTEL_OFFLOAD
if (sync_mode == 2) {
fix_intel->sync_coprocessor();
timer->stamp(TIME_PAIR);
timer->stamp(Timer::PAIR);
}
#endif
@ -361,13 +371,14 @@ void VerletIntel::run(int n)
if (n_post_force) modify->post_force(vflag);
modify->final_integrate();
if (n_end_of_step) modify->end_of_step();
timer->stamp(Timer::MODIFY);
// all output
if (ntimestep == output->next) {
timer->stamp();
output->write(ntimestep);
timer->stamp(TIME_OUTPUT);
timer->stamp(Timer::OUTPUT);
}
}
}
@ -388,7 +399,7 @@ void VerletIntel::cleanup()
void VerletIntel::force_clear()
{
int i;
size_t nbytes;
if (external_force_clear) return;
@ -396,19 +407,16 @@ void VerletIntel::force_clear()
// if either newton flag is set, also include ghosts
// when using threads always clear all forces.
if (neighbor->includegroup == 0) {
int nall;
if (force->newton) nall = atom->nlocal + atom->nghost;
else nall = atom->nlocal;
int nlocal = atom->nlocal;
size_t nbytes = sizeof(double) * nall;
if (neighbor->includegroup == 0) {
nbytes = sizeof(double) * nlocal;
if (force->newton) nbytes += sizeof(double) * atom->nghost;
if (nbytes) {
memset(&(atom->f[0][0]),0,3*nbytes);
if (torqueflag) memset(&(atom->torque[0][0]),0,3*nbytes);
if (erforceflag) memset(&(atom->erforce[0]), 0, nbytes);
if (e_flag) memset(&(atom->de[0]), 0, nbytes);
if (rho_flag) memset(&(atom->drho[0]), 0, nbytes);
memset(&atom->f[0][0],0,3*nbytes);
if (torqueflag) memset(&atom->torque[0][0],0,3*nbytes);
if (extraflag) atom->avec->force_clear(0,nbytes);
}
// neighbor includegroup flag is set
@ -416,70 +424,21 @@ void VerletIntel::force_clear()
// if either newton flag is set, also include ghosts
} else {
int nall = atom->nfirst;
nbytes = sizeof(double) * atom->nfirst;
double **f = atom->f;
for (i = 0; i < nall; i++) {
f[i][0] = 0.0;
f[i][1] = 0.0;
f[i][2] = 0.0;
}
if (torqueflag) {
double **torque = atom->torque;
for (i = 0; i < nall; i++) {
torque[i][0] = 0.0;
torque[i][1] = 0.0;
torque[i][2] = 0.0;
}
}
if (erforceflag) {
double *erforce = atom->erforce;
for (i = 0; i < nall; i++) erforce[i] = 0.0;
}
if (e_flag) {
double *de = atom->de;
for (i = 0; i < nall; i++) de[i] = 0.0;
}
if (rho_flag) {
double *drho = atom->drho;
for (i = 0; i < nall; i++) drho[i] = 0.0;
if (nbytes) {
memset(&atom->f[0][0],0,3*nbytes);
if (torqueflag) memset(&atom->torque[0][0],0,3*nbytes);
if (extraflag) atom->avec->force_clear(0,nbytes);
}
if (force->newton) {
nall = atom->nlocal + atom->nghost;
nbytes = sizeof(double) * atom->nghost;
for (i = atom->nlocal; i < nall; i++) {
f[i][0] = 0.0;
f[i][1] = 0.0;
f[i][2] = 0.0;
}
if (torqueflag) {
double **torque = atom->torque;
for (i = atom->nlocal; i < nall; i++) {
torque[i][0] = 0.0;
torque[i][1] = 0.0;
torque[i][2] = 0.0;
}
}
if (erforceflag) {
double *erforce = atom->erforce;
for (i = atom->nlocal; i < nall; i++) erforce[i] = 0.0;
}
if (e_flag) {
double *de = atom->de;
for (i = 0; i < nall; i++) de[i] = 0.0;
}
if (rho_flag) {
double *drho = atom->drho;
for (i = 0; i < nall; i++) drho[i] = 0.0;
if (nbytes) {
memset(&atom->f[nlocal][0],0,3*nbytes);
if (torqueflag) memset(&atom->torque[nlocal][0],0,3*nbytes);
if (extraflag) atom->avec->force_clear(nlocal,nbytes);
}
}
}

View File

@ -39,8 +39,7 @@ class VerletIntel : public Integrate {
protected:
int triclinic; // 0 if domain is orthog, 1 if triclinic
int torqueflag,erforceflag;
int e_flag,rho_flag;
int torqueflag,extraflag;
virtual void force_clear();
#ifdef _LMP_INTEL_OFFLOAD

View File

@ -52,6 +52,9 @@ VerletSplitIntel::VerletSplitIntel(LAMMPS *lmp, int narg, char **arg) :
if (universe->procs_per_world[0] % universe->procs_per_world[1])
error->universe_all(FLERR,"Verlet/split requires Rspace partition "
"size be multiple of Kspace partition size");
if (comm->style != 0)
error->universe_all(FLERR,"Verlet/split can only currently be used with "
"comm_style brick");
// master = 1 for Rspace procs, 0 for Kspace procs
@ -214,6 +217,9 @@ VerletSplitIntel::~VerletSplitIntel()
void VerletSplitIntel::init()
{
if (comm->style != 0)
error->universe_all(FLERR,"Verlet/split can only currently be used with "
"comm_style brick");
if (!force->kspace && comm->me == 0)
error->warning(FLERR,"No Kspace calculation with verlet/split");
@ -277,7 +283,7 @@ void VerletSplitIntel::run(int n)
MPI_Barrier(universe->uworld);
timer->init();
timer->barrier_start(TIME_LOOP);
timer->barrier_start();
// setup initial Rspace <-> Kspace comm params
@ -323,7 +329,7 @@ void VerletSplitIntel::run(int n)
if (nflag == 0) {
timer->stamp();
comm->forward_comm();
timer->stamp(TIME_COMM);
timer->stamp(Timer::COMM);
} else {
if (n_pre_exchange) modify->pre_exchange();
if (triclinic) domain->x2lamda(atom->nlocal);
@ -338,10 +344,10 @@ void VerletSplitIntel::run(int n)
if (sortflag && ntimestep >= atom->nextsort) atom->sort();
comm->borders();
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
timer->stamp(TIME_COMM);
timer->stamp(Timer::COMM);
if (n_pre_neighbor) modify->pre_neighbor();
neighbor->build();
timer->stamp(TIME_NEIGHBOR);
timer->stamp(Timer::NEIGH);
}
}
@ -361,7 +367,7 @@ void VerletSplitIntel::run(int n)
timer->stamp();
if (force->pair) {
force->pair->compute(eflag,vflag);
timer->stamp(TIME_PAIR);
timer->stamp(Timer::PAIR);
}
if (atom->molecular) {
@ -369,25 +375,25 @@ void VerletSplitIntel::run(int n)
if (force->angle) force->angle->compute(eflag,vflag);
if (force->dihedral) force->dihedral->compute(eflag,vflag);
if (force->improper) force->improper->compute(eflag,vflag);
timer->stamp(TIME_BOND);
timer->stamp(Timer::BOND);
}
#ifdef _LMP_INTEL_OFFLOAD
if (sync_mode == 1) {
fix_intel->sync_coprocessor();
timer->stamp(TIME_PAIR);
timer->stamp(Timer::PAIR);
}
#endif
if (force->newton) {
comm->reverse_comm();
timer->stamp(TIME_COMM);
timer->stamp(Timer::COMM);
}
#ifdef _LMP_INTEL_OFFLOAD
if (sync_mode == 2) {
fix_intel->sync_coprocessor();
timer->stamp(TIME_PAIR);
timer->stamp(Timer::PAIR);
}
#endif
@ -400,14 +406,14 @@ void VerletSplitIntel::run(int n)
if (force->kspace) {
timer->stamp();
force->kspace->compute(eflag,vflag);
timer->stamp(TIME_KSPACE);
timer->stamp(Timer::KSPACE);
}
// TIP4P PPPM puts forces on ghost atoms, so must reverse_comm()
if (tip4p_flag && force->newton) {
comm->reverse_comm();
timer->stamp(TIME_COMM);
timer->stamp(Timer::COMM);
}
}
@ -419,14 +425,16 @@ void VerletSplitIntel::run(int n)
// all output
if (master) {
timer->stamp();
if (n_post_force) modify->post_force(vflag);
modify->final_integrate();
if (n_end_of_step) modify->end_of_step();
timer->stamp(Timer::MODIFY);
if (ntimestep == output->next) {
timer->stamp();
output->write(ntimestep);
timer->stamp(TIME_OUTPUT);
timer->stamp(Timer::OUTPUT);
}
}
}
@ -498,7 +506,7 @@ void VerletSplitIntel::rk_setup()
atom->map_clear();
comm->borders();
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
timer->stamp(TIME_COMM);
timer->stamp(Timer::COMM);
}
}
}
@ -549,7 +557,7 @@ void VerletSplitIntel::r2k_comm()
if (tip4p_flag && !master) {
timer->stamp();
comm->forward_comm();
timer->stamp(TIME_COMM);
timer->stamp(Timer::COMM);
}
}

View File

@ -1442,30 +1442,6 @@ void Input::package()
delete [] fixarg;
} else if (strcmp(arg[0],"intel") == 0) {
// add omp package for non-pair routines
/*
char **fixarg = new char*[2+narg];
fixarg[0] = (char *) "package_omp";
fixarg[1] = (char *) "all";
fixarg[2] = (char *) "OMP";
int omp_narg = 3;
if (narg > 1) {
fixarg[3] = arg[1];
omp_narg++;
if (narg > 2)
for (int i = 2; i < narg; i++)
if (strcmp(arg[i],"mixed") == 0) {
fixarg[4] = arg[i];
omp_narg++;
}
}
modify->add_fix(omp_narg,fixarg);
// add intel package for neighbor and pair routines
*/
if (!modify->check_package("Intel"))
error->all(FLERR,
"Package intel command without USER-INTEL package installed");
@ -1478,17 +1454,12 @@ void Input::package()
modify->add_fix(2+narg,fixarg);
delete [] fixarg;
/*
// if running with offload, set run_style to verlet/intel
// set integrator = verlet/intel
// -sf intel does same thing in Update constructor via suffix
#ifdef LMP_INTEL_OFFLOAD
#ifdef __INTEL_OFFLOAD
char *str;
str = (char *) "verlet/intel";
update->create_integrate(1,&str,0);
#endif
#endif
*/
} else error->all(FLERR,"Illegal package command");
}

View File

@ -53,6 +53,15 @@ void Integrate::init()
else pair_compute_flag = 0;
if (force->kspace && force->kspace->compute_flag) kspace_compute_flag = 1;
else kspace_compute_flag = 0;
// should add checks:
// for any acceleration package that has its own integrate/minimize
// in case input script has reset the run or minimize style explicitly
// e.g. invalid to have intel pair style with non-intel verlet
// but OK to have intel verlet with non intel pair style (just warn)
// ditto for USER-CUDA and KOKKOS package verlet with their pair, fix, etc
// making these checks would require all the pair, fix, etc styles have
// cuda, kokkos, intel flags
}
/* ----------------------------------------------------------------------

View File

@ -667,6 +667,8 @@ void LAMMPS::post_create()
{
if (!suffix_enable) return;
// suffix will always be set if suffix_enable = 1
if (strcmp(suffix,"gpu") == 0 && !modify->check_package("GPU"))
error->all(FLERR,"Using suffix gpu without GPU package installed");
if (strcmp(suffix,"intel") == 0 && !modify->check_package("Intel"))
@ -674,7 +676,10 @@ void LAMMPS::post_create()
if (strcmp(suffix,"omp") == 0 && !modify->check_package("OMP"))
error->all(FLERR,"Using suffix omp without USER-OMP package installed");
if (strcmp(suffix2,"omp") == 0 && !modify->check_package("OMP")) {
// suffix2 only currently set by -sf intel
// need to unset if LAMMPS was not built with USER-OMP package
if (suffix2 && strcmp(suffix2,"omp") == 0 && !modify->check_package("OMP")) {
delete [] suffix2;
suffix2 = NULL;
}

View File

@ -47,9 +47,6 @@ class LAMMPS {
int cite_enable; // 1 if generating log.cite, 0 if disabled
class Cuda *cuda; // CUDA accelerator class
//class GPU *gpu; // GPU accelerator class
//class Intel *intel; // Intel accelerator class
//class OMP *omp; // OMP accelerator class
class KokkosLMP *kokkos; // KOKKOS accelerator class
class CiteMe *citeme; // citation info

View File

@ -167,6 +167,33 @@ typedef int bigint;
}
// preprocessor macros for compiler specific settings
// clear previous definitions to avoid redefinition warning
#ifdef _alignvar
#undef _alignvar
#endif
#ifdef _noalias
#undef _noalias
#endif
// define stack variable alignment
#if defined(__INTEL_COMPILER)
#define _alignvar(expr,val) __declspec(align(val)) expr
#elif defined(__GNUC__)
#define _alignvar(expr,val) expr __attribute((aligned(val)))
#else
#define _alignvar(expr,val) expr
#endif
// declaration to lift aliasing restrictions
#if defined(__INTEL_COMPILER)
#define _noalias restrict
#elif defined(__GNUC__)
#define _noalias __restrict
#else
#define _noalias
#endif
// settings to enable LAMMPS to build under Windows
#ifdef _WIN32

View File

@ -1 +1 @@
#define LAMMPS_VERSION "14 Aug 2014"
#define LAMMPS_VERSION "15 Aug 2014"