Merge branch 'lammps-icms' of bitbucket.org:akohlmey/lammps-icms into lammps-icms
Resolved Conflicts: doc/Manual.html doc/Manual.txt doc/package.txt
This commit is contained in:
@ -1,7 +1,7 @@
|
||||
<HTML>
|
||||
<HEAD>
|
||||
<TITLE>LAMMPS-ICMS Users Manual</TITLE>
|
||||
<META NAME="docnumber" CONTENT="14 Aug 2014 version">
|
||||
<META NAME="docnumber" CONTENT="15 Aug 2014 version">
|
||||
<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
|
||||
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
|
||||
</HEAD>
|
||||
@ -22,7 +22,7 @@
|
||||
|
||||
<CENTER><H3>LAMMPS-ICMS Documentation
|
||||
</H3></CENTER>
|
||||
<CENTER><H4>14 Aug 2014 version
|
||||
<CENTER><H4>15 Aug 2014 version
|
||||
</H4></CENTER>
|
||||
<H4>Version info:
|
||||
</H4>
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
<HEAD>
|
||||
<TITLE>LAMMPS-ICMS Users Manual</TITLE>
|
||||
<META NAME="docnumber" CONTENT="14 Aug 2014 version">
|
||||
<META NAME="docnumber" CONTENT="15 Aug 2014 version">
|
||||
<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
|
||||
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
|
||||
</HEAD>
|
||||
@ -18,7 +18,7 @@
|
||||
<H1></H1>
|
||||
|
||||
LAMMPS-ICMS Documentation :c,h3
|
||||
14 Aug 2014 version :c,h4
|
||||
15 Aug 2014 version :c,h4
|
||||
|
||||
Version info: :h4
|
||||
|
||||
|
||||
@ -109,7 +109,7 @@ it to LAMMPS.
|
||||
<LI> open-source distribution
|
||||
<LI> highly portable C++
|
||||
<LI> optional libraries used: MPI and single-processor FFT
|
||||
<LI> Intel Xeon Phi, GPU (CUDA and OpenCL), and OpenMP support for many code features
|
||||
<LI> GPU (CUDA and OpenCL), Intel Xeon Phi, and OpenMP support for many code features
|
||||
<LI> easy to extend with new features and functionality
|
||||
<LI> runs from an input script
|
||||
<LI> syntax for defining and using variables and formulas
|
||||
|
||||
@ -105,7 +105,7 @@ General features :h4
|
||||
open-source distribution
|
||||
highly portable C++
|
||||
optional libraries used: MPI and single-processor FFT
|
||||
Intel Xeon Phi, GPU (CUDA and OpenCL), and OpenMP support for many code features
|
||||
GPU (CUDA and OpenCL), Intel Xeon Phi, and OpenMP support for many code features
|
||||
easy to extend with new features and functionality
|
||||
runs from an input script
|
||||
syntax for defining and using variables and formulas
|
||||
|
||||
@ -39,7 +39,7 @@ args = arguments specific to the style :l
|
||||
{cellsize} value = dist
|
||||
dist = length (distance units) in each dimension for neighbor bins
|
||||
{device} value = device_type
|
||||
device_type = {kepler} or {fermi} or {cypress} or {phi} or {generic}
|
||||
device_type = {kepler} or {fermi} or {cypress} or {phi} or {intel} or {generic}
|
||||
{intel} args = Nthreads precision keyword value ...
|
||||
Nthreads = # of OpenMP threads to associate with each MPI process on host
|
||||
precision = {single} or {mixed} or {double}
|
||||
|
||||
@ -3,7 +3,9 @@
|
||||
# shape: 2 1.5 1
|
||||
# cutoff 4.0 with skin 0.8
|
||||
# NPT, T=2.4, P=8.0
|
||||
|
||||
package intel * mixed balance $b
|
||||
package omp *
|
||||
suffix $s
|
||||
processors * * * grid numa
|
||||
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
# Rhodopsin model
|
||||
|
||||
package intel * mixed balance $b
|
||||
package omp *
|
||||
suffix $s
|
||||
|
||||
variable x index 4
|
||||
|
||||
@ -407,9 +407,11 @@ void VerletSplit::run(int n)
|
||||
// all output
|
||||
|
||||
if (master) {
|
||||
timer->stamp();
|
||||
if (n_post_force) modify->post_force(vflag);
|
||||
modify->final_integrate();
|
||||
if (n_end_of_step) modify->end_of_step();
|
||||
timer->stamp(Timer::MODIFY);
|
||||
|
||||
if (ntimestep == output->next) {
|
||||
timer->stamp();
|
||||
|
||||
@ -67,41 +67,3 @@ elif (test $mode = 0) then
|
||||
touch ../accelerator_intel.h
|
||||
|
||||
fi
|
||||
|
||||
# step 3: map omp styles that are not in the intel package to intel suffix
|
||||
|
||||
#if (test $mode = 0) then
|
||||
#
|
||||
# rm -f ../*ompinto_intel*
|
||||
#
|
||||
#else
|
||||
#
|
||||
# echo " The 'intel' suffix will use the USER-OMP package for all"
|
||||
# echo " angle, bond, dihedral, kspace, and improper styles:"
|
||||
# stylelist="pair fix angle bond dihedral improper"
|
||||
# for header in $stylelist; do
|
||||
# HEADER=`echo $header | sed 's/\(.*\)/\U\1/'`
|
||||
# outfile=../$header"_ompinto_intel.h"
|
||||
# echo " Creating $header style map: $outfile"
|
||||
# echo -n "// -- Header to map USER-OMP " > $outfile
|
||||
# echo "styles to the intel suffix" >> $outfile
|
||||
# echo >> $outfile
|
||||
# echo "#ifdef "$HEADER"_CLASS" >> $outfile
|
||||
# grep -h 'Style(' ../$header*_omp.h | grep -v 'charmm/coul/long' | \
|
||||
# grep -v 'lj/cut' | grep -v 'gayberne' | \
|
||||
# sed 's/\/omp/\/intel/g' >> $outfile
|
||||
# echo "#endif" >> $outfile
|
||||
# done
|
||||
#
|
||||
# header="kspace"
|
||||
# HEADER="KSPACE"
|
||||
# outfile=../$header"_ompinto_intel.h"
|
||||
# echo " Creating $header style map: $outfile"
|
||||
# echo -n "// -- Header to map USER-OMP " > $outfile
|
||||
# echo "styles to the intel suffix" >> $outfile
|
||||
# echo >> $outfile
|
||||
# echo "#ifdef "$HEADER"_CLASS" >> $outfile
|
||||
# grep -h 'KSpaceStyle(' ../*_omp.h | sed 's/\/omp/\/intel/g' >> $outfile
|
||||
# echo "#endif" >> $outfile
|
||||
#
|
||||
#fi
|
||||
|
||||
@ -128,7 +128,7 @@ class FixIntel : public Fix {
|
||||
|
||||
protected:
|
||||
int _overflow_flag[5];
|
||||
__declspec(align(64)) int _off_overflow_flag[5];
|
||||
_alignvar(int _off_overflow_flag[5],64);
|
||||
int _allow_separate_buffers, _offload_ghost;
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
double _balance_pair_time, _balance_other_time;
|
||||
@ -155,18 +155,18 @@ class FixIntel : public Fix {
|
||||
double _offload_balance, _balance_neighbor, _balance_pair, _balance_fixed;
|
||||
double _timers[NUM_ITIMERS];
|
||||
double _stopwatch[NUM_ITIMERS];
|
||||
__declspec(align(64)) double _stopwatch_offload_neighbor[1];
|
||||
__declspec(align(64)) double _stopwatch_offload_pair[1];
|
||||
_alignvar(double _stopwatch_offload_neighbor[1],64);
|
||||
_alignvar(double _stopwatch_offload_pair[1],64);
|
||||
|
||||
template <class ft, class acc_t>
|
||||
inline void add_results(const ft * restrict const f_in,
|
||||
const acc_t * restrict const ev_global,
|
||||
inline void add_results(const ft * _noalias const f_in,
|
||||
const acc_t * _noalias const ev_global,
|
||||
const int eatom, const int vatom,
|
||||
const int offload);
|
||||
|
||||
template <class ft, class acc_t>
|
||||
inline void add_oresults(const ft * restrict const f_in,
|
||||
const acc_t * restrict const ev_global,
|
||||
inline void add_oresults(const ft * _noalias const f_in,
|
||||
const acc_t * _noalias const ev_global,
|
||||
const int eatom, const int vatom,
|
||||
const int out_offset, const int nall);
|
||||
|
||||
@ -176,8 +176,8 @@ class FixIntel : public Fix {
|
||||
int _im_real_space_task;
|
||||
MPI_Comm _real_space_comm;
|
||||
template <class ft, class acc_t>
|
||||
inline void add_off_results(const ft * restrict const f_in,
|
||||
const acc_t * restrict const ev_global);
|
||||
inline void add_off_results(const ft * _noalias const f_in,
|
||||
const acc_t * _noalias const ev_global);
|
||||
#endif
|
||||
};
|
||||
|
||||
@ -284,8 +284,8 @@ void FixIntel::add_result_array(IntelBuffers<float,float>::vec3_acc_t *f_in,
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class ft, class acc_t>
|
||||
void FixIntel::add_results(const ft * restrict const f_in,
|
||||
const acc_t * restrict const ev_global,
|
||||
void FixIntel::add_results(const ft * _noalias const f_in,
|
||||
const acc_t * _noalias const ev_global,
|
||||
const int eatom, const int vatom,
|
||||
const int offload) {
|
||||
start_watch(TIME_PACK);
|
||||
@ -295,7 +295,7 @@ void FixIntel::add_results(const ft * restrict const f_in,
|
||||
if (offload) {
|
||||
add_oresults(f_in, ev_global, eatom, vatom, 0, _offload_nlocal);
|
||||
if (force->newton_pair) {
|
||||
const acc_t * restrict const enull = 0;
|
||||
const acc_t * _noalias const enull = 0;
|
||||
int offset = _offload_nlocal;
|
||||
if (atom->torque) offset *= 2;
|
||||
add_oresults(f_in + offset, enull, eatom, vatom,
|
||||
@ -305,7 +305,7 @@ void FixIntel::add_results(const ft * restrict const f_in,
|
||||
add_oresults(f_in, ev_global, eatom, vatom,
|
||||
_host_min_local, _host_used_local);
|
||||
if (force->newton_pair) {
|
||||
const acc_t * restrict const enull = 0;
|
||||
const acc_t * _noalias const enull = 0;
|
||||
int offset = _host_used_local;
|
||||
if (atom->torque) offset *= 2;
|
||||
add_oresults(f_in + offset, enull, eatom,
|
||||
@ -333,11 +333,11 @@ void FixIntel::add_results(const ft * restrict const f_in,
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class ft, class acc_t>
|
||||
void FixIntel::add_oresults(const ft * restrict const f_in,
|
||||
const acc_t * restrict const ev_global,
|
||||
void FixIntel::add_oresults(const ft * _noalias const f_in,
|
||||
const acc_t * _noalias const ev_global,
|
||||
const int eatom, const int vatom,
|
||||
const int out_offset, const int nall) {
|
||||
lmp_ft * restrict const f = (lmp_ft *) lmp->atom->f[0] + out_offset;
|
||||
lmp_ft * _noalias const f = (lmp_ft *) lmp->atom->f[0] + out_offset;
|
||||
if (atom->torque) {
|
||||
if (f_in[1].w)
|
||||
if (f_in[1].w == 1)
|
||||
@ -356,7 +356,7 @@ void FixIntel::add_oresults(const ft * restrict const f_in,
|
||||
IP_PRE_omp_range_align(ifrom, ito, tid, nall, _nthreads, sizeof(acc_t));
|
||||
if (atom->torque) {
|
||||
int ii = ifrom * 2;
|
||||
lmp_ft * restrict const tor = (lmp_ft *) lmp->atom->torque[0] +
|
||||
lmp_ft * _noalias const tor = (lmp_ft *) lmp->atom->torque[0] +
|
||||
out_offset;
|
||||
if (eatom) {
|
||||
for (int i = ifrom; i < ito; i++) {
|
||||
@ -464,8 +464,8 @@ void FixIntel::set_neighbor_host_sizes() {
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class ft, class acc_t>
|
||||
void FixIntel::add_off_results(const ft * restrict const f_in,
|
||||
const acc_t * restrict const ev_global) {
|
||||
void FixIntel::add_off_results(const ft * _noalias const f_in,
|
||||
const acc_t * _noalias const ev_global) {
|
||||
if (_offload_balance < 0.0)
|
||||
_balance_other_time = MPI_Wtime() - _balance_other_time;
|
||||
|
||||
|
||||
@ -22,8 +22,8 @@ using namespace LAMMPS_NS;
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
IntelBuffers<flt_t, acc_t>::IntelBuffers(class LAMMPS *lmp_in) :
|
||||
lmp(lmp_in), _x(0), _q(0), _quat(0), _f(0), _buf_size(0),
|
||||
_buf_local_size(0), _off_threads(0) {
|
||||
lmp(lmp_in), _x(0), _q(0), _quat(0), _f(0), _off_threads(0),
|
||||
_buf_size(0), _buf_local_size(0) {
|
||||
_list_alloc_atoms = 0;
|
||||
_ntypes = 0;
|
||||
_off_map_maxlocal = 0;
|
||||
@ -423,6 +423,8 @@ double IntelBuffers<flt_t, acc_t>::memory_usage(const int nthreads)
|
||||
tmem += _off_map_maxlocal * sizeof(int);
|
||||
tmem += (_list_alloc_atoms + _off_threads) * get_max_nbors() * sizeof(int);
|
||||
tmem += _ntypes * _ntypes * sizeof(int);
|
||||
|
||||
return tmem;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -266,8 +266,8 @@ class IntelBuffers {
|
||||
#endif
|
||||
|
||||
int _buf_size, _buf_local_size;
|
||||
__declspec(align(64)) acc_t _ev_global[8];
|
||||
__declspec(align(64)) acc_t _ev_global_host[8];
|
||||
_alignvar(acc_t _ev_global[8],64);
|
||||
_alignvar(acc_t _ev_global_host[8],64);
|
||||
|
||||
void _grow(const int nall, const int nlocal, const int nthreads,
|
||||
const int offload_end);
|
||||
|
||||
@ -351,7 +351,7 @@ inline double MIC_Wtime() {
|
||||
for (int t = 1; t < nthreads; t++) { \
|
||||
_Pragma("vector nontemporal") \
|
||||
for (int n = iifrom; n < iito; n++) { \
|
||||
f_start[n].x += f_start[n + t_off].x; \
|
||||
f_start[n].x += f_start[n + t_off].x; \
|
||||
f_start[n].y += f_start[n + t_off].y; \
|
||||
f_start[n].z += f_start[n + t_off].z; \
|
||||
f_start[n].w += f_start[n + t_off].w; \
|
||||
@ -361,7 +361,7 @@ inline double MIC_Wtime() {
|
||||
} else { \
|
||||
for (int t = 1; t < nthreads; t++) { \
|
||||
_Pragma("vector nontemporal") \
|
||||
for (int n = iifrom; n < iito; n++) { \
|
||||
for (int n = iifrom; n < iito; n++) { \
|
||||
f_start[n].x += f_start[n + t_off].x; \
|
||||
f_start[n].y += f_start[n + t_off].y; \
|
||||
f_start[n].z += f_start[n + t_off].z; \
|
||||
@ -372,7 +372,7 @@ inline double MIC_Wtime() {
|
||||
\
|
||||
if (evflag) { \
|
||||
if (vflag == 2) { \
|
||||
const ATOM_T * restrict const xo = x + minlocal; \
|
||||
const ATOM_T * _noalias const xo = x + minlocal; \
|
||||
_Pragma("vector nontemporal") \
|
||||
for (int n = iifrom; n < iito; n++) { \
|
||||
ov0 += f_start[n].x * xo[n].x; \
|
||||
|
||||
@ -105,7 +105,7 @@ inline int mcoord2bin(const flt_t x0, const flt_t x1, const flt_t x2,
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void Neighbor::bin_atoms(void * xin) {
|
||||
const ATOM_T * restrict const x = (const ATOM_T * restrict const)xin;
|
||||
const ATOM_T * _noalias const x = (const ATOM_T * _noalias const)xin;
|
||||
int nlocal = atom->nlocal;
|
||||
const int nall = nlocal + atom->nghost;
|
||||
|
||||
@ -243,8 +243,8 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
|
||||
return;
|
||||
}
|
||||
|
||||
const ATOM_T * restrict const x = buffers->get_x();
|
||||
int * restrict const firstneigh = buffers->firstneigh(list);
|
||||
const ATOM_T * _noalias const x = buffers->get_x();
|
||||
int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
|
||||
const int molecular = atom->molecular;
|
||||
int *ns = NULL, *s = NULL;
|
||||
@ -260,17 +260,17 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
|
||||
tag_size = 0;
|
||||
special_size = 0;
|
||||
}
|
||||
const int * restrict const special = s;
|
||||
const int * restrict const nspecial = ns;
|
||||
const int * _noalias const special = s;
|
||||
const int * _noalias const nspecial = ns;
|
||||
const int maxspecial = atom->maxspecial;
|
||||
const int * restrict const tag = atom->tag;
|
||||
const int * _noalias const tag = atom->tag;
|
||||
|
||||
int * restrict const ilist = list->ilist;
|
||||
int * restrict numneigh = list->numneigh;
|
||||
int * restrict const cnumneigh = buffers->cnumneigh(list);
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias numneigh = list->numneigh;
|
||||
int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int nstencil = list->nstencil;
|
||||
const int * restrict const stencil = list->stencil;
|
||||
const flt_t * restrict const cutneighsq = buffers->get_cutneighsq()[0];
|
||||
const int * _noalias const stencil = list->stencil;
|
||||
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
|
||||
const int ntypes = atom->ntypes + 1;
|
||||
const int nlocal = atom->nlocal;
|
||||
|
||||
@ -316,8 +316,8 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
|
||||
}
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
const int * restrict const binhead = this->binhead;
|
||||
const int * restrict const special_flag = this->special_flag;
|
||||
const int * _noalias const binhead = this->binhead;
|
||||
const int * _noalias const special_flag = this->special_flag;
|
||||
const int nbinx = this->nbinx;
|
||||
const int nbiny = this->nbiny;
|
||||
const int nbinz = this->nbinz;
|
||||
@ -327,7 +327,7 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
|
||||
const int mbinx = this->mbinx;
|
||||
const int mbiny = this->mbiny;
|
||||
const int mbinz = this->mbinz;
|
||||
const int * restrict const bins = this->bins;
|
||||
const int * _noalias const bins = this->bins;
|
||||
const int cop = fix->coprocessor_number();
|
||||
const int separate_buffers = fix->separate_buffers();
|
||||
#pragma offload target(mic:cop) if(offload) \
|
||||
@ -486,7 +486,7 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
|
||||
|
||||
if (molecular) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * restrict jlist = firstneigh + cnumneigh[i];
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int j = jlist[jj];
|
||||
@ -507,7 +507,7 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
else if (separate_buffers) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * restrict jlist = firstneigh + cnumneigh[i];
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
int jj = 0;
|
||||
for (jj = 0; jj < jnum; jj++)
|
||||
@ -662,8 +662,8 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
|
||||
return;
|
||||
}
|
||||
|
||||
const ATOM_T * restrict const x = buffers->get_x();
|
||||
int * restrict const firstneigh = buffers->firstneigh(list);
|
||||
const ATOM_T * _noalias const x = buffers->get_x();
|
||||
int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
int nall_t = nall;
|
||||
if (offload_noghost && offload) nall_t = atom->nlocal;
|
||||
const int e_nall = nall_t;
|
||||
@ -682,17 +682,17 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
|
||||
tag_size = 0;
|
||||
special_size = 0;
|
||||
}
|
||||
const int * restrict const special = s;
|
||||
const int * restrict const nspecial = ns;
|
||||
const int * _noalias const special = s;
|
||||
const int * _noalias const nspecial = ns;
|
||||
const int maxspecial = atom->maxspecial;
|
||||
const int * restrict const tag = atom->tag;
|
||||
const int * _noalias const tag = atom->tag;
|
||||
|
||||
int * restrict const ilist = list->ilist;
|
||||
int * restrict numneigh = list->numneigh;
|
||||
int * restrict const cnumneigh = buffers->cnumneigh(list);
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias numneigh = list->numneigh;
|
||||
int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int nstencil = list->nstencil;
|
||||
const int * restrict const stencil = list->stencil;
|
||||
const flt_t * restrict const cutneighsq = buffers->get_cutneighsq()[0];
|
||||
const int * _noalias const stencil = list->stencil;
|
||||
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
|
||||
const int ntypes = atom->ntypes + 1;
|
||||
const int nlocal = atom->nlocal;
|
||||
|
||||
@ -737,8 +737,8 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
|
||||
}
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
const int * restrict const binhead = this->binhead;
|
||||
const int * restrict const special_flag = this->special_flag;
|
||||
const int * _noalias const binhead = this->binhead;
|
||||
const int * _noalias const special_flag = this->special_flag;
|
||||
const int nbinx = this->nbinx;
|
||||
const int nbiny = this->nbiny;
|
||||
const int nbinz = this->nbinz;
|
||||
@ -748,7 +748,7 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
|
||||
const int mbinx = this->mbinx;
|
||||
const int mbiny = this->mbiny;
|
||||
const int mbinz = this->mbinz;
|
||||
const int * restrict const bins = this->bins;
|
||||
const int * _noalias const bins = this->bins;
|
||||
const int cop = fix->coprocessor_number();
|
||||
const int separate_buffers = fix->separate_buffers();
|
||||
#pragma offload target(mic:cop) if(offload) \
|
||||
@ -948,7 +948,7 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
|
||||
|
||||
if (molecular) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * restrict jlist = firstneigh + cnumneigh[i];
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int j = jlist[jj];
|
||||
@ -970,7 +970,7 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
else if (separate_buffers) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * restrict jlist = firstneigh + cnumneigh[i];
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
int jj = 0;
|
||||
for (jj = 0; jj < jnum; jj++)
|
||||
@ -1127,8 +1127,8 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
|
||||
return;
|
||||
}
|
||||
|
||||
const ATOM_T * restrict const x = buffers->get_x();
|
||||
int * restrict const firstneigh = buffers->firstneigh(list);
|
||||
const ATOM_T * _noalias const x = buffers->get_x();
|
||||
int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
int nall_t = nall;
|
||||
if (offload_noghost && offload) nall_t = atom->nlocal;
|
||||
const int e_nall = nall_t;
|
||||
@ -1147,17 +1147,17 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
|
||||
tag_size = 0;
|
||||
special_size = 0;
|
||||
}
|
||||
const int * restrict const special = s;
|
||||
const int * restrict const nspecial = ns;
|
||||
const int * _noalias const special = s;
|
||||
const int * _noalias const nspecial = ns;
|
||||
const int maxspecial = atom->maxspecial;
|
||||
const int * restrict const tag = atom->tag;
|
||||
const int * _noalias const tag = atom->tag;
|
||||
|
||||
int * restrict const ilist = list->ilist;
|
||||
int * restrict numneigh = list->numneigh;
|
||||
int * restrict const cnumneigh = buffers->cnumneigh(list);
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias numneigh = list->numneigh;
|
||||
int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int nstencil = list->nstencil;
|
||||
const int * restrict const stencil = list->stencil;
|
||||
const flt_t * restrict const cutneighsq = buffers->get_cutneighsq()[0];
|
||||
const int * _noalias const stencil = list->stencil;
|
||||
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
|
||||
const int ntypes = atom->ntypes + 1;
|
||||
const int nlocal = atom->nlocal;
|
||||
|
||||
@ -1202,8 +1202,8 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
|
||||
}
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
const int * restrict const binhead = this->binhead;
|
||||
const int * restrict const special_flag = this->special_flag;
|
||||
const int * _noalias const binhead = this->binhead;
|
||||
const int * _noalias const special_flag = this->special_flag;
|
||||
const int nbinx = this->nbinx;
|
||||
const int nbiny = this->nbiny;
|
||||
const int nbinz = this->nbinz;
|
||||
@ -1213,7 +1213,7 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
|
||||
const int mbinx = this->mbinx;
|
||||
const int mbiny = this->mbiny;
|
||||
const int mbinz = this->mbinz;
|
||||
const int * restrict const bins = this->bins;
|
||||
const int * _noalias const bins = this->bins;
|
||||
const int cop = fix->coprocessor_number();
|
||||
const int separate_buffers = fix->separate_buffers();
|
||||
#pragma offload target(mic:cop) if(offload) \
|
||||
@ -1386,7 +1386,7 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
|
||||
|
||||
if (molecular) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * restrict jlist = firstneigh + cnumneigh[i];
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int j = jlist[jj];
|
||||
@ -1407,7 +1407,7 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
else if (separate_buffers) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * restrict jlist = firstneigh + cnumneigh[i];
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
int jj = 0;
|
||||
for (jj = 0; jj < jnum; jj++)
|
||||
|
||||
@ -79,7 +79,7 @@ void PairGayBerneIntel::compute(int eflag, int vflag,
|
||||
fix->start_watch(TIME_PACK);
|
||||
const AtomVecEllipsoid::Bonus * const bonus = avec->bonus;
|
||||
const int * const ellipsoid = atom->ellipsoid;
|
||||
QUAT_T * restrict const quat = buffers->get_quat();
|
||||
QUAT_T * _noalias const quat = buffers->get_quat();
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
|
||||
#endif
|
||||
@ -150,8 +150,8 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
||||
fix->get_buffern(offload, nlocal, nall, minlocal);
|
||||
|
||||
const int ago = neighbor->ago;
|
||||
ATOM_T * restrict const x = buffers->get_x(offload);
|
||||
QUAT_T * restrict const quat = buffers->get_quat(offload);
|
||||
ATOM_T * _noalias const x = buffers->get_x(offload);
|
||||
QUAT_T * _noalias const quat = buffers->get_quat(offload);
|
||||
const AtomVecEllipsoid::Bonus *bonus = avec->bonus;
|
||||
const int *ellipsoid = atom->ellipsoid;
|
||||
|
||||
@ -225,15 +225,15 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
||||
}
|
||||
#endif
|
||||
|
||||
// const int * restrict const ilist = list->ilist;
|
||||
const int * restrict const numneigh = list->numneigh;
|
||||
const int * restrict const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * restrict const firstneigh = buffers->firstneigh(list);
|
||||
const flt_t * restrict const special_lj = fc.special_lj;
|
||||
// const int * _noalias const ilist = list->ilist;
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const flt_t * _noalias const special_lj = fc.special_lj;
|
||||
|
||||
const FC_PACKED1_T * restrict const ijc = fc.ijc[0];
|
||||
const FC_PACKED2_T * restrict const lj34 = fc.lj34[0];
|
||||
const FC_PACKED3_T * restrict const ic = fc.ic;
|
||||
const FC_PACKED1_T * _noalias const ijc = fc.ijc[0];
|
||||
const FC_PACKED2_T * _noalias const lj34 = fc.lj34[0];
|
||||
const FC_PACKED3_T * _noalias const ic = fc.ic;
|
||||
const flt_t mu = fc.mu;
|
||||
const flt_t gamma = fc.gamma;
|
||||
const flt_t upsilon = fc.upsilon;
|
||||
@ -255,8 +255,8 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
||||
x_size, q_size, ev_size, f_stride);
|
||||
|
||||
int tc;
|
||||
FORCE_T * restrict f_start;
|
||||
acc_t * restrict ev_global;
|
||||
FORCE_T * _noalias f_start;
|
||||
acc_t * _noalias ev_global;
|
||||
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
|
||||
const int max_nbors = _max_nbors;
|
||||
const int nthreads = tc;
|
||||
@ -351,25 +351,25 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
||||
iifrom += astart;
|
||||
iito += astart;
|
||||
|
||||
FORCE_T * restrict const f = f_start - minlocal * 2 + (tid * f_stride);
|
||||
FORCE_T * _noalias const f = f_start - minlocal * 2 + (tid * f_stride);
|
||||
memset(f + minlocal * 2, 0, f_stride * sizeof(FORCE_T));
|
||||
|
||||
flt_t * restrict const rsq_form = rsq_formi + tid * max_nbors;
|
||||
flt_t * restrict const delx_form = delx_formi + tid * max_nbors;
|
||||
flt_t * restrict const dely_form = dely_formi + tid * max_nbors;
|
||||
flt_t * restrict const delz_form = delz_formi + tid * max_nbors;
|
||||
int * restrict const jtype_form = jtype_formi + tid * max_nbors;
|
||||
int * restrict const jlist_form = jlist_formi + tid * max_nbors;
|
||||
flt_t * _noalias const rsq_form = rsq_formi + tid * max_nbors;
|
||||
flt_t * _noalias const delx_form = delx_formi + tid * max_nbors;
|
||||
flt_t * _noalias const dely_form = dely_formi + tid * max_nbors;
|
||||
flt_t * _noalias const delz_form = delz_formi + tid * max_nbors;
|
||||
int * _noalias const jtype_form = jtype_formi + tid * max_nbors;
|
||||
int * _noalias const jlist_form = jlist_formi + tid * max_nbors;
|
||||
|
||||
int ierror = 0;
|
||||
for (int i = iifrom; i < iito; ++i) {
|
||||
// const int i = ilist[ii];
|
||||
const int itype = x[i].w;
|
||||
const int ptr_off = itype * ntypes;
|
||||
const FC_PACKED1_T * restrict const ijci = ijc + ptr_off;
|
||||
const FC_PACKED2_T * restrict const lj34i = lj34 + ptr_off;
|
||||
const FC_PACKED1_T * _noalias const ijci = ijc + ptr_off;
|
||||
const FC_PACKED2_T * _noalias const lj34i = lj34 + ptr_off;
|
||||
|
||||
const int * restrict const jlist = firstneigh + cnumneigh[i];
|
||||
const int * _noalias const jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
|
||||
const flt_t xtmp = x[i].x;
|
||||
@ -819,7 +819,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
||||
|
||||
if (EVFLAG) {
|
||||
if (vflag==2) {
|
||||
const ATOM_T * restrict const xo = x + minlocal;
|
||||
const ATOM_T * _noalias const xo = x + minlocal;
|
||||
#pragma vector nontemporal
|
||||
for (int n = iifrom; n < iito; n++) {
|
||||
const int nt2 = n * 2;
|
||||
|
||||
@ -62,7 +62,10 @@ class PairGayBerneIntel : public PairGayBerne {
|
||||
typedef struct { flt_t lj3, lj4; } fc_packed2;
|
||||
typedef struct { flt_t shape2[4], well[4]; } fc_packed3;
|
||||
|
||||
__declspec(align(64)) flt_t special_lj[4], gamma, upsilon, mu;
|
||||
_alignvar(flt_t special_lj[4],64);
|
||||
_alignvar(flt_t gamma,64);
|
||||
_alignvar(flt_t upsilon,64);
|
||||
_alignvar(flt_t mu,64);
|
||||
fc_packed1 **ijc;
|
||||
fc_packed2 **lj34;
|
||||
fc_packed3 *ic;
|
||||
|
||||
@ -143,25 +143,25 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
|
||||
const int ago = neighbor->ago;
|
||||
IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
|
||||
|
||||
ATOM_T * restrict const x = buffers->get_x(offload);
|
||||
flt_t * restrict const q = buffers->get_q(offload);
|
||||
ATOM_T * _noalias const x = buffers->get_x(offload);
|
||||
flt_t * _noalias const q = buffers->get_q(offload);
|
||||
|
||||
const int * restrict const numneigh = list->numneigh;
|
||||
const int * restrict const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * restrict const firstneigh = buffers->firstneigh(list);
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
|
||||
const flt_t * restrict const special_coul = fc.special_coul;
|
||||
const flt_t * restrict const special_lj = fc.special_lj;
|
||||
const flt_t * _noalias const special_coul = fc.special_coul;
|
||||
const flt_t * _noalias const special_lj = fc.special_lj;
|
||||
const flt_t qqrd2e = force->qqrd2e;
|
||||
const flt_t inv_denom_lj = (flt_t)1.0/denom_lj;
|
||||
|
||||
const flt_t * restrict const cutsq = fc.cutsq[0];
|
||||
const LJ_T * restrict const lj = fc.lj[0];
|
||||
const TABLE_T * restrict const table = fc.table;
|
||||
const flt_t * restrict const etable = fc.etable;
|
||||
const flt_t * restrict const detable = fc.detable;
|
||||
const flt_t * restrict const ctable = fc.ctable;
|
||||
const flt_t * restrict const dctable = fc.dctable;
|
||||
const flt_t * _noalias const cutsq = fc.cutsq[0];
|
||||
const LJ_T * _noalias const lj = fc.lj[0];
|
||||
const TABLE_T * _noalias const table = fc.table;
|
||||
const flt_t * _noalias const etable = fc.etable;
|
||||
const flt_t * _noalias const detable = fc.detable;
|
||||
const flt_t * _noalias const ctable = fc.ctable;
|
||||
const flt_t * _noalias const dctable = fc.dctable;
|
||||
const flt_t cut_ljsq = fc.cut_ljsq;
|
||||
const flt_t cut_lj_innersq = fc.cut_lj_innersq;
|
||||
const flt_t cut_coulsq = fc.cut_coulsq;
|
||||
@ -178,8 +178,8 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
|
||||
x_size, q_size, ev_size, f_stride);
|
||||
|
||||
int tc;
|
||||
FORCE_T * restrict f_start;
|
||||
acc_t * restrict ev_global;
|
||||
FORCE_T * _noalias f_start;
|
||||
acc_t * _noalias ev_global;
|
||||
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
|
||||
|
||||
const int nthreads = tc;
|
||||
@ -242,7 +242,7 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
|
||||
iifrom += astart;
|
||||
iito += astart;
|
||||
|
||||
FORCE_T * restrict const f = f_start - minlocal + (tid * f_stride);
|
||||
FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride);
|
||||
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
|
||||
flt_t cutboth = cut_coulsq;
|
||||
|
||||
@ -251,10 +251,10 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
|
||||
const int itype = x[i].w;
|
||||
|
||||
const int ptr_off = itype * ntypes;
|
||||
const flt_t * restrict const cutsqi = cutsq + ptr_off;
|
||||
const LJ_T * restrict const lji = lj + ptr_off;
|
||||
const flt_t * _noalias const cutsqi = cutsq + ptr_off;
|
||||
const LJ_T * _noalias const lji = lj + ptr_off;
|
||||
|
||||
const int * restrict const jlist = firstneigh + cnumneigh[i];
|
||||
const int * _noalias const jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
|
||||
acc_t fxtmp,fytmp,fztmp,fwtmp;
|
||||
|
||||
@ -62,8 +62,8 @@ class PairLJCharmmCoulLongIntel : public PairLJCharmmCoulLong {
|
||||
class ForceConst {
|
||||
public:
|
||||
typedef struct { flt_t r, dr, f, df; } table_t;
|
||||
__declspec(align(64)) flt_t special_coul[4];
|
||||
__declspec(align(64)) flt_t special_lj[4];
|
||||
_alignvar(flt_t special_coul[4],64);
|
||||
_alignvar(flt_t special_lj[4],64);
|
||||
flt_t **cutsq, g_ewald, tabinnersq;
|
||||
flt_t cut_coulsq, cut_ljsq;
|
||||
flt_t cut_lj_innersq;
|
||||
|
||||
@ -143,24 +143,24 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
|
||||
const int ago = neighbor->ago;
|
||||
IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
|
||||
|
||||
ATOM_T * restrict const x = buffers->get_x(offload);
|
||||
flt_t * restrict const q = buffers->get_q(offload);
|
||||
ATOM_T * _noalias const x = buffers->get_x(offload);
|
||||
flt_t * _noalias const q = buffers->get_q(offload);
|
||||
|
||||
const int * restrict const numneigh = list->numneigh;
|
||||
const int * restrict const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * restrict const firstneigh = buffers->firstneigh(list);
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
|
||||
const flt_t * restrict const special_coul = fc.special_coul;
|
||||
const flt_t * restrict const special_lj = fc.special_lj;
|
||||
const flt_t * _noalias const special_coul = fc.special_coul;
|
||||
const flt_t * _noalias const special_lj = fc.special_lj;
|
||||
const flt_t qqrd2e = force->qqrd2e;
|
||||
|
||||
const C_FORCE_T * restrict const c_force = fc.c_force[0];
|
||||
const C_ENERGY_T * restrict const c_energy = fc.c_energy[0];
|
||||
const TABLE_T * restrict const table = fc.table;
|
||||
const flt_t * restrict const etable = fc.etable;
|
||||
const flt_t * restrict const detable = fc.detable;
|
||||
const flt_t * restrict const ctable = fc.ctable;
|
||||
const flt_t * restrict const dctable = fc.dctable;
|
||||
const C_FORCE_T * _noalias const c_force = fc.c_force[0];
|
||||
const C_ENERGY_T * _noalias const c_energy = fc.c_energy[0];
|
||||
const TABLE_T * _noalias const table = fc.table;
|
||||
const flt_t * _noalias const etable = fc.etable;
|
||||
const flt_t * _noalias const detable = fc.detable;
|
||||
const flt_t * _noalias const ctable = fc.ctable;
|
||||
const flt_t * _noalias const dctable = fc.dctable;
|
||||
const flt_t g_ewald = fc.g_ewald;
|
||||
const flt_t tabinnersq = fc.tabinnersq;
|
||||
|
||||
@ -174,8 +174,8 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
|
||||
x_size, q_size, ev_size, f_stride);
|
||||
|
||||
int tc;
|
||||
FORCE_T * restrict f_start;
|
||||
acc_t * restrict ev_global;
|
||||
FORCE_T * _noalias f_start;
|
||||
acc_t * _noalias ev_global;
|
||||
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
|
||||
|
||||
const int nthreads = tc;
|
||||
@ -237,17 +237,17 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
|
||||
iifrom += astart;
|
||||
iito += astart;
|
||||
|
||||
FORCE_T * restrict const f = f_start - minlocal + (tid * f_stride);
|
||||
FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride);
|
||||
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
|
||||
|
||||
for (int i = iifrom; i < iito; ++i) {
|
||||
const int itype = x[i].w;
|
||||
|
||||
const int ptr_off = itype * ntypes;
|
||||
const C_FORCE_T * restrict const c_forcei = c_force + ptr_off;
|
||||
const C_ENERGY_T * restrict const c_energyi = c_energy + ptr_off;
|
||||
const C_FORCE_T * _noalias const c_forcei = c_force + ptr_off;
|
||||
const C_ENERGY_T * _noalias const c_energyi = c_energy + ptr_off;
|
||||
|
||||
const int * restrict const jlist = firstneigh + cnumneigh[i];
|
||||
const int * _noalias const jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
|
||||
acc_t fxtmp,fytmp,fztmp,fwtmp;
|
||||
|
||||
@ -64,8 +64,8 @@ class PairLJCutCoulLongIntel : public PairLJCutCoulLong {
|
||||
typedef struct { flt_t cutsq, cut_ljsq, lj1, lj2; } c_force_t;
|
||||
typedef struct { flt_t lj3, lj4, offset, pad; } c_energy_t;
|
||||
typedef struct { flt_t r, dr, f, df; } table_t;
|
||||
__declspec(align(64)) flt_t special_coul[4];
|
||||
__declspec(align(64)) flt_t special_lj[4];
|
||||
_alignvar(flt_t special_coul[4],64);
|
||||
_alignvar(flt_t special_lj[4],64);
|
||||
flt_t g_ewald, tabinnersq;
|
||||
c_force_t **c_force;
|
||||
c_energy_t **c_energy;
|
||||
|
||||
@ -134,14 +134,14 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
|
||||
const int ago = neighbor->ago;
|
||||
IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
|
||||
|
||||
ATOM_T * restrict const x = buffers->get_x(offload);
|
||||
ATOM_T * _noalias const x = buffers->get_x(offload);
|
||||
|
||||
const int * restrict const numneigh = list->numneigh;
|
||||
const int * restrict const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * restrict const firstneigh = buffers->firstneigh(list);
|
||||
const flt_t * restrict const special_lj = fc.special_lj;
|
||||
const FC_PACKED1_T * restrict const ljc12o = fc.ljc12o[0];
|
||||
const FC_PACKED2_T * restrict const lj34 = fc.lj34[0];
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const flt_t * _noalias const special_lj = fc.special_lj;
|
||||
const FC_PACKED1_T * _noalias const ljc12o = fc.ljc12o[0];
|
||||
const FC_PACKED2_T * _noalias const lj34 = fc.lj34[0];
|
||||
|
||||
const int ntypes = atom->ntypes + 1;
|
||||
const int eatom = this->eflag_atom;
|
||||
@ -153,8 +153,8 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
|
||||
x_size, q_size, ev_size, f_stride);
|
||||
|
||||
int tc;
|
||||
FORCE_T * restrict f_start;
|
||||
acc_t * restrict ev_global;
|
||||
FORCE_T * _noalias f_start;
|
||||
acc_t * _noalias ev_global;
|
||||
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
|
||||
const int nthreads = tc;
|
||||
int *overflow = fix->get_off_overflow_flag();
|
||||
@ -184,17 +184,17 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
|
||||
iifrom += astart;
|
||||
iito += astart;
|
||||
|
||||
FORCE_T * restrict const f = f_start - minlocal + (tid * f_stride);
|
||||
FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride);
|
||||
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
|
||||
|
||||
for (int i = iifrom; i < iito; ++i) {
|
||||
const int itype = x[i].w;
|
||||
|
||||
const int ptr_off = itype * ntypes;
|
||||
const FC_PACKED1_T * restrict const ljc12oi = ljc12o + ptr_off;
|
||||
const FC_PACKED2_T * restrict const lj34i = lj34 + ptr_off;
|
||||
const FC_PACKED1_T * _noalias const ljc12oi = ljc12o + ptr_off;
|
||||
const FC_PACKED2_T * _noalias const lj34i = lj34 + ptr_off;
|
||||
|
||||
const int * restrict const jlist = firstneigh + cnumneigh[i];
|
||||
const int * _noalias const jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
|
||||
acc_t fxtmp, fytmp, fztmp, fwtmp;
|
||||
|
||||
@ -62,7 +62,7 @@ class PairLJCutIntel : public PairLJCut {
|
||||
typedef struct { flt_t cutsq, lj1, lj2, offset; } fc_packed1;
|
||||
typedef struct { flt_t lj3, lj4; } fc_packed2;
|
||||
|
||||
__declspec(align(64)) flt_t special_lj[4];
|
||||
_alignvar(flt_t special_lj[4],64);
|
||||
fc_packed1 **ljc12o;
|
||||
fc_packed2 **lj34;
|
||||
|
||||
|
||||
@ -17,6 +17,7 @@
|
||||
#include "domain.h"
|
||||
#include "comm.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "force.h"
|
||||
#include "pair.h"
|
||||
#include "bond.h"
|
||||
@ -81,14 +82,9 @@ void VerletIntel::init()
|
||||
// set flags for what arrays to clear in force_clear()
|
||||
// need to clear additionals arrays if they exist
|
||||
|
||||
torqueflag = 0;
|
||||
torqueflag = extraflag = 0;
|
||||
if (atom->torque_flag) torqueflag = 1;
|
||||
erforceflag = 0;
|
||||
if (atom->erforce_flag) erforceflag = 1;
|
||||
e_flag = 0;
|
||||
if (atom->e_flag) e_flag = 1;
|
||||
rho_flag = 0;
|
||||
if (atom->rho_flag) rho_flag = 1;
|
||||
if (atom->avec->forceclearflag) extraflag = 1;
|
||||
|
||||
// orthogonal vs triclinic simulation box
|
||||
|
||||
@ -276,8 +272,10 @@ void VerletIntel::run(int n)
|
||||
|
||||
// initial time integration
|
||||
|
||||
timer->stamp();
|
||||
modify->initial_integrate(vflag);
|
||||
if (n_post_integrate) modify->post_integrate();
|
||||
timer->stamp(Timer::MODIFY);
|
||||
|
||||
// regular communication vs neighbor list rebuild
|
||||
|
||||
@ -286,9 +284,13 @@ void VerletIntel::run(int n)
|
||||
if (nflag == 0) {
|
||||
timer->stamp();
|
||||
comm->forward_comm();
|
||||
timer->stamp(TIME_COMM);
|
||||
timer->stamp(Timer::COMM);
|
||||
} else {
|
||||
if (n_pre_exchange) modify->pre_exchange();
|
||||
if (n_pre_exchange) {
|
||||
timer->stamp();
|
||||
modify->pre_exchange();
|
||||
timer->stamp(Timer::MODIFY);
|
||||
}
|
||||
if (triclinic) domain->x2lamda(atom->nlocal);
|
||||
domain->pbc();
|
||||
if (domain->box_change) {
|
||||
@ -301,10 +303,13 @@ void VerletIntel::run(int n)
|
||||
if (sortflag && ntimestep >= atom->nextsort) atom->sort();
|
||||
comm->borders();
|
||||
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
|
||||
timer->stamp(TIME_COMM);
|
||||
if (n_pre_neighbor) modify->pre_neighbor();
|
||||
timer->stamp(Timer::COMM);
|
||||
if (n_pre_neighbor) {
|
||||
modify->pre_neighbor();
|
||||
timer->stamp(Timer::MODIFY);
|
||||
}
|
||||
neighbor->build();
|
||||
timer->stamp(TIME_NEIGHBOR);
|
||||
timer->stamp(Timer::NEIGH);
|
||||
}
|
||||
|
||||
// force computations
|
||||
@ -313,13 +318,18 @@ void VerletIntel::run(int n)
|
||||
// and Pair:ev_tally() needs to be called before any tallying
|
||||
|
||||
force_clear();
|
||||
if (n_pre_force) modify->pre_force(vflag);
|
||||
|
||||
timer->stamp();
|
||||
|
||||
if (n_pre_force) {
|
||||
modify->pre_force(vflag);
|
||||
timer->stamp(Timer::MODIFY);
|
||||
}
|
||||
|
||||
|
||||
if (pair_compute_flag) {
|
||||
force->pair->compute(eflag,vflag);
|
||||
timer->stamp(TIME_PAIR);
|
||||
timer->stamp(Timer::PAIR);
|
||||
}
|
||||
|
||||
if (atom->molecular) {
|
||||
@ -327,18 +337,18 @@ void VerletIntel::run(int n)
|
||||
if (force->angle) force->angle->compute(eflag,vflag);
|
||||
if (force->dihedral) force->dihedral->compute(eflag,vflag);
|
||||
if (force->improper) force->improper->compute(eflag,vflag);
|
||||
timer->stamp(TIME_BOND);
|
||||
timer->stamp(Timer::BOND);
|
||||
}
|
||||
|
||||
if (kspace_compute_flag) {
|
||||
force->kspace->compute(eflag,vflag);
|
||||
timer->stamp(TIME_KSPACE);
|
||||
timer->stamp(Timer::KSPACE);
|
||||
}
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (sync_mode == 1) {
|
||||
fix_intel->sync_coprocessor();
|
||||
timer->stamp(TIME_PAIR);
|
||||
timer->stamp(Timer::PAIR);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -346,13 +356,13 @@ void VerletIntel::run(int n)
|
||||
|
||||
if (force->newton) {
|
||||
comm->reverse_comm();
|
||||
timer->stamp(TIME_COMM);
|
||||
timer->stamp(Timer::COMM);
|
||||
}
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (sync_mode == 2) {
|
||||
fix_intel->sync_coprocessor();
|
||||
timer->stamp(TIME_PAIR);
|
||||
timer->stamp(Timer::PAIR);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -361,13 +371,14 @@ void VerletIntel::run(int n)
|
||||
if (n_post_force) modify->post_force(vflag);
|
||||
modify->final_integrate();
|
||||
if (n_end_of_step) modify->end_of_step();
|
||||
timer->stamp(Timer::MODIFY);
|
||||
|
||||
// all output
|
||||
|
||||
if (ntimestep == output->next) {
|
||||
timer->stamp();
|
||||
output->write(ntimestep);
|
||||
timer->stamp(TIME_OUTPUT);
|
||||
timer->stamp(Timer::OUTPUT);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -388,7 +399,7 @@ void VerletIntel::cleanup()
|
||||
|
||||
void VerletIntel::force_clear()
|
||||
{
|
||||
int i;
|
||||
size_t nbytes;
|
||||
|
||||
if (external_force_clear) return;
|
||||
|
||||
@ -396,19 +407,16 @@ void VerletIntel::force_clear()
|
||||
// if either newton flag is set, also include ghosts
|
||||
// when using threads always clear all forces.
|
||||
|
||||
if (neighbor->includegroup == 0) {
|
||||
int nall;
|
||||
if (force->newton) nall = atom->nlocal + atom->nghost;
|
||||
else nall = atom->nlocal;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
size_t nbytes = sizeof(double) * nall;
|
||||
if (neighbor->includegroup == 0) {
|
||||
nbytes = sizeof(double) * nlocal;
|
||||
if (force->newton) nbytes += sizeof(double) * atom->nghost;
|
||||
|
||||
if (nbytes) {
|
||||
memset(&(atom->f[0][0]),0,3*nbytes);
|
||||
if (torqueflag) memset(&(atom->torque[0][0]),0,3*nbytes);
|
||||
if (erforceflag) memset(&(atom->erforce[0]), 0, nbytes);
|
||||
if (e_flag) memset(&(atom->de[0]), 0, nbytes);
|
||||
if (rho_flag) memset(&(atom->drho[0]), 0, nbytes);
|
||||
memset(&atom->f[0][0],0,3*nbytes);
|
||||
if (torqueflag) memset(&atom->torque[0][0],0,3*nbytes);
|
||||
if (extraflag) atom->avec->force_clear(0,nbytes);
|
||||
}
|
||||
|
||||
// neighbor includegroup flag is set
|
||||
@ -416,70 +424,21 @@ void VerletIntel::force_clear()
|
||||
// if either newton flag is set, also include ghosts
|
||||
|
||||
} else {
|
||||
int nall = atom->nfirst;
|
||||
nbytes = sizeof(double) * atom->nfirst;
|
||||
|
||||
double **f = atom->f;
|
||||
for (i = 0; i < nall; i++) {
|
||||
f[i][0] = 0.0;
|
||||
f[i][1] = 0.0;
|
||||
f[i][2] = 0.0;
|
||||
}
|
||||
|
||||
if (torqueflag) {
|
||||
double **torque = atom->torque;
|
||||
for (i = 0; i < nall; i++) {
|
||||
torque[i][0] = 0.0;
|
||||
torque[i][1] = 0.0;
|
||||
torque[i][2] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
if (erforceflag) {
|
||||
double *erforce = atom->erforce;
|
||||
for (i = 0; i < nall; i++) erforce[i] = 0.0;
|
||||
}
|
||||
|
||||
if (e_flag) {
|
||||
double *de = atom->de;
|
||||
for (i = 0; i < nall; i++) de[i] = 0.0;
|
||||
}
|
||||
|
||||
if (rho_flag) {
|
||||
double *drho = atom->drho;
|
||||
for (i = 0; i < nall; i++) drho[i] = 0.0;
|
||||
if (nbytes) {
|
||||
memset(&atom->f[0][0],0,3*nbytes);
|
||||
if (torqueflag) memset(&atom->torque[0][0],0,3*nbytes);
|
||||
if (extraflag) atom->avec->force_clear(0,nbytes);
|
||||
}
|
||||
|
||||
if (force->newton) {
|
||||
nall = atom->nlocal + atom->nghost;
|
||||
nbytes = sizeof(double) * atom->nghost;
|
||||
|
||||
for (i = atom->nlocal; i < nall; i++) {
|
||||
f[i][0] = 0.0;
|
||||
f[i][1] = 0.0;
|
||||
f[i][2] = 0.0;
|
||||
}
|
||||
|
||||
if (torqueflag) {
|
||||
double **torque = atom->torque;
|
||||
for (i = atom->nlocal; i < nall; i++) {
|
||||
torque[i][0] = 0.0;
|
||||
torque[i][1] = 0.0;
|
||||
torque[i][2] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
if (erforceflag) {
|
||||
double *erforce = atom->erforce;
|
||||
for (i = atom->nlocal; i < nall; i++) erforce[i] = 0.0;
|
||||
}
|
||||
|
||||
if (e_flag) {
|
||||
double *de = atom->de;
|
||||
for (i = 0; i < nall; i++) de[i] = 0.0;
|
||||
}
|
||||
|
||||
if (rho_flag) {
|
||||
double *drho = atom->drho;
|
||||
for (i = 0; i < nall; i++) drho[i] = 0.0;
|
||||
if (nbytes) {
|
||||
memset(&atom->f[nlocal][0],0,3*nbytes);
|
||||
if (torqueflag) memset(&atom->torque[nlocal][0],0,3*nbytes);
|
||||
if (extraflag) atom->avec->force_clear(nlocal,nbytes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -39,8 +39,7 @@ class VerletIntel : public Integrate {
|
||||
|
||||
protected:
|
||||
int triclinic; // 0 if domain is orthog, 1 if triclinic
|
||||
int torqueflag,erforceflag;
|
||||
int e_flag,rho_flag;
|
||||
int torqueflag,extraflag;
|
||||
|
||||
virtual void force_clear();
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
|
||||
@ -52,6 +52,9 @@ VerletSplitIntel::VerletSplitIntel(LAMMPS *lmp, int narg, char **arg) :
|
||||
if (universe->procs_per_world[0] % universe->procs_per_world[1])
|
||||
error->universe_all(FLERR,"Verlet/split requires Rspace partition "
|
||||
"size be multiple of Kspace partition size");
|
||||
if (comm->style != 0)
|
||||
error->universe_all(FLERR,"Verlet/split can only currently be used with "
|
||||
"comm_style brick");
|
||||
|
||||
// master = 1 for Rspace procs, 0 for Kspace procs
|
||||
|
||||
@ -214,6 +217,9 @@ VerletSplitIntel::~VerletSplitIntel()
|
||||
|
||||
void VerletSplitIntel::init()
|
||||
{
|
||||
if (comm->style != 0)
|
||||
error->universe_all(FLERR,"Verlet/split can only currently be used with "
|
||||
"comm_style brick");
|
||||
if (!force->kspace && comm->me == 0)
|
||||
error->warning(FLERR,"No Kspace calculation with verlet/split");
|
||||
|
||||
@ -277,7 +283,7 @@ void VerletSplitIntel::run(int n)
|
||||
|
||||
MPI_Barrier(universe->uworld);
|
||||
timer->init();
|
||||
timer->barrier_start(TIME_LOOP);
|
||||
timer->barrier_start();
|
||||
|
||||
// setup initial Rspace <-> Kspace comm params
|
||||
|
||||
@ -323,7 +329,7 @@ void VerletSplitIntel::run(int n)
|
||||
if (nflag == 0) {
|
||||
timer->stamp();
|
||||
comm->forward_comm();
|
||||
timer->stamp(TIME_COMM);
|
||||
timer->stamp(Timer::COMM);
|
||||
} else {
|
||||
if (n_pre_exchange) modify->pre_exchange();
|
||||
if (triclinic) domain->x2lamda(atom->nlocal);
|
||||
@ -338,10 +344,10 @@ void VerletSplitIntel::run(int n)
|
||||
if (sortflag && ntimestep >= atom->nextsort) atom->sort();
|
||||
comm->borders();
|
||||
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
|
||||
timer->stamp(TIME_COMM);
|
||||
timer->stamp(Timer::COMM);
|
||||
if (n_pre_neighbor) modify->pre_neighbor();
|
||||
neighbor->build();
|
||||
timer->stamp(TIME_NEIGHBOR);
|
||||
timer->stamp(Timer::NEIGH);
|
||||
}
|
||||
}
|
||||
|
||||
@ -361,7 +367,7 @@ void VerletSplitIntel::run(int n)
|
||||
timer->stamp();
|
||||
if (force->pair) {
|
||||
force->pair->compute(eflag,vflag);
|
||||
timer->stamp(TIME_PAIR);
|
||||
timer->stamp(Timer::PAIR);
|
||||
}
|
||||
|
||||
if (atom->molecular) {
|
||||
@ -369,25 +375,25 @@ void VerletSplitIntel::run(int n)
|
||||
if (force->angle) force->angle->compute(eflag,vflag);
|
||||
if (force->dihedral) force->dihedral->compute(eflag,vflag);
|
||||
if (force->improper) force->improper->compute(eflag,vflag);
|
||||
timer->stamp(TIME_BOND);
|
||||
timer->stamp(Timer::BOND);
|
||||
}
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (sync_mode == 1) {
|
||||
fix_intel->sync_coprocessor();
|
||||
timer->stamp(TIME_PAIR);
|
||||
timer->stamp(Timer::PAIR);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (force->newton) {
|
||||
comm->reverse_comm();
|
||||
timer->stamp(TIME_COMM);
|
||||
timer->stamp(Timer::COMM);
|
||||
}
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (sync_mode == 2) {
|
||||
fix_intel->sync_coprocessor();
|
||||
timer->stamp(TIME_PAIR);
|
||||
timer->stamp(Timer::PAIR);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -400,14 +406,14 @@ void VerletSplitIntel::run(int n)
|
||||
if (force->kspace) {
|
||||
timer->stamp();
|
||||
force->kspace->compute(eflag,vflag);
|
||||
timer->stamp(TIME_KSPACE);
|
||||
timer->stamp(Timer::KSPACE);
|
||||
}
|
||||
|
||||
// TIP4P PPPM puts forces on ghost atoms, so must reverse_comm()
|
||||
|
||||
if (tip4p_flag && force->newton) {
|
||||
comm->reverse_comm();
|
||||
timer->stamp(TIME_COMM);
|
||||
timer->stamp(Timer::COMM);
|
||||
}
|
||||
}
|
||||
|
||||
@ -419,14 +425,16 @@ void VerletSplitIntel::run(int n)
|
||||
// all output
|
||||
|
||||
if (master) {
|
||||
timer->stamp();
|
||||
if (n_post_force) modify->post_force(vflag);
|
||||
modify->final_integrate();
|
||||
if (n_end_of_step) modify->end_of_step();
|
||||
timer->stamp(Timer::MODIFY);
|
||||
|
||||
if (ntimestep == output->next) {
|
||||
timer->stamp();
|
||||
output->write(ntimestep);
|
||||
timer->stamp(TIME_OUTPUT);
|
||||
timer->stamp(Timer::OUTPUT);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -498,7 +506,7 @@ void VerletSplitIntel::rk_setup()
|
||||
atom->map_clear();
|
||||
comm->borders();
|
||||
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
|
||||
timer->stamp(TIME_COMM);
|
||||
timer->stamp(Timer::COMM);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -549,7 +557,7 @@ void VerletSplitIntel::r2k_comm()
|
||||
if (tip4p_flag && !master) {
|
||||
timer->stamp();
|
||||
comm->forward_comm();
|
||||
timer->stamp(TIME_COMM);
|
||||
timer->stamp(Timer::COMM);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1442,30 +1442,6 @@ void Input::package()
|
||||
delete [] fixarg;
|
||||
|
||||
} else if (strcmp(arg[0],"intel") == 0) {
|
||||
|
||||
// add omp package for non-pair routines
|
||||
|
||||
/*
|
||||
char **fixarg = new char*[2+narg];
|
||||
fixarg[0] = (char *) "package_omp";
|
||||
fixarg[1] = (char *) "all";
|
||||
fixarg[2] = (char *) "OMP";
|
||||
int omp_narg = 3;
|
||||
if (narg > 1) {
|
||||
fixarg[3] = arg[1];
|
||||
omp_narg++;
|
||||
if (narg > 2)
|
||||
for (int i = 2; i < narg; i++)
|
||||
if (strcmp(arg[i],"mixed") == 0) {
|
||||
fixarg[4] = arg[i];
|
||||
omp_narg++;
|
||||
}
|
||||
}
|
||||
modify->add_fix(omp_narg,fixarg);
|
||||
|
||||
// add intel package for neighbor and pair routines
|
||||
*/
|
||||
|
||||
if (!modify->check_package("Intel"))
|
||||
error->all(FLERR,
|
||||
"Package intel command without USER-INTEL package installed");
|
||||
@ -1478,17 +1454,12 @@ void Input::package()
|
||||
modify->add_fix(2+narg,fixarg);
|
||||
delete [] fixarg;
|
||||
|
||||
/*
|
||||
// if running with offload, set run_style to verlet/intel
|
||||
// set integrator = verlet/intel
|
||||
// -sf intel does same thing in Update constructor via suffix
|
||||
|
||||
#ifdef LMP_INTEL_OFFLOAD
|
||||
#ifdef __INTEL_OFFLOAD
|
||||
char *str;
|
||||
str = (char *) "verlet/intel";
|
||||
update->create_integrate(1,&str,0);
|
||||
#endif
|
||||
#endif
|
||||
*/
|
||||
|
||||
} else error->all(FLERR,"Illegal package command");
|
||||
}
|
||||
|
||||
@ -53,6 +53,15 @@ void Integrate::init()
|
||||
else pair_compute_flag = 0;
|
||||
if (force->kspace && force->kspace->compute_flag) kspace_compute_flag = 1;
|
||||
else kspace_compute_flag = 0;
|
||||
|
||||
// should add checks:
|
||||
// for any acceleration package that has its own integrate/minimize
|
||||
// in case input script has reset the run or minimize style explicitly
|
||||
// e.g. invalid to have intel pair style with non-intel verlet
|
||||
// but OK to have intel verlet with non intel pair style (just warn)
|
||||
// ditto for USER-CUDA and KOKKOS package verlet with their pair, fix, etc
|
||||
// making these checks would require all the pair, fix, etc styles have
|
||||
// cuda, kokkos, intel flags
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
||||
@ -667,6 +667,8 @@ void LAMMPS::post_create()
|
||||
{
|
||||
if (!suffix_enable) return;
|
||||
|
||||
// suffix will always be set if suffix_enable = 1
|
||||
|
||||
if (strcmp(suffix,"gpu") == 0 && !modify->check_package("GPU"))
|
||||
error->all(FLERR,"Using suffix gpu without GPU package installed");
|
||||
if (strcmp(suffix,"intel") == 0 && !modify->check_package("Intel"))
|
||||
@ -674,7 +676,10 @@ void LAMMPS::post_create()
|
||||
if (strcmp(suffix,"omp") == 0 && !modify->check_package("OMP"))
|
||||
error->all(FLERR,"Using suffix omp without USER-OMP package installed");
|
||||
|
||||
if (strcmp(suffix2,"omp") == 0 && !modify->check_package("OMP")) {
|
||||
// suffix2 only currently set by -sf intel
|
||||
// need to unset if LAMMPS was not built with USER-OMP package
|
||||
|
||||
if (suffix2 && strcmp(suffix2,"omp") == 0 && !modify->check_package("OMP")) {
|
||||
delete [] suffix2;
|
||||
suffix2 = NULL;
|
||||
}
|
||||
|
||||
@ -47,9 +47,6 @@ class LAMMPS {
|
||||
int cite_enable; // 1 if generating log.cite, 0 if disabled
|
||||
|
||||
class Cuda *cuda; // CUDA accelerator class
|
||||
//class GPU *gpu; // GPU accelerator class
|
||||
//class Intel *intel; // Intel accelerator class
|
||||
//class OMP *omp; // OMP accelerator class
|
||||
class KokkosLMP *kokkos; // KOKKOS accelerator class
|
||||
|
||||
class CiteMe *citeme; // citation info
|
||||
|
||||
@ -167,6 +167,33 @@ typedef int bigint;
|
||||
|
||||
}
|
||||
|
||||
// preprocessor macros for compiler specific settings
|
||||
// clear previous definitions to avoid redefinition warning
|
||||
#ifdef _alignvar
|
||||
#undef _alignvar
|
||||
#endif
|
||||
#ifdef _noalias
|
||||
#undef _noalias
|
||||
#endif
|
||||
|
||||
// define stack variable alignment
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#define _alignvar(expr,val) __declspec(align(val)) expr
|
||||
#elif defined(__GNUC__)
|
||||
#define _alignvar(expr,val) expr __attribute((aligned(val)))
|
||||
#else
|
||||
#define _alignvar(expr,val) expr
|
||||
#endif
|
||||
|
||||
// declaration to lift aliasing restrictions
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#define _noalias restrict
|
||||
#elif defined(__GNUC__)
|
||||
#define _noalias __restrict
|
||||
#else
|
||||
#define _noalias
|
||||
#endif
|
||||
|
||||
// settings to enable LAMMPS to build under Windows
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
@ -1 +1 @@
|
||||
#define LAMMPS_VERSION "14 Aug 2014"
|
||||
#define LAMMPS_VERSION "15 Aug 2014"
|
||||
|
||||
Reference in New Issue
Block a user