Adding full hybrid support to USER-INTEL package and fixing bug with EAM parameter initialization.
This commit is contained in:
@ -499,7 +499,7 @@ MPI task.
|
||||
When offloading to a coprocessor, "hybrid"_pair_hybrid.html styles
|
||||
that require skip lists for neighbor builds cannot be offloaded.
|
||||
Using "hybrid/overlay"_pair_hybrid.html is allowed. Only one intel
|
||||
accelerated style may be used with hybrid styles.
|
||||
accelerated style may be used with hybrid styles when offloading.
|
||||
"Special_bonds"_special_bonds.html exclusion lists are not currently
|
||||
supported with offload, however, the same effect can often be
|
||||
accomplished by setting cutoffs for excluded atom types to 0. None of
|
||||
|
||||
@ -314,7 +314,7 @@ void AngleHarmonicIntel::init_style()
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void AngleHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
IntelBuffers<flt_t,acc_t> * /*buffers*/)
|
||||
IntelBuffers<flt_t,acc_t> * /*buffers*/)
|
||||
{
|
||||
const int bp1 = atom->nangletypes + 1;
|
||||
fc.set_ntypes(bp1,memory);
|
||||
|
||||
@ -291,7 +291,7 @@ void BondFENEIntel::init_style()
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void BondFENEIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
IntelBuffers<flt_t,acc_t> * /*buffers*/)
|
||||
IntelBuffers<flt_t,acc_t> * /*buffers*/)
|
||||
{
|
||||
const int bp1 = atom->nbondtypes + 1;
|
||||
fc.set_ntypes(bp1,memory);
|
||||
|
||||
@ -416,7 +416,7 @@ void DihedralOPLSIntel::init_style()
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void DihedralOPLSIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
IntelBuffers<flt_t,acc_t> * /*buffers*/)
|
||||
IntelBuffers<flt_t,acc_t> * /*buffers*/)
|
||||
{
|
||||
const int bp1 = atom->ndihedraltypes + 1;
|
||||
fc.set_ntypes(bp1,memory);
|
||||
|
||||
@ -65,6 +65,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
|
||||
|
||||
_nbor_pack_width = 1;
|
||||
_three_body_neighbor = 0;
|
||||
_hybrid_nonpair = 0;
|
||||
|
||||
_precision_mode = PREC_MODE_MIXED;
|
||||
_offload_balance = -1.0;
|
||||
@ -266,8 +267,7 @@ FixIntel::~FixIntel()
|
||||
double *time1 = off_watch_pair();
|
||||
double *time2 = off_watch_neighbor();
|
||||
int *overflow = get_off_overflow_flag();
|
||||
if (_offload_balance != 0.0 && time1 != NULL && time2 != NULL &&
|
||||
overflow != NULL) {
|
||||
if (_offload_balance != 0.0) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(time1,time2,overflow:alloc_if(0) free_if(1))
|
||||
}
|
||||
@ -314,34 +314,63 @@ void FixIntel::init()
|
||||
|
||||
int nstyles = 0;
|
||||
if (force->pair_match("hybrid", 1) != NULL) {
|
||||
_pair_hybrid_flag = 1;
|
||||
PairHybrid *hybrid = (PairHybrid *) force->pair;
|
||||
for (int i = 0; i < hybrid->nstyles; i++)
|
||||
if (strstr(hybrid->keywords[i], "/intel") != NULL)
|
||||
nstyles++;
|
||||
if (force->newton_pair != 0 && force->pair->no_virial_fdotr_compute)
|
||||
error->all(FLERR,
|
||||
"Intel package requires fdotr virial with newton on.");
|
||||
} else if (force->pair_match("hybrid/overlay", 1) != NULL) {
|
||||
_pair_hybrid_flag = 1;
|
||||
PairHybridOverlay *hybrid = (PairHybridOverlay *) force->pair;
|
||||
for (int i = 0; i < hybrid->nstyles; i++)
|
||||
if (strstr(hybrid->keywords[i], "/intel") != NULL)
|
||||
nstyles++;
|
||||
else
|
||||
force->pair->no_virial_fdotr_compute = 1;
|
||||
if (force->newton_pair != 0 && force->pair->no_virial_fdotr_compute)
|
||||
error->all(FLERR,
|
||||
"Intel package requires fdotr virial with newton on.");
|
||||
} else
|
||||
_pair_hybrid_flag = 0;
|
||||
|
||||
if (nstyles > 1 && _pair_hybrid_flag) _pair_hybrid_flag = 2;
|
||||
else if (force->newton_pair == 0) _pair_hybrid_flag = 0;
|
||||
|
||||
_pair_hybrid_zero = 0;
|
||||
_zero_master = 0;
|
||||
|
||||
if (_pair_hybrid_flag && _hybrid_nonpair)
|
||||
if (_pair_hybrid_flag > 1 || force->newton_pair == 0)
|
||||
_pair_hybrid_zero = 1;
|
||||
_hybrid_nonpair = 0;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload_balance() != 0.0) {
|
||||
_pair_hybrid_zero = 0;
|
||||
if (force->newton_pair == 0) _pair_hybrid_flag = 0;
|
||||
if (nstyles > 1)
|
||||
error->all(FLERR,
|
||||
"Currently, cannot offload more than one intel style with hybrid.");
|
||||
}
|
||||
if (nstyles > 1)
|
||||
error->all(FLERR,
|
||||
"Currently, cannot use more than one intel style with hybrid.");
|
||||
#endif
|
||||
|
||||
check_neighbor_intel();
|
||||
|
||||
int off_mode = 0;
|
||||
if (_offload_balance != 0.0) off_mode = 1;
|
||||
if (_precision_mode == PREC_MODE_SINGLE) {
|
||||
_single_buffers->zero_ev();
|
||||
_single_buffers->grow_ncache(off_mode,_nthreads);
|
||||
_single_buffers->free_list_ptrs();
|
||||
} else if (_precision_mode == PREC_MODE_MIXED) {
|
||||
_mixed_buffers->zero_ev();
|
||||
_mixed_buffers->grow_ncache(off_mode,_nthreads);
|
||||
_mixed_buffers->free_list_ptrs();
|
||||
} else {
|
||||
_double_buffers->zero_ev();
|
||||
_double_buffers->grow_ncache(off_mode,_nthreads);
|
||||
_double_buffers->free_list_ptrs();
|
||||
}
|
||||
|
||||
_need_reduce = 0;
|
||||
@ -349,7 +378,7 @@ void FixIntel::init()
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixIntel::setup(int /*vflag*/)
|
||||
void FixIntel::setup(int vflag)
|
||||
{
|
||||
if (neighbor->style != Neighbor::BIN)
|
||||
error->all(FLERR,
|
||||
@ -395,8 +424,7 @@ void FixIntel::pair_init_check(const bool cdmessage)
|
||||
double *time1 = off_watch_pair();
|
||||
double *time2 = off_watch_neighbor();
|
||||
int *overflow = get_off_overflow_flag();
|
||||
if (_offload_balance !=0.0 && time1 != NULL && time2 != NULL &&
|
||||
overflow != NULL) {
|
||||
if (_offload_balance !=0.0) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(time1,time2:length(1) alloc_if(1) free_if(0)) \
|
||||
in(overflow:length(5) alloc_if(1) free_if(0))
|
||||
@ -419,6 +447,21 @@ void FixIntel::pair_init_check(const bool cdmessage)
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef LMP_INTEL_NBOR_COMPAT
|
||||
if (force->pair->manybody_flag && atom->molecular) {
|
||||
int flag = 0;
|
||||
if (atom->nbonds > 0 && force->special_lj[1] == 0.0 &&
|
||||
force->special_coul[1] == 0.0) flag = 1;
|
||||
if (atom->nangles > 0 && force->special_lj[2] == 0.0 &&
|
||||
force->special_coul[2] == 0.0) flag = 1;
|
||||
if (atom->ndihedrals > 0 && force->special_lj[3] == 0.0 &&
|
||||
force->special_coul[3] == 0.0) flag = 1;
|
||||
if (flag)
|
||||
error->all(FLERR,"Add -DLMP_INTEL_NBOR_COMPAT to build for special_bond"
|
||||
"exclusions with Intel");
|
||||
}
|
||||
#endif
|
||||
|
||||
int need_tag = 0;
|
||||
if (atom->molecular) need_tag = 1;
|
||||
|
||||
@ -477,11 +520,13 @@ void FixIntel::bond_init_check()
|
||||
if (force->pair_match("/intel", 0) != NULL)
|
||||
intel_pair = 1;
|
||||
else if (force->pair_match("hybrid", 1) != NULL) {
|
||||
_hybrid_nonpair = 1;
|
||||
PairHybrid *hybrid = (PairHybrid *) force->pair;
|
||||
for (int i = 0; i < hybrid->nstyles; i++)
|
||||
if (strstr(hybrid->keywords[i], "/intel") != NULL)
|
||||
intel_pair = 1;
|
||||
} else if (force->pair_match("hybrid/overlay", 1) != NULL) {
|
||||
_hybrid_nonpair = 1;
|
||||
PairHybridOverlay *hybrid = (PairHybridOverlay *) force->pair;
|
||||
for (int i = 0; i < hybrid->nstyles; i++)
|
||||
if (strstr(hybrid->keywords[i], "/intel") != NULL)
|
||||
@ -501,11 +546,13 @@ void FixIntel::kspace_init_check()
|
||||
if (force->pair_match("/intel", 0) != NULL)
|
||||
intel_pair = 1;
|
||||
else if (force->pair_match("hybrid", 1) != NULL) {
|
||||
_hybrid_nonpair = 1;
|
||||
PairHybrid *hybrid = (PairHybrid *) force->pair;
|
||||
for (int i = 0; i < hybrid->nstyles; i++)
|
||||
if (strstr(hybrid->keywords[i], "/intel") != NULL)
|
||||
intel_pair = 1;
|
||||
} else if (force->pair_match("hybrid/overlay", 1) != NULL) {
|
||||
_hybrid_nonpair = 1;
|
||||
PairHybridOverlay *hybrid = (PairHybridOverlay *) force->pair;
|
||||
for (int i = 0; i < hybrid->nstyles; i++)
|
||||
if (strstr(hybrid->keywords[i], "/intel") != NULL)
|
||||
@ -522,51 +569,60 @@ void FixIntel::check_neighbor_intel()
|
||||
{
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
_full_host_list = 0;
|
||||
#endif
|
||||
const int nrequest = neighbor->nrequest;
|
||||
|
||||
const int nrequest = neighbor->nrequest;
|
||||
for (int i = 0; i < nrequest; ++i) {
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_offload_balance != 0.0 && neighbor->requests[i]->intel == 0) {
|
||||
_full_host_list = 1;
|
||||
_offload_noghost = 0;
|
||||
}
|
||||
#endif
|
||||
if (neighbor->requests[i]->skip && _offload_balance != 0.0)
|
||||
error->all(FLERR, "Cannot yet use hybrid styles with Intel offload.");
|
||||
|
||||
// avoid flagging a neighbor list as both USER-INTEL and USER-OMP
|
||||
if (neighbor->requests[i]->intel)
|
||||
neighbor->requests[i]->omp = 0;
|
||||
|
||||
if (neighbor->requests[i]->skip)
|
||||
error->all(FLERR, "Hybrid styles with Intel package are unsupported.");
|
||||
}
|
||||
#else
|
||||
// avoid flagging a neighbor list as both USER-INTEL and USER-OMP
|
||||
const int nrequest = neighbor->nrequest;
|
||||
for (int i = 0; i < nrequest; ++i)
|
||||
if (neighbor->requests[i]->intel)
|
||||
neighbor->requests[i]->omp = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixIntel::pre_reverse(int /*eflag*/, int /*vflag*/)
|
||||
void FixIntel::_sync_main_arrays(const int prereverse)
|
||||
{
|
||||
if (!prereverse) _zero_master = 1;
|
||||
int done_this_step = prereverse;
|
||||
if (_pair_hybrid_zero == 0) done_this_step = 1;
|
||||
if (_force_array_m != 0) {
|
||||
if (_need_reduce) {
|
||||
reduce_results(&_force_array_m[0].x);
|
||||
_need_reduce = 0;
|
||||
}
|
||||
add_results(_force_array_m, _ev_array_d, _results_eatom, _results_vatom,0);
|
||||
_force_array_m = 0;
|
||||
if (done_this_step) _force_array_m = 0;
|
||||
else _ev_array_d = 0;
|
||||
} else if (_force_array_d != 0) {
|
||||
if (_need_reduce) {
|
||||
reduce_results(&_force_array_d[0].x);
|
||||
_need_reduce = 0;
|
||||
}
|
||||
add_results(_force_array_d, _ev_array_d, _results_eatom, _results_vatom,0);
|
||||
_force_array_d = 0;
|
||||
if (done_this_step) _force_array_d = 0;
|
||||
else _ev_array_d = 0;
|
||||
} else if (_force_array_s != 0) {
|
||||
if (_need_reduce) {
|
||||
reduce_results(&_force_array_s[0].x);
|
||||
_need_reduce = 0;
|
||||
}
|
||||
add_results(_force_array_s, _ev_array_s, _results_eatom, _results_vatom,0);
|
||||
_force_array_s = 0;
|
||||
if (done_this_step) _force_array_s = 0;
|
||||
else _ev_array_s = 0;
|
||||
}
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
@ -576,6 +632,13 @@ void FixIntel::pre_reverse(int /*eflag*/, int /*vflag*/)
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixIntel::pre_reverse(int /*eflag*/, int /*vflag*/)
|
||||
{
|
||||
_sync_main_arrays(1);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class acc_t>
|
||||
void FixIntel::reduce_results(acc_t * _noalias const f_scalar)
|
||||
{
|
||||
@ -657,7 +720,7 @@ template <class ft, class acc_t>
|
||||
void FixIntel::add_results(const ft * _noalias const f_in,
|
||||
const acc_t * _noalias const ev_global,
|
||||
const int eatom, const int vatom,
|
||||
const int /*offload*/) {
|
||||
const int offload) {
|
||||
start_watch(TIME_PACK);
|
||||
int f_length;
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
|
||||
@ -74,11 +74,12 @@ class FixIntel : public Fix {
|
||||
inline int nbor_pack_width() const { return _nbor_pack_width; }
|
||||
inline void nbor_pack_width(const int w) { _nbor_pack_width = w; }
|
||||
inline int three_body_neighbor() { return _three_body_neighbor; }
|
||||
inline void three_body_neighbor(const int /*i*/) { _three_body_neighbor = 1; }
|
||||
inline void three_body_neighbor(const int i) { _three_body_neighbor = i; }
|
||||
|
||||
inline int need_zero(const int tid) {
|
||||
if (_need_reduce == 0 && tid > 0) return 1;
|
||||
return 0;
|
||||
else if (_zero_master && tid == 0) { _zero_master = 0; return 1; }
|
||||
else return 0;
|
||||
}
|
||||
inline void set_reduce_flag() { if (_nthreads > 1) _need_reduce = 1; }
|
||||
inline int lrt() {
|
||||
@ -100,7 +101,10 @@ class FixIntel : public Fix {
|
||||
IntelBuffers<double,double> *_double_buffers;
|
||||
|
||||
int _precision_mode, _nthreads, _nbor_pack_width, _three_body_neighbor;
|
||||
|
||||
int _pair_hybrid_flag;
|
||||
// These should be removed in subsequent update w/ simpler hybrid arch
|
||||
int _pair_hybrid_zero, _hybrid_nonpair, _zero_master;
|
||||
|
||||
public:
|
||||
inline int* get_overflow_flag() { return _overflow_flag; }
|
||||
inline int* get_off_overflow_flag() { return _off_overflow_flag; }
|
||||
@ -210,6 +214,8 @@ class FixIntel : public Fix {
|
||||
_alignvar(double _stopwatch_offload_neighbor[1],64);
|
||||
_alignvar(double _stopwatch_offload_pair[1],64);
|
||||
|
||||
void _sync_main_arrays(const int prereverse);
|
||||
|
||||
template <class ft>
|
||||
void reduce_results(ft * _noalias const f_in);
|
||||
|
||||
@ -238,7 +244,7 @@ class FixIntel : public Fix {
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixIntel::get_buffern(const int /*offload*/, int &nlocal, int &nall,
|
||||
void FixIntel::get_buffern(const int offload, int &nlocal, int &nall,
|
||||
int &minlocal) {
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_separate_buffers) {
|
||||
@ -273,7 +279,7 @@ void FixIntel::get_buffern(const int /*offload*/, int &nlocal, int &nall,
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixIntel::add_result_array(IntelBuffers<double,double>::vec3_acc_t *f_in,
|
||||
double *ev_in, const int /*offload*/,
|
||||
double *ev_in, const int offload,
|
||||
const int eatom, const int vatom,
|
||||
const int rflag) {
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
@ -282,6 +288,8 @@ void FixIntel::add_result_array(IntelBuffers<double,double>::vec3_acc_t *f_in,
|
||||
_off_results_vatom = vatom;
|
||||
_off_force_array_d = f_in;
|
||||
_off_ev_array_d = ev_in;
|
||||
if (_pair_hybrid_flag && force->pair->fdotr_is_set())
|
||||
_sync_main_arrays(1);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
@ -296,12 +304,15 @@ void FixIntel::add_result_array(IntelBuffers<double,double>::vec3_acc_t *f_in,
|
||||
|
||||
if (_overflow_flag[LMP_OVERFLOW])
|
||||
error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
|
||||
|
||||
if (_pair_hybrid_flag > 1 ||
|
||||
(_pair_hybrid_flag && force->pair->fdotr_is_set())) _sync_main_arrays(0);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixIntel::add_result_array(IntelBuffers<float,double>::vec3_acc_t *f_in,
|
||||
double *ev_in, const int /*offload*/,
|
||||
double *ev_in, const int offload,
|
||||
const int eatom, const int vatom,
|
||||
const int rflag) {
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
@ -310,6 +321,8 @@ void FixIntel::add_result_array(IntelBuffers<float,double>::vec3_acc_t *f_in,
|
||||
_off_results_vatom = vatom;
|
||||
_off_force_array_m = f_in;
|
||||
_off_ev_array_d = ev_in;
|
||||
if (_pair_hybrid_flag && force->pair->fdotr_is_set())
|
||||
_sync_main_arrays(1);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
@ -324,12 +337,16 @@ void FixIntel::add_result_array(IntelBuffers<float,double>::vec3_acc_t *f_in,
|
||||
|
||||
if (_overflow_flag[LMP_OVERFLOW])
|
||||
error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
|
||||
|
||||
if (_pair_hybrid_flag > 1 ||
|
||||
(_pair_hybrid_flag && force->pair->fdotr_is_set()))
|
||||
_sync_main_arrays(0);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixIntel::add_result_array(IntelBuffers<float,float>::vec3_acc_t *f_in,
|
||||
float *ev_in, const int /*offload*/,
|
||||
float *ev_in, const int offload,
|
||||
const int eatom, const int vatom,
|
||||
const int rflag) {
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
@ -338,6 +355,8 @@ void FixIntel::add_result_array(IntelBuffers<float,float>::vec3_acc_t *f_in,
|
||||
_off_results_vatom = vatom;
|
||||
_off_force_array_s = f_in;
|
||||
_off_ev_array_s = ev_in;
|
||||
if (_pair_hybrid_flag && force->pair->fdotr_is_set())
|
||||
_sync_main_arrays(1);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
@ -352,6 +371,10 @@ void FixIntel::add_result_array(IntelBuffers<float,float>::vec3_acc_t *f_in,
|
||||
|
||||
if (_overflow_flag[LMP_OVERFLOW])
|
||||
error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
|
||||
|
||||
if (_pair_hybrid_flag > 1 ||
|
||||
(_pair_hybrid_flag && force->pair->fdotr_is_set()))
|
||||
_sync_main_arrays(0);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -487,16 +510,16 @@ The compiler version used to build LAMMPS is not supported when using
|
||||
offload to a coprocessor. There could be performance or correctness
|
||||
issues. Please use 14.0.1.106 or 15.1.133 or later.
|
||||
|
||||
E: Currently, cannot use more than one intel style with hybrid.
|
||||
E: Currently, cannot offload more than one intel style with hybrid.
|
||||
|
||||
Currently, hybrid pair styles can only use the intel suffix for one of the
|
||||
pair styles.
|
||||
Currently, when using offload, hybrid pair styles can only use the intel
|
||||
suffix for one of the pair styles.
|
||||
|
||||
E: Cannot yet use hybrid styles with Intel package.
|
||||
E: Cannot yet use hybrid styles with Intel offload.
|
||||
|
||||
The hybrid pair style configuration is not yet supported by the Intel
|
||||
package. Support is limited to hybrid/overlay or a hybrid style that does
|
||||
not require a skip list.
|
||||
The hybrid pair style configuration is not yet supported when using offload
|
||||
within the Intel package. Support is limited to hybrid/overlay or a hybrid
|
||||
style that does not require a skip list.
|
||||
|
||||
W: Leaving a core/node free can improve performance for offload
|
||||
|
||||
@ -538,4 +561,16 @@ E: Too few atoms for load balancing offload.
|
||||
When using offload to a coprocessor, each MPI task must have at least 2
|
||||
atoms throughout the simulation.
|
||||
|
||||
E: Intel package requires fdotr virial with newton on.
|
||||
|
||||
This error can occur with a hybrid pair style that mixes styles that are
|
||||
incompatible with the newton pair setting turned on. Try turning the
|
||||
newton pair setting off.
|
||||
|
||||
E: Add -DLMP_INTEL_NBOR_COMPAT to build for special_bond exclusions with Intel
|
||||
|
||||
When using a manybody pair style, bonds/angles/dihedrals, and special_bond
|
||||
exclusions, LAMMPS should be built with the above compile flag for compatible
|
||||
results.
|
||||
|
||||
*/
|
||||
|
||||
@ -428,7 +428,7 @@ void ImproperCvffIntel::init_style()
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void ImproperCvffIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
IntelBuffers<flt_t,acc_t> * /*buffers*/)
|
||||
IntelBuffers<flt_t,acc_t> * /*buffers*/)
|
||||
{
|
||||
const int bp1 = atom->nimpropertypes + 1;
|
||||
fc.set_ntypes(bp1,memory);
|
||||
|
||||
@ -24,7 +24,9 @@ using namespace LAMMPS_NS;
|
||||
template <class flt_t, class acc_t>
|
||||
IntelBuffers<flt_t, acc_t>::IntelBuffers(class LAMMPS *lmp_in) :
|
||||
lmp(lmp_in), _x(0), _q(0), _quat(0), _f(0), _off_threads(0),
|
||||
_buf_size(0), _buf_local_size(0) {
|
||||
_buf_size(0), _buf_local_size(0), _n_list_ptrs(1), _max_list_ptrs(4) {
|
||||
_neigh_list_ptrs = new IntelNeighListPtrs[_max_list_ptrs];
|
||||
_neigh_list_ptrs[0].cnumneigh = 0;
|
||||
_list_alloc_atoms = 0;
|
||||
_ntypes = 0;
|
||||
_off_map_listlocal = 0;
|
||||
@ -55,6 +57,7 @@ IntelBuffers<flt_t, acc_t>::~IntelBuffers()
|
||||
free_all_nbor_buffers();
|
||||
free_ccache();
|
||||
set_ntypes(0);
|
||||
delete []_neigh_list_ptrs;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -109,7 +112,7 @@ void IntelBuffers<flt_t, acc_t>::free_buffers()
|
||||
template <class flt_t, class acc_t>
|
||||
void IntelBuffers<flt_t, acc_t>::_grow(const int nall, const int nlocal,
|
||||
const int nthreads,
|
||||
const int /*offload_end*/)
|
||||
const int offload_end)
|
||||
{
|
||||
free_buffers();
|
||||
_buf_size = static_cast<double>(nall) * 1.1 + 1;
|
||||
@ -186,11 +189,9 @@ void IntelBuffers<flt_t, acc_t>::free_nmax()
|
||||
const int * tag = _off_map_tag;
|
||||
const int * special = _off_map_special;
|
||||
const int * nspecial = _off_map_nspecial;
|
||||
if (tag != 0 && special != 0 && nspecial !=0) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(tag:alloc_if(0) free_if(1)) \
|
||||
nocopy(special,nspecial:alloc_if(0) free_if(1))
|
||||
}
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(tag:alloc_if(0) free_if(1)) \
|
||||
nocopy(special,nspecial:alloc_if(0) free_if(1))
|
||||
_off_map_nmax = 0;
|
||||
_host_nmax = 0;
|
||||
}
|
||||
@ -200,7 +201,7 @@ void IntelBuffers<flt_t, acc_t>::free_nmax()
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void IntelBuffers<flt_t, acc_t>::_grow_nmax(const int /*offload_end*/)
|
||||
void IntelBuffers<flt_t, acc_t>::_grow_nmax(const int offload_end)
|
||||
{
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
free_nmax();
|
||||
@ -243,46 +244,117 @@ template <class flt_t, class acc_t>
|
||||
void IntelBuffers<flt_t, acc_t>::free_list_local()
|
||||
{
|
||||
if (_off_map_listlocal > 0) {
|
||||
int * cnumneigh = _cnumneigh;
|
||||
if (_neigh_list_ptrs[0].cnumneigh) {
|
||||
int * cnumneigh = _neigh_list_ptrs[0].cnumneigh;
|
||||
_neigh_list_ptrs[0].cnumneigh = 0;
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_off_map_ilist != NULL) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(cnumneigh:alloc_if(0) free_if(1))
|
||||
}
|
||||
#endif
|
||||
lmp->memory->destroy(cnumneigh);
|
||||
}
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_off_map_ilist != NULL) {
|
||||
const int * ilist = _off_map_ilist;
|
||||
const int * numneigh = _off_map_numneigh;
|
||||
const int ** firstneigh = (const int **)_off_map_firstneigh;
|
||||
_off_map_ilist = NULL;
|
||||
if (numneigh != 0 && ilist != 0) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(ilist,numneigh,cnumneigh:alloc_if(0) free_if(1))
|
||||
}
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(ilist,firstneigh,numneigh:alloc_if(0) free_if(1))
|
||||
}
|
||||
#endif
|
||||
lmp->memory->destroy(cnumneigh);
|
||||
_off_map_listlocal = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void IntelBuffers<flt_t, acc_t>::free_list_ptrs()
|
||||
{
|
||||
for (int list_num = 1; list_num < _n_list_ptrs; list_num++) {
|
||||
if (_neigh_list_ptrs[list_num].size) {
|
||||
lmp->memory->destroy(_neigh_list_ptrs[list_num].cnumneigh);
|
||||
lmp->memory->destroy(_neigh_list_ptrs[list_num].numneighhalf);
|
||||
}
|
||||
_neigh_list_ptrs[list_num].size = 0;
|
||||
_neigh_list_ptrs[list_num].list_ptr = 0;
|
||||
}
|
||||
_n_list_ptrs = 1;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void IntelBuffers<flt_t, acc_t>::grow_data3(NeighList *list,
|
||||
int *&numneighhalf,
|
||||
int *&cnumneigh)
|
||||
{
|
||||
const int size = list->get_maxlocal();
|
||||
int list_num;
|
||||
for (list_num = 0; list_num < _n_list_ptrs; list_num++)
|
||||
if (_neigh_list_ptrs[list_num].list_ptr == (void*)list) break;
|
||||
if (list_num == _n_list_ptrs) {
|
||||
if (_n_list_ptrs == _max_list_ptrs) {
|
||||
_max_list_ptrs *= 2;
|
||||
IntelNeighListPtrs *new_list = new IntelNeighListPtrs[_max_list_ptrs];
|
||||
for (int i = 0; i < _n_list_ptrs; i++) new_list[i] = _neigh_list_ptrs[i];
|
||||
delete []_neigh_list_ptrs;
|
||||
_neigh_list_ptrs = new_list;
|
||||
}
|
||||
_neigh_list_ptrs[list_num].list_ptr = (void *)list;
|
||||
_neigh_list_ptrs[list_num].size = 0;
|
||||
_n_list_ptrs++;
|
||||
}
|
||||
if (size > _neigh_list_ptrs[list_num].size) {
|
||||
if (_neigh_list_ptrs[list_num].size) {
|
||||
lmp->memory->destroy(_neigh_list_ptrs[list_num].cnumneigh);
|
||||
lmp->memory->destroy(_neigh_list_ptrs[list_num].numneighhalf);
|
||||
}
|
||||
lmp->memory->create(_neigh_list_ptrs[list_num].cnumneigh, size,
|
||||
"_cnumneigh");
|
||||
lmp->memory->create(_neigh_list_ptrs[list_num].numneighhalf, size,
|
||||
"_cnumneigh");
|
||||
_neigh_list_ptrs[list_num].size = size;
|
||||
}
|
||||
numneighhalf = _neigh_list_ptrs[list_num].numneighhalf;
|
||||
cnumneigh = _neigh_list_ptrs[list_num].cnumneigh;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void IntelBuffers<flt_t, acc_t>::_grow_list_local(NeighList *list,
|
||||
const int /*offload_end*/)
|
||||
const int three_body,
|
||||
const int offload_end)
|
||||
{
|
||||
free_list_local();
|
||||
int size = list->get_maxlocal();
|
||||
lmp->memory->create(_cnumneigh, size, "_cnumneigh");
|
||||
_off_map_listlocal = size;
|
||||
if (three_body)
|
||||
lmp->memory->create(_neigh_list_ptrs[0].cnumneigh, size, "_cnumneigh");
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload_end > 0) {
|
||||
int tb_size = size;
|
||||
if (three_body == 0) {
|
||||
lmp->memory->create(_neigh_list_ptrs[0].cnumneigh, 16, "_cnumneigh");
|
||||
tb_size = 16;
|
||||
}
|
||||
int ** firstneigh = list->firstneigh;
|
||||
int * numneigh = list->numneigh;
|
||||
int * ilist = list->ilist;
|
||||
int * cnumneigh = _cnumneigh;
|
||||
if (cnumneigh != 0) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(ilist:length(size) alloc_if(1) free_if(0)) \
|
||||
nocopy(numneigh:length(size) alloc_if(1) free_if(0)) \
|
||||
nocopy(cnumneigh:length(size) alloc_if(1) free_if(0))
|
||||
}
|
||||
int * cnumneigh = _neigh_list_ptrs[0].cnumneigh;
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(ilist:length(size) alloc_if(1) free_if(0)) \
|
||||
nocopy(firstneigh:length(size) alloc_if(1) free_if(0)) \
|
||||
nocopy(numneigh:length(size) alloc_if(1) free_if(0)) \
|
||||
nocopy(cnumneigh:length(tb_size) alloc_if(1) free_if(0))
|
||||
_off_map_ilist = ilist;
|
||||
_off_map_firstneigh = firstneigh;
|
||||
_off_map_numneigh = numneigh;
|
||||
}
|
||||
#endif
|
||||
@ -313,7 +385,7 @@ template <class flt_t, class acc_t>
|
||||
void IntelBuffers<flt_t, acc_t>::_grow_nbor_list(NeighList * /*list*/,
|
||||
const int nlocal,
|
||||
const int nthreads,
|
||||
const int /*offload_end*/,
|
||||
const int offload_end,
|
||||
const int pack_width)
|
||||
{
|
||||
free_nbor_list();
|
||||
@ -382,7 +454,7 @@ void IntelBuffers<flt_t, acc_t>::free_ccache()
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void IntelBuffers<flt_t, acc_t>::grow_ccache(const int /*off_flag*/,
|
||||
void IntelBuffers<flt_t, acc_t>::grow_ccache(const int off_flag,
|
||||
const int nthreads,
|
||||
const int width)
|
||||
{
|
||||
@ -481,7 +553,7 @@ void IntelBuffers<flt_t, acc_t>::free_ncache()
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void IntelBuffers<flt_t, acc_t>::grow_ncache(const int /*off_flag*/,
|
||||
void IntelBuffers<flt_t, acc_t>::grow_ncache(const int off_flag,
|
||||
const int nthreads)
|
||||
{
|
||||
const int nsize = get_max_nbors() * 3;
|
||||
@ -576,12 +648,12 @@ void IntelBuffers<flt_t, acc_t>::set_ntypes(const int ntypes,
|
||||
if (_ntypes > 0) {
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
flt_t * cutneighsqo = _cutneighsq[0];
|
||||
if (_off_threads > 0 && cutneighsqo != 0) {
|
||||
if (_off_threads > 0) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(cutneighsqo:alloc_if(0) free_if(1))
|
||||
}
|
||||
flt_t * cutneighghostsqo;
|
||||
if (_cutneighghostsq && _off_threads > 0 && cutneighghostsqo != 0) {
|
||||
if (_cutneighghostsq && _off_threads > 0) {
|
||||
cutneighghostsqo = _cutneighghostsq[0];
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(cutneighghostsqo:alloc_if(0) free_if(1))
|
||||
@ -637,6 +709,8 @@ double IntelBuffers<flt_t, acc_t>::memory_usage(const int nthreads)
|
||||
tmem += (_list_alloc_atoms + _off_threads) * get_max_nbors() * sizeof(int);
|
||||
tmem += _ntypes * _ntypes * sizeof(int);
|
||||
|
||||
tmem += _buf_local_size + (_n_list_ptrs - 1) * _buf_local_size * 2;
|
||||
|
||||
return tmem;
|
||||
}
|
||||
|
||||
|
||||
@ -33,12 +33,20 @@ namespace LAMMPS_NS {
|
||||
#define QUAT_T typename IntelBuffers<flt_t,acc_t>::quat_t
|
||||
#define FORCE_T typename IntelBuffers<flt_t,acc_t>::vec3_acc_t
|
||||
|
||||
struct IntelNeighListPtrs {
|
||||
void * list_ptr;
|
||||
int *cnumneigh;
|
||||
int *numneighhalf;
|
||||
int size;
|
||||
};
|
||||
|
||||
// May not need a separate force array for mixed/double
|
||||
template <class flt_t, class acc_t>
|
||||
class IntelBuffers {
|
||||
public:
|
||||
typedef struct { flt_t x,y,z; int w; } atom_t;
|
||||
typedef struct { flt_t w,i,j,k; } quat_t;
|
||||
typedef struct { flt_t x,y; } vec2_t;
|
||||
typedef struct { flt_t x,y,z,w; } vec3_t;
|
||||
typedef struct { flt_t x,y,z,w; } vec4_t;
|
||||
typedef struct { acc_t x,y,z,w; } vec3_acc_t;
|
||||
@ -62,8 +70,12 @@ class IntelBuffers {
|
||||
|
||||
void free_buffers();
|
||||
void free_nmax();
|
||||
inline void set_bininfo(int *atombin, int *binpacked)
|
||||
{ _atombin = atombin; _binpacked = binpacked; }
|
||||
inline void set_bininfo(int *atombin, int *binpacked) {
|
||||
_atombin = atombin;
|
||||
_binpacked = binpacked;
|
||||
_neigh_list_ptrs[0].numneighhalf = atombin;
|
||||
}
|
||||
|
||||
inline void grow(const int nall, const int nlocal, const int nthreads,
|
||||
const int offload_end) {
|
||||
if (nall >= _buf_size || nlocal >= _buf_local_size)
|
||||
@ -79,18 +91,22 @@ class IntelBuffers {
|
||||
free_nmax();
|
||||
free_list_local();
|
||||
free_ncache();
|
||||
free_list_ptrs();
|
||||
}
|
||||
|
||||
inline void grow_list(NeighList *list, const int nlocal, const int nthreads,
|
||||
const int offload_end, const int pack_width=1) {
|
||||
grow_list_local(list, offload_end);
|
||||
const int three_body, const int offload_end,
|
||||
const int pack_width=1) {
|
||||
grow_list_local(list, three_body, offload_end);
|
||||
grow_nbor_list(list, nlocal, nthreads, offload_end, pack_width);
|
||||
}
|
||||
|
||||
void free_list_local();
|
||||
inline void grow_list_local(NeighList *list, const int offload_end) {
|
||||
inline void grow_list_local(NeighList *list, const int three_body,
|
||||
const int offload_end) {
|
||||
_neigh_list_ptrs[0].list_ptr = (void *)list;
|
||||
if (list->get_maxlocal() > _off_map_listlocal)
|
||||
_grow_list_local(list, offload_end);
|
||||
_grow_list_local(list, three_body, offload_end);
|
||||
}
|
||||
|
||||
void free_ccache();
|
||||
@ -133,26 +149,36 @@ class IntelBuffers {
|
||||
_grow_nbor_list(list, nlocal, nthreads, offload_end, pack_width);
|
||||
}
|
||||
|
||||
void set_ntypes(const int ntypes, const int use_ghost_cut = 0);
|
||||
void set_ntypes(const int ntypes, const int use_ghost_cut = 1);
|
||||
|
||||
inline int * firstneigh(const NeighList * /*list*/) { return _list_alloc; }
|
||||
inline int * cnumneigh(const NeighList * /*list*/) { return _cnumneigh; }
|
||||
inline int * intel_list(const NeighList * /*list*/) { return _list_alloc; }
|
||||
inline int * get_atombin() { return _atombin; }
|
||||
inline int * get_binpacked() { return _binpacked; }
|
||||
inline int * cnumneigh() { return _neigh_list_ptrs[0].cnumneigh; }
|
||||
inline void get_list_data3(const NeighList *list, int *&numneighhalf,
|
||||
int *&cnumneigh) {
|
||||
for (int i = 0; i < _n_list_ptrs; i++)
|
||||
if ((void *)list == _neigh_list_ptrs[i].list_ptr) {
|
||||
numneighhalf = _neigh_list_ptrs[i].numneighhalf;
|
||||
cnumneigh = _neigh_list_ptrs[i].cnumneigh;
|
||||
}
|
||||
}
|
||||
void grow_data3(NeighList *list, int *&numneighhalf, int *&cnumneigh);
|
||||
void free_list_ptrs();
|
||||
|
||||
inline atom_t * get_x(const int /*offload*/ = 1) {
|
||||
inline atom_t * get_x(const int offload = 1) {
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_separate_buffers && offload == 0) return _host_x;
|
||||
#endif
|
||||
return _x;
|
||||
}
|
||||
inline flt_t * get_q(const int /*offload*/ = 1) {
|
||||
inline flt_t * get_q(const int offload = 1) {
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_separate_buffers && offload == 0) return _host_q;
|
||||
#endif
|
||||
return _q;
|
||||
}
|
||||
inline quat_t * get_quat(const int /*offload*/ = 1) {
|
||||
inline quat_t * get_quat(const int offload = 1) {
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_separate_buffers && offload == 0) return _host_quat;
|
||||
#endif
|
||||
@ -298,6 +324,9 @@ class IntelBuffers {
|
||||
int _list_alloc_atoms;
|
||||
int *_list_alloc, *_cnumneigh, *_atombin, *_binpacked;
|
||||
|
||||
IntelNeighListPtrs *_neigh_list_ptrs;
|
||||
int _n_list_ptrs, _max_list_ptrs;
|
||||
|
||||
flt_t **_cutneighsq, **_cutneighghostsq;
|
||||
int _ntypes;
|
||||
|
||||
@ -325,7 +354,7 @@ class IntelBuffers {
|
||||
int _off_map_nmax, _cop, _off_ccache, _off_ncache;
|
||||
int *_off_map_ilist;
|
||||
int *_off_map_special, *_off_map_nspecial, *_off_map_tag;
|
||||
int *_off_map_numneigh;
|
||||
int **_off_map_firstneigh, *_off_map_numneigh;
|
||||
bool _off_list_alloc;
|
||||
#endif
|
||||
|
||||
@ -336,7 +365,8 @@ class IntelBuffers {
|
||||
void _grow(const int nall, const int nlocal, const int nthreads,
|
||||
const int offload_end);
|
||||
void _grow_nmax(const int offload_end);
|
||||
void _grow_list_local(NeighList *list, const int offload_end);
|
||||
void _grow_list_local(NeighList *list, const int three_body,
|
||||
const int offload_end);
|
||||
void _grow_nbor_list(NeighList *list, const int nlocal, const int nthreads,
|
||||
const int offload_end, const int pack_width);
|
||||
};
|
||||
|
||||
@ -20,6 +20,9 @@
|
||||
#if (__INTEL_COMPILER_BUILD_DATE > 20160720)
|
||||
#define LMP_INTEL_USE_SIMDOFF
|
||||
#endif
|
||||
#pragma warning (disable:3948)
|
||||
#pragma warning (disable:3949)
|
||||
#pragma warning (disable:13200)
|
||||
#endif
|
||||
|
||||
#ifdef __INTEL_OFFLOAD
|
||||
@ -606,7 +609,7 @@ inline double MIC_Wtime() {
|
||||
if (newton) \
|
||||
f_stride = buffers->get_stride(nall); \
|
||||
else \
|
||||
f_stride = buffers->get_stride(inum); \
|
||||
f_stride = buffers->get_stride(nlocal); \
|
||||
}
|
||||
|
||||
#define IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, \
|
||||
|
||||
@ -213,6 +213,12 @@ namespace ip_simd {
|
||||
_MM_SCALE_8);
|
||||
}
|
||||
|
||||
inline SIMD_int SIMD_gatherz(const SIMD_mask &m, const int *p,
|
||||
const SIMD_int &i) {
|
||||
return _mm512_mask_i32gather_epi32( _mm512_set1_epi32(0), m, i, p,
|
||||
_MM_SCALE_4);
|
||||
}
|
||||
|
||||
inline SIMD_float SIMD_gatherz(const SIMD_mask &m, const float *p,
|
||||
const SIMD_int &i) {
|
||||
return _mm512_mask_i32gather_ps( _mm512_set1_ps((float)0), m, i, p,
|
||||
|
||||
@ -192,7 +192,6 @@ void NBinIntel::bin_atoms(IntelBuffers<flt_t,acc_t> * buffers) {
|
||||
|
||||
// ---------- Bin Atoms -------------
|
||||
_fix->start_watch(TIME_HOST_NEIGHBOR);
|
||||
//const ATOM_T * _noalias const x = buffers->get_x();
|
||||
int * _noalias const atombin = this->_atombin;
|
||||
int * _noalias const binpacked = this->_binpacked;
|
||||
|
||||
|
||||
@ -86,7 +86,7 @@ void NPairFullBinGhostIntel::fbi(NeighList * list,
|
||||
|
||||
// only uses offload_end_neighbor to check whether we are doing offloading
|
||||
// at all, no need to correct this later
|
||||
buffers->grow_list(list, nall, comm->nthreads, off_end,
|
||||
buffers->grow_list(list, nall, comm->nthreads, 0, off_end,
|
||||
_fix->nbor_pack_width());
|
||||
|
||||
int need_ic = 0;
|
||||
@ -106,7 +106,7 @@ void NPairFullBinGhostIntel::fbi(NeighList * list,
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class flt_t, class acc_t, int need_ic>
|
||||
void NPairFullBinGhostIntel::fbi(const int /*offload*/, NeighList * list,
|
||||
void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
|
||||
IntelBuffers<flt_t,acc_t> * buffers,
|
||||
const int pstart, const int pend) {
|
||||
if (pend-pstart == 0) return;
|
||||
@ -116,7 +116,8 @@ void NPairFullBinGhostIntel::fbi(const int /*offload*/, NeighList * list,
|
||||
const int aend = nall;
|
||||
|
||||
const ATOM_T * _noalias const x = buffers->get_x();
|
||||
int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
int * _noalias const intel_list = buffers->intel_list(list);
|
||||
int ** _noalias const firstneigh = list->firstneigh;
|
||||
const int e_nall = nall_t;
|
||||
|
||||
const int molecular = atom->molecular;
|
||||
@ -140,7 +141,6 @@ void NPairFullBinGhostIntel::fbi(const int /*offload*/, NeighList * list,
|
||||
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias numneigh = list->numneigh;
|
||||
int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int nstencil = this->nstencil;
|
||||
const int * _noalias const stencil = this->stencil;
|
||||
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
|
||||
@ -228,7 +228,7 @@ void NPairFullBinGhostIntel::fbi(const int /*offload*/, NeighList * list,
|
||||
in(cutneighsq:length(0) alloc_if(0) free_if(0)) \
|
||||
in(cutneighghostsq:length(0) alloc_if(0) free_if(0)) \
|
||||
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(intel_list:length(0) alloc_if(0) free_if(0)) \
|
||||
in(numneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(ilist:length(0) alloc_if(0) free_if(0)) \
|
||||
in(atombin:length(aend) alloc_if(0) free_if(0)) \
|
||||
@ -295,7 +295,7 @@ void NPairFullBinGhostIntel::fbi(const int /*offload*/, NeighList * list,
|
||||
|
||||
int pack_offset = maxnbors;
|
||||
int ct = (ifrom + tid * 2) * maxnbors;
|
||||
int *neighptr = firstneigh + ct;
|
||||
int *neighptr = intel_list + ct;
|
||||
const int obound = pack_offset + maxnbors * 2;
|
||||
|
||||
const int toffs = tid * ncache_stride;
|
||||
@ -528,12 +528,12 @@ void NPairFullBinGhostIntel::fbi(const int /*offload*/, NeighList * list,
|
||||
if (ns > maxnbors) *overflow = 1;
|
||||
|
||||
ilist[i] = i;
|
||||
cnumneigh[i] = ct;
|
||||
firstneigh[i] = intel_list + ct;
|
||||
numneigh[i] = ns;
|
||||
|
||||
ct += ns;
|
||||
IP_PRE_edge_align(ct, sizeof(int));
|
||||
neighptr = firstneigh + ct;
|
||||
neighptr = intel_list + ct;
|
||||
if (ct + obound > list_size) {
|
||||
if (i < ito - 1) {
|
||||
*overflow = 1;
|
||||
@ -564,13 +564,12 @@ void NPairFullBinGhostIntel::fbi(const int /*offload*/, NeighList * list,
|
||||
if (offload) {
|
||||
_fix->stop_watch(TIME_OFFLOAD_LATENCY);
|
||||
_fix->start_watch(TIME_HOST_NEIGHBOR);
|
||||
firstneigh[0] = intel_list;
|
||||
for (int n = 0; n < aend; n++) {
|
||||
ilist[n] = n;
|
||||
numneigh[n] = 0;
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < aend; i++)
|
||||
list->firstneigh[i] = firstneigh + cnumneigh[i];
|
||||
if (separate_buffers) {
|
||||
_fix->start_watch(TIME_PACK);
|
||||
_fix->set_neighbor_host_sizes();
|
||||
@ -581,10 +580,5 @@ void NPairFullBinGhostIntel::fbi(const int /*offload*/, NeighList * list,
|
||||
_fix->stop_watch(TIME_PACK);
|
||||
}
|
||||
}
|
||||
#else
|
||||
#pragma vector aligned
|
||||
#pragma simd
|
||||
for (int i = 0; i < aend; i++)
|
||||
list->firstneigh[i] = firstneigh + cnumneigh[i];
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -70,7 +70,8 @@ fbi(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
|
||||
int offload_noghost = _fix->offload_noghost();
|
||||
#endif
|
||||
|
||||
buffers->grow_list(list, atom->nlocal, comm->nthreads, off_end,
|
||||
buffers->grow_list(list, atom->nlocal, comm->nthreads,
|
||||
_fix->three_body_neighbor(), off_end,
|
||||
_fix->nbor_pack_width());
|
||||
|
||||
int need_ic = 0;
|
||||
|
||||
@ -71,7 +71,7 @@ hbni(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
|
||||
int offload_noghost = _fix->offload_noghost();
|
||||
#endif
|
||||
|
||||
buffers->grow_list(list, atom->nlocal, comm->nthreads, off_end);
|
||||
buffers->grow_list(list, atom->nlocal, comm->nthreads, 0, off_end);
|
||||
|
||||
int need_ic = 0;
|
||||
if (atom->molecular)
|
||||
|
||||
@ -71,7 +71,7 @@ hbnti(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
|
||||
int offload_noghost = _fix->offload_noghost();
|
||||
#endif
|
||||
|
||||
buffers->grow_list(list, atom->nlocal, comm->nthreads, off_end);
|
||||
buffers->grow_list(list, atom->nlocal, comm->nthreads, 0, off_end);
|
||||
|
||||
int need_ic = 0;
|
||||
if (atom->molecular)
|
||||
|
||||
47
src/USER-INTEL/npair_halffull_newtoff_intel.h
Normal file
47
src/USER-INTEL/npair_halffull_newtoff_intel.h
Normal file
@ -0,0 +1,47 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
// Only used for hybrid to generate list for non-intel style. Use
|
||||
// standard routines.
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
|
||||
NPairStyle(halffull/newtoff/intel,
|
||||
NPairHalffullNewtoff,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_INTEL)
|
||||
|
||||
NPairStyle(halffull/newtoff/skip/intel,
|
||||
NPairHalffullNewtoff,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_INTEL)
|
||||
|
||||
NPairStyle(halffull/newtoff/ghost/intel,
|
||||
NPairHalffullNewtoff,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_GHOST | NP_INTEL)
|
||||
|
||||
NPairStyle(halffull/newtoff/skip/ghost/intel,
|
||||
NPairHalffullNewtoff,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_GHOST | NP_INTEL)
|
||||
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
||||
233
src/USER-INTEL/npair_halffull_newton_intel.cpp
Normal file
233
src/USER-INTEL/npair_halffull_newton_intel.cpp
Normal file
@ -0,0 +1,233 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_halffull_newton_intel.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "comm.h"
|
||||
#include "modify.h"
|
||||
#include "molecule.h"
|
||||
#include "domain.h"
|
||||
#include "my_page.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
NPairHalffullNewtonIntel::NPairHalffullNewtonIntel(LAMMPS *lmp) : NPair(lmp) {
|
||||
int ifix = modify->find_fix("package_intel");
|
||||
if (ifix < 0)
|
||||
error->all(FLERR,
|
||||
"The 'package intel' command is required for /intel styles");
|
||||
_fix = static_cast<FixIntel *>(modify->fix[ifix]);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
build half list from full list
|
||||
pair stored once if i,j are both owned and i < j
|
||||
if j is ghost, only store if j coords are "above and to the right" of i
|
||||
works if full list is a skip list
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void NPairHalffullNewtonIntel::build_t(NeighList *list,
|
||||
IntelBuffers<flt_t,acc_t> *buffers)
|
||||
{
|
||||
const int inum_full = list->listfull->inum;
|
||||
const int nlocal = atom->nlocal;
|
||||
const int e_nall = nlocal + atom->nghost;
|
||||
const ATOM_T * _noalias const x = buffers->get_x();
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias const numneigh = list->numneigh;
|
||||
int ** _noalias const firstneigh = list->firstneigh;
|
||||
const int * _noalias const ilist_full = list->listfull->ilist;
|
||||
const int * _noalias const numneigh_full = list->listfull->numneigh;
|
||||
const int ** _noalias const firstneigh_full =
|
||||
(const int ** const)list->listfull->firstneigh;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
int tid, ifrom, ito;
|
||||
IP_PRE_omp_range_id(ifrom, ito, tid, inum_full, comm->nthreads);
|
||||
|
||||
// each thread has its own page allocator
|
||||
MyPage<int> &ipage = list->ipage[tid];
|
||||
ipage.reset();
|
||||
|
||||
// loop over parent full list
|
||||
for (int ii = ifrom; ii < ito; ii++) {
|
||||
int n = 0;
|
||||
int *neighptr = ipage.vget();
|
||||
|
||||
const int i = ilist_full[ii];
|
||||
const flt_t xtmp = x[i].x;
|
||||
const flt_t ytmp = x[i].y;
|
||||
const flt_t ztmp = x[i].z;
|
||||
|
||||
// loop over full neighbor list
|
||||
|
||||
const int * _noalias const jlist = firstneigh_full[i];
|
||||
const int jnum = numneigh_full[i];
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int joriginal = jlist[jj];
|
||||
const int j = joriginal & NEIGHMASK;
|
||||
int addme = 1;
|
||||
if (j < nlocal) {
|
||||
if (i > j) addme = 0;
|
||||
} else {
|
||||
if (x[j].z < ztmp) addme = 0;
|
||||
if (x[j].z == ztmp) {
|
||||
if (x[j].y < ytmp) addme = 0;
|
||||
if (x[j].y == ytmp && x[j].x < xtmp) addme = 0;
|
||||
}
|
||||
}
|
||||
if (addme)
|
||||
neighptr[n++] = joriginal;
|
||||
}
|
||||
|
||||
ilist[ii] = i;
|
||||
firstneigh[i] = neighptr;
|
||||
numneigh[i] = n;
|
||||
|
||||
int pad_end = n;
|
||||
IP_PRE_neighbor_pad(pad_end, 0);
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
|
||||
avg=INTEL_COMPILE_WIDTH/2
|
||||
#endif
|
||||
for ( ; n < pad_end; n++)
|
||||
neighptr[n] = e_nall;
|
||||
|
||||
ipage.vgot(n);
|
||||
if (ipage.status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
}
|
||||
}
|
||||
list->inum = inum_full;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
build half list from full 3-body list
|
||||
half list is already stored as first part of 3-body list
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t>
|
||||
void NPairHalffullNewtonIntel::build_t3(NeighList *list, int *numhalf)
|
||||
{
|
||||
const int inum_full = list->listfull->inum;
|
||||
const int e_nall = atom->nlocal + atom->nghost;
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias const numneigh = list->numneigh;
|
||||
int ** _noalias const firstneigh = list->firstneigh;
|
||||
const int * _noalias const ilist_full = list->listfull->ilist;
|
||||
const int * _noalias const numneigh_full = numhalf;
|
||||
const int ** _noalias const firstneigh_full =
|
||||
(const int ** const)list->listfull->firstneigh;
|
||||
|
||||
int packthreads = 1;
|
||||
if (comm->nthreads > INTEL_HTHREADS) packthreads = comm->nthreads;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel if(packthreads > 1)
|
||||
#endif
|
||||
{
|
||||
int tid, ifrom, ito;
|
||||
IP_PRE_omp_range_id(ifrom, ito, tid, inum_full, packthreads);
|
||||
|
||||
// each thread has its own page allocator
|
||||
MyPage<int> &ipage = list->ipage[tid];
|
||||
ipage.reset();
|
||||
|
||||
// loop over parent full list
|
||||
for (int ii = ifrom; ii < ito; ii++) {
|
||||
int n = 0;
|
||||
int *neighptr = ipage.vget();
|
||||
|
||||
const int i = ilist_full[ii];
|
||||
|
||||
// loop over full neighbor list
|
||||
|
||||
const int * _noalias const jlist = firstneigh_full[i];
|
||||
const int jnum = numneigh_full[ii];
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int joriginal = jlist[jj];
|
||||
neighptr[n++] = joriginal;
|
||||
}
|
||||
|
||||
ilist[ii] = i;
|
||||
firstneigh[i] = neighptr;
|
||||
numneigh[i] = n;
|
||||
|
||||
int pad_end = n;
|
||||
IP_PRE_neighbor_pad(pad_end, 0);
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
|
||||
avg=INTEL_COMPILE_WIDTH/2
|
||||
#endif
|
||||
for ( ; n < pad_end; n++)
|
||||
neighptr[n] = e_nall;
|
||||
|
||||
ipage.vgot(n);
|
||||
if (ipage.status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
}
|
||||
}
|
||||
list->inum = inum_full;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NPairHalffullNewtonIntel::build(NeighList *list)
|
||||
{
|
||||
if (_fix->three_body_neighbor() == 0) {
|
||||
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
build_t(list, _fix->get_mixed_buffers());
|
||||
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
build_t(list, _fix->get_double_buffers());
|
||||
else
|
||||
build_t(list, _fix->get_single_buffers());
|
||||
} else {
|
||||
int *nhalf, *cnum;
|
||||
if (_fix->precision() == FixIntel::PREC_MODE_MIXED) {
|
||||
_fix->get_mixed_buffers()->get_list_data3(list->listfull, nhalf, cnum);
|
||||
build_t3<float>(list, nhalf);
|
||||
} else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
|
||||
_fix->get_double_buffers()->get_list_data3(list->listfull, nhalf, cnum);
|
||||
build_t3<double>(list, nhalf);
|
||||
} else {
|
||||
_fix->get_single_buffers()->get_list_data3(list->listfull, nhalf, cnum);
|
||||
build_t3<float>(list, nhalf);
|
||||
}
|
||||
}
|
||||
}
|
||||
72
src/USER-INTEL/npair_halffull_newton_intel.h
Normal file
72
src/USER-INTEL/npair_halffull_newton_intel.h
Normal file
@ -0,0 +1,72 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
|
||||
NPairStyle(halffull/newton/intel,
|
||||
NPairHalffullNewtonIntel,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI| NP_INTEL)
|
||||
|
||||
NPairStyle(halffull/newton/skip/intel,
|
||||
NPairHalffullNewtonIntel,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_INTEL)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_HALFFULL_NEWTON_INTEL_H
|
||||
#define LMP_NPAIR_HALFFULL_NEWTON_INTEL_H
|
||||
|
||||
#include "npair.h"
|
||||
#include "fix_intel.h"
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class NPairHalffullNewtonIntel : public NPair {
|
||||
public:
|
||||
NPairHalffullNewtonIntel(class LAMMPS *);
|
||||
~NPairHalffullNewtonIntel() {}
|
||||
void build(class NeighList *);
|
||||
|
||||
protected:
|
||||
FixIntel *_fix;
|
||||
|
||||
template<class flt_t, class acc_t>
|
||||
void build_t(NeighList *, IntelBuffers<flt_t,acc_t> *);
|
||||
|
||||
template<class flt_t>
|
||||
void build_t3(NeighList *, int *);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: The 'package intel' command is required for /intel styles
|
||||
|
||||
Self explanatory.
|
||||
|
||||
*/
|
||||
@ -52,12 +52,61 @@ NPairIntel::~NPairIntel() {
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NPairIntel::copy_neighbor_info()
|
||||
{
|
||||
NPair::copy_neighbor_info();
|
||||
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
copy_cutsq_info(_fix->get_mixed_buffers());
|
||||
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
copy_cutsq_info(_fix->get_double_buffers());
|
||||
else
|
||||
copy_cutsq_info(_fix->get_single_buffers());
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void NPairIntel::copy_cutsq_info(IntelBuffers<flt_t,acc_t> *buffers) {
|
||||
int tp1 = atom->ntypes + 1;
|
||||
int use_ghost_cut = 0;
|
||||
if (cutneighghostsq)
|
||||
use_ghost_cut = 1;
|
||||
buffers->set_ntypes(tp1, use_ghost_cut);
|
||||
|
||||
flt_t **cutneighsqb = buffers->get_cutneighsq();
|
||||
for (int i = 1; i <= atom->ntypes; i++)
|
||||
for (int j = 1; j <= atom->ntypes; j++)
|
||||
cutneighsqb[i][j] = cutneighsq[i][j];
|
||||
|
||||
flt_t **cutneighghostsqb;
|
||||
if (use_ghost_cut) {
|
||||
cutneighghostsqb = buffers->get_cutneighghostsq();
|
||||
for (int i = 1; i <= atom->ntypes; i++)
|
||||
for (int j = 1; j <= atom->ntypes; j++)
|
||||
cutneighghostsqb[i][j] = cutneighghostsq[i][j];
|
||||
}
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_cop < 0) return;
|
||||
int tp1sq = tp1 * tp1;
|
||||
flt_t * ocutneighsq = cutneighsqb[0];
|
||||
#pragma offload_transfer target(mic:_cop) in(ocutneighsq: length(tp1sq))
|
||||
if (use_ghost_cut) {
|
||||
flt_t * ocutneighghostsq = cutneighghostsqb[0];
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
in(ocutneighghostsq: length(tp1sq))
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t, int offload_noghost, int need_ic,
|
||||
int FULL, int TRI, int THREE>
|
||||
void NPairIntel::bin_newton(const int /*offload*/, NeighList *list,
|
||||
void NPairIntel::bin_newton(const int offload, NeighList *list,
|
||||
IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const int astart, const int aend,
|
||||
const int /*offload_end*/) {
|
||||
const int offload_end) {
|
||||
|
||||
if (aend-astart == 0) return;
|
||||
|
||||
@ -71,7 +120,7 @@ void NPairIntel::bin_newton(const int /*offload*/, NeighList *list,
|
||||
const int pack_width = _fix->nbor_pack_width();
|
||||
|
||||
const ATOM_T * _noalias const x = buffers->get_x();
|
||||
int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
int * _noalias const intel_list = buffers->intel_list(list);
|
||||
const int e_nall = nall_t;
|
||||
|
||||
const int molecular = atom->molecular;
|
||||
@ -94,8 +143,10 @@ void NPairIntel::bin_newton(const int /*offload*/, NeighList *list,
|
||||
const tagint * _noalias const tag = atom->tag;
|
||||
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias numneigh = list->numneigh;
|
||||
int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
int ** _noalias const firstneigh = list->firstneigh;
|
||||
int * _noalias const numneigh = list->numneigh;
|
||||
int * _noalias const cnumneigh = buffers->cnumneigh();
|
||||
|
||||
const int nstencil = this->nstencil;
|
||||
const int * _noalias const stencil = this->stencil;
|
||||
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
|
||||
@ -168,6 +219,7 @@ void NPairIntel::bin_newton(const int /*offload*/, NeighList *list,
|
||||
in(binhead:length(mbins+1) alloc_if(0) free_if(0)) \
|
||||
in(cutneighsq:length(0) alloc_if(0) free_if(0)) \
|
||||
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(intel_list:length(0) alloc_if(0) free_if(0)) \
|
||||
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
out(numneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(ilist:length(0) alloc_if(0) free_if(0)) \
|
||||
@ -178,7 +230,7 @@ void NPairIntel::bin_newton(const int /*offload*/, NeighList *list,
|
||||
in(ncache_stride,maxnbors,nthreads,maxspecial,nstencil,e_nall,offload) \
|
||||
in(offload_end,separate_buffers,astart,aend,nlocal,molecular) \
|
||||
in(ntypes,xperiodic,yperiodic,zperiodic,xprd_half,yprd_half,zprd_half) \
|
||||
in(pack_width,special_bound) \
|
||||
in(pack_width,special_bound) \
|
||||
out(overflow:length(5) alloc_if(0) free_if(0)) \
|
||||
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
|
||||
signal(tag)
|
||||
@ -212,7 +264,7 @@ void NPairIntel::bin_newton(const int /*offload*/, NeighList *list,
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) \
|
||||
shared(numneigh, overflow, nstencilp, binstart, binend)
|
||||
shared(overflow, nstencilp, binstart, binend)
|
||||
#endif
|
||||
{
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
@ -246,7 +298,7 @@ void NPairIntel::bin_newton(const int /*offload*/, NeighList *list,
|
||||
const int obound = maxnbors * 3;
|
||||
#endif
|
||||
int ct = (ifrom + tid * 2) * maxnbors;
|
||||
int *neighptr = firstneigh + ct;
|
||||
int *neighptr = intel_list + ct;
|
||||
int *neighptr2;
|
||||
if (THREE) neighptr2 = neighptr;
|
||||
|
||||
@ -605,12 +657,24 @@ void NPairIntel::bin_newton(const int /*offload*/, NeighList *list,
|
||||
if (n > maxnbors) *overflow = 1;
|
||||
|
||||
ilist[i] = i;
|
||||
cnumneigh[i] = ct;
|
||||
firstneigh[i] = intel_list + ct;
|
||||
if (THREE) {
|
||||
numneigh[i] = ns;
|
||||
cnumneigh[i] = ct;
|
||||
#ifdef LMP_INTEL_3BODY_FAST
|
||||
cnumneigh[i] += lane;
|
||||
#else
|
||||
// Pad anyways just in case we have hybrid with 2-body and newton off
|
||||
int pad_end = ns;
|
||||
IP_PRE_neighbor_pad(pad_end, offload);
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
|
||||
avg=INTEL_COMPILE_WIDTH/2
|
||||
#endif
|
||||
for ( ; ns < pad_end; ns++)
|
||||
neighptr[n++] = e_nall;
|
||||
#endif
|
||||
numneigh[i] = ns;
|
||||
} else {
|
||||
numneigh[i] = n;
|
||||
int pad_end = n;
|
||||
@ -631,7 +695,7 @@ void NPairIntel::bin_newton(const int /*offload*/, NeighList *list,
|
||||
if (lane == pack_width) {
|
||||
ct += max_chunk * pack_width;
|
||||
IP_PRE_edge_align(ct, sizeof(int));
|
||||
neighptr = firstneigh + ct;
|
||||
neighptr = intel_list + ct;
|
||||
neighptr2 = neighptr;
|
||||
max_chunk = 0;
|
||||
lane = 0;
|
||||
@ -647,7 +711,7 @@ void NPairIntel::bin_newton(const int /*offload*/, NeighList *list,
|
||||
{
|
||||
ct += n;
|
||||
//IP_PRE_edge_align(ct, sizeof(int));
|
||||
neighptr = firstneigh + ct;
|
||||
neighptr = intel_list + ct;
|
||||
if (THREE) neighptr2 = neighptr;
|
||||
if (ct + obound > list_size) {
|
||||
if (i < ito - 1) {
|
||||
@ -667,7 +731,7 @@ void NPairIntel::bin_newton(const int /*offload*/, NeighList *list,
|
||||
int ghost_offset = 0, nall_offset = e_nall;
|
||||
if (separate_buffers) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
int * _noalias jlist = firstneigh[i];
|
||||
int jnum = numneigh[i];
|
||||
if (!THREE) IP_PRE_neighbor_pad(jnum, offload);
|
||||
#if __INTEL_COMPILER+0 > 1499
|
||||
@ -712,7 +776,7 @@ void NPairIntel::bin_newton(const int /*offload*/, NeighList *list,
|
||||
}
|
||||
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
int * _noalias jlist = firstneigh[i];
|
||||
int jnum = numneigh[i];
|
||||
if (!THREE) IP_PRE_neighbor_pad(jnum, offload);
|
||||
int jj = 0;
|
||||
@ -739,13 +803,12 @@ void NPairIntel::bin_newton(const int /*offload*/, NeighList *list,
|
||||
if (offload) {
|
||||
_fix->stop_watch(TIME_OFFLOAD_LATENCY);
|
||||
_fix->start_watch(TIME_HOST_NEIGHBOR);
|
||||
firstneigh[0] = intel_list;
|
||||
for (int n = 0; n < aend; n++) {
|
||||
ilist[n] = n;
|
||||
numneigh[n] = 0;
|
||||
}
|
||||
} else {
|
||||
for (int i = astart; i < aend; i++)
|
||||
list->firstneigh[i] = firstneigh + cnumneigh[i];
|
||||
if (separate_buffers) {
|
||||
_fix->start_watch(TIME_PACK);
|
||||
_fix->set_neighbor_host_sizes();
|
||||
@ -756,11 +819,6 @@ void NPairIntel::bin_newton(const int /*offload*/, NeighList *list,
|
||||
_fix->stop_watch(TIME_PACK);
|
||||
}
|
||||
}
|
||||
#else
|
||||
#pragma vector aligned
|
||||
#pragma simd
|
||||
for (int i = astart; i < aend; i++)
|
||||
list->firstneigh[i] = firstneigh + cnumneigh[i];
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -75,7 +75,8 @@ class NPairIntel : public NPair {
|
||||
public:
|
||||
NPairIntel(class LAMMPS *);
|
||||
~NPairIntel();
|
||||
|
||||
virtual void copy_neighbor_info();
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
void grow_stencil();
|
||||
#endif
|
||||
@ -83,6 +84,9 @@ class NPairIntel : public NPair {
|
||||
protected:
|
||||
FixIntel *_fix;
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void copy_cutsq_info(IntelBuffers<flt_t,acc_t> *);
|
||||
|
||||
template <class flt_t, class acc_t, int, int, int, int, int>
|
||||
void bin_newton(const int, NeighList *, IntelBuffers<flt_t,acc_t> *,
|
||||
const int, const int, const int offload_end = 0);
|
||||
|
||||
230
src/USER-INTEL/npair_skip_intel.cpp
Normal file
230
src/USER-INTEL/npair_skip_intel.cpp
Normal file
@ -0,0 +1,230 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_skip_intel.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "comm.h"
|
||||
#include "modify.h"
|
||||
#include "molecule.h"
|
||||
#include "neigh_request.h"
|
||||
#include "domain.h"
|
||||
#include "my_page.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
NPairSkipIntel::NPairSkipIntel(LAMMPS *lmp) : NPair(lmp) {
|
||||
int ifix = modify->find_fix("package_intel");
|
||||
if (ifix < 0)
|
||||
error->all(FLERR,
|
||||
"The 'package intel' command is required for /intel styles");
|
||||
_fix = static_cast<FixIntel *>(modify->fix[ifix]);
|
||||
_inum_starts = new int[comm->nthreads];
|
||||
_inum_counts = new int[comm->nthreads];
|
||||
_full_props = 0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
NPairSkipIntel::~NPairSkipIntel() {
|
||||
delete []_inum_starts;
|
||||
delete []_inum_counts;
|
||||
if (_full_props) delete []_full_props;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NPairSkipIntel::copy_neighbor_info()
|
||||
{
|
||||
NPair::copy_neighbor_info();
|
||||
if (_full_props) delete []_full_props;
|
||||
_full_props = new int[neighbor->nlist];
|
||||
for (int i = 0; i < neighbor->nlist; i++)
|
||||
_full_props[i] = neighbor->requests[i]->full;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
build skip list for subset of types from parent list
|
||||
works for half and full lists
|
||||
works for owned (non-ghost) list, also for ghost list
|
||||
iskip and ijskip flag which atom types and type pairs to skip
|
||||
if ghost, also store neighbors of ghost atoms & set inum,gnum correctly
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class flt_t, int THREE>
|
||||
void NPairSkipIntel::build_t(NeighList *list, int *numhalf, int *cnumneigh,
|
||||
int *numhalf_skip)
|
||||
{
|
||||
const int nlocal = atom->nlocal;
|
||||
const int e_nall = nlocal + atom->nghost;
|
||||
const int * _noalias const type = atom->type;
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias const numneigh = list->numneigh;
|
||||
int ** _noalias const firstneigh = (int ** const)list->firstneigh;
|
||||
const int * _noalias const ilist_skip = list->listskip->ilist;
|
||||
const int * _noalias const numneigh_skip = list->listskip->numneigh;
|
||||
const int ** _noalias const firstneigh_skip =
|
||||
(const int ** const)list->listskip->firstneigh;
|
||||
const int * _noalias const iskip = list->iskip;
|
||||
const int ** _noalias const ijskip = (const int ** const)list->ijskip;
|
||||
|
||||
int num_skip = list->listskip->inum;
|
||||
if (list->ghost) num_skip += list->listskip->gnum;
|
||||
|
||||
int packthreads;
|
||||
if (comm->nthreads > INTEL_HTHREADS && THREE==0)
|
||||
packthreads = comm->nthreads;
|
||||
else
|
||||
packthreads = 1;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel if(packthreads > 1)
|
||||
#endif
|
||||
{
|
||||
int tid, ifrom, ito;
|
||||
IP_PRE_omp_range_id(ifrom, ito, tid, num_skip, packthreads);
|
||||
|
||||
// each thread has its own page allocator
|
||||
MyPage<int> &ipage = list->ipage[tid];
|
||||
ipage.reset();
|
||||
|
||||
int my_inum = ifrom;
|
||||
_inum_starts[tid] = ifrom;
|
||||
|
||||
// loop over parent full list
|
||||
for (int ii = ifrom; ii < ito; ii++) {
|
||||
const int i = ilist_skip[ii];
|
||||
const int itype = type[i];
|
||||
if (iskip[itype]) continue;
|
||||
|
||||
int n = 0;
|
||||
int *neighptr = ipage.vget();
|
||||
|
||||
// loop over parent non-skip list
|
||||
|
||||
const int * _noalias const jlist = firstneigh_skip[i];
|
||||
const int jnum = numneigh_skip[i];
|
||||
|
||||
if (THREE) {
|
||||
const int jnumhalf = numhalf_skip[ii];
|
||||
for (int jj = 0; jj < jnumhalf; jj++) {
|
||||
const int joriginal = jlist[jj];
|
||||
const int j = joriginal & NEIGHMASK;
|
||||
if (!ijskip[itype][type[j]]) neighptr[n++] = joriginal;
|
||||
}
|
||||
numhalf[my_inum] = n;
|
||||
|
||||
for (int jj = jnumhalf; jj < jnum; jj++) {
|
||||
const int joriginal = jlist[jj];
|
||||
const int j = joriginal & NEIGHMASK;
|
||||
if (!ijskip[itype][type[j]]) neighptr[n++] = joriginal;
|
||||
}
|
||||
} else {
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int joriginal = jlist[jj];
|
||||
const int j = joriginal & NEIGHMASK;
|
||||
if (!ijskip[itype][type[j]]) neighptr[n++] = joriginal;
|
||||
}
|
||||
}
|
||||
|
||||
ilist[my_inum++] = i;
|
||||
firstneigh[i] = neighptr;
|
||||
numneigh[i] = n;
|
||||
|
||||
int pad_end = n;
|
||||
IP_PRE_neighbor_pad(pad_end, 0);
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
|
||||
avg=INTEL_COMPILE_WIDTH/2
|
||||
#endif
|
||||
for ( ; n < pad_end; n++)
|
||||
neighptr[n] = e_nall;
|
||||
|
||||
ipage.vgot(n);
|
||||
if (ipage.status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
}
|
||||
|
||||
int last_inum = 0, loop_end;
|
||||
_inum_counts[tid] = my_inum;
|
||||
}
|
||||
int inum = _inum_counts[0];
|
||||
for (int tid = 1; tid < packthreads; tid++) {
|
||||
for (int i = _inum_starts[tid]; i < _inum_counts[tid]; i++) {
|
||||
if (THREE) numhalf[inum] = numhalf[i];
|
||||
ilist[inum++] = ilist[i];
|
||||
}
|
||||
}
|
||||
list->inum = inum;
|
||||
|
||||
if (THREE && num_skip > 0) {
|
||||
int * const list_start = firstneigh[ilist[0]];
|
||||
for (int ii = 0; ii < inum; ii++) {
|
||||
int i = ilist[ii];
|
||||
cnumneigh[ii] = static_cast<int>(firstneigh[i] - list_start);
|
||||
}
|
||||
}
|
||||
if (list->ghost) {
|
||||
int num = 0;
|
||||
int my_inum = list->inum;
|
||||
for (int i = 0; i < my_inum; i++)
|
||||
if (ilist[i] < nlocal) num++;
|
||||
else break;
|
||||
list->inum = num;
|
||||
list->gnum = my_inum - num;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NPairSkipIntel::build(NeighList *list)
|
||||
{
|
||||
if (_fix->three_body_neighbor()==0 ||
|
||||
_full_props[list->listskip->index] == 0) {
|
||||
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
build_t<float,0>(list, 0, 0, 0);
|
||||
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
build_t<double,0>(list, 0, 0, 0);
|
||||
else
|
||||
build_t<float,0>(list, 0, 0, 0);
|
||||
} else {
|
||||
int *nhalf, *cnumneigh, *nhalf_skip, *u;
|
||||
if (_fix->precision() == FixIntel::PREC_MODE_MIXED) {
|
||||
_fix->get_mixed_buffers()->get_list_data3(list->listskip,nhalf_skip,u);
|
||||
_fix->get_mixed_buffers()->grow_data3(list, nhalf, cnumneigh);
|
||||
build_t<float,1>(list, nhalf, cnumneigh, nhalf_skip);
|
||||
} else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
|
||||
_fix->get_double_buffers()->get_list_data3(list->listskip,nhalf_skip,u);
|
||||
_fix->get_double_buffers()->grow_data3(list, nhalf, cnumneigh);
|
||||
build_t<double,1>(list, nhalf, cnumneigh, nhalf_skip);
|
||||
} else {
|
||||
_fix->get_single_buffers()->get_list_data3(list->listskip,nhalf_skip,u);
|
||||
_fix->get_single_buffers()->grow_data3(list,nhalf,cnumneigh);
|
||||
build_t<float,1>(list, nhalf, cnumneigh, nhalf_skip);
|
||||
}
|
||||
}
|
||||
}
|
||||
69
src/USER-INTEL/npair_skip_intel.h
Normal file
69
src/USER-INTEL/npair_skip_intel.h
Normal file
@ -0,0 +1,69 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
|
||||
NPairStyle(skip/intel,
|
||||
NPairSkipIntel,
|
||||
NP_SKIP | NP_HALF | NP_FULL |
|
||||
NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_INTEL)
|
||||
|
||||
NPairStyle(skip/ghost/intel,
|
||||
NPairSkipIntel,
|
||||
NP_SKIP | NP_HALF | NP_FULL |
|
||||
NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_GHOST | NP_INTEL)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_SKIP_INTEL_H
|
||||
#define LMP_NPAIR_SKIP_INTEL_H
|
||||
|
||||
#include "npair.h"
|
||||
#include "fix_intel.h"
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class NPairSkipIntel : public NPair {
|
||||
public:
|
||||
NPairSkipIntel(class LAMMPS *);
|
||||
~NPairSkipIntel();
|
||||
virtual void copy_neighbor_info();
|
||||
void build(class NeighList *);
|
||||
|
||||
protected:
|
||||
FixIntel *_fix;
|
||||
int *_inum_starts, *_inum_counts, *_full_props;
|
||||
|
||||
template<class flt_t, int THREE>
|
||||
void build_t(NeighList *, int *numhalf, int *cnumneigh, int *numhalf_skip);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: The 'package intel' command is required for /intel styles
|
||||
|
||||
Self explanatory.
|
||||
|
||||
*/
|
||||
@ -97,6 +97,13 @@ struct LAMMPS_NS::PairAIREBOIntelParam {
|
||||
|
||||
namespace {
|
||||
|
||||
struct NeighListLMPAIREBO {
|
||||
int * num; /* num_all */
|
||||
int * num_half; /* num_all */
|
||||
int * offset; /* num_all */
|
||||
int ** entries; /* num_all * num_neighs_per_atom */
|
||||
};
|
||||
|
||||
struct NeighListAIREBO {
|
||||
int * num; /* num_all */
|
||||
int * num_half; /* num_all */
|
||||
@ -125,7 +132,7 @@ struct KernelArgsAIREBOT {
|
||||
int neigh_from_atom, neigh_to_atom;
|
||||
int rebuild_flag;
|
||||
flt_t skin;
|
||||
struct NeighListAIREBO neigh_lmp;
|
||||
struct NeighListLMPAIREBO neigh_lmp;
|
||||
struct NeighListAIREBO neigh_rebo;
|
||||
PairAIREBOIntelParam<flt_t,acc_t> params;
|
||||
struct AtomAIREBOT<flt_t> * x; /* num_all */
|
||||
@ -176,6 +183,11 @@ void PairAIREBOIntel::init_style()
|
||||
PairAIREBO::init_style();
|
||||
neighbor->requests[neighbor->nrequest-1]->intel = 1;
|
||||
|
||||
const int nrequest = neighbor->nrequest;
|
||||
for (int i = 0; i < nrequest; ++i)
|
||||
if (neighbor->requests[i]->skip)
|
||||
error->all(FLERR, "Cannot yet use airebo/intel with hybrid.");
|
||||
|
||||
int ifix = modify->find_fix("package_intel");
|
||||
if (ifix < 0)
|
||||
error->all(FLERR,
|
||||
@ -399,8 +411,7 @@ void PairAIREBOIntel::eval(
|
||||
ATOM_T * _noalias const x = buffers->get_x(offload);
|
||||
const int * _noalias const numneighhalf = buffers->get_atombin();
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const int ** _noalias const firstneigh = (const int **)list->firstneigh;
|
||||
tagint * const tag = atom->tag;
|
||||
|
||||
const int ntypes = atom->ntypes + 1;
|
||||
@ -441,7 +452,6 @@ void PairAIREBOIntel::eval(
|
||||
|
||||
#pragma offload target(mic:_cop) if(offload) \
|
||||
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(numneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(numneighhalf:length(0) alloc_if(0) free_if(0)) \
|
||||
in(x:length(x_size) alloc_if(0) free_if(0)) \
|
||||
@ -472,10 +482,8 @@ void PairAIREBOIntel::eval(
|
||||
f_stride, x, 0/*q*/);
|
||||
|
||||
acc_t oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||
if (EVFLAG) {
|
||||
oevdwl = oecoul = (acc_t)0;
|
||||
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
}
|
||||
if (EVFLAG)
|
||||
oevdwl = oecoul = ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
#if defined(_OPENMP)
|
||||
@ -508,8 +516,7 @@ void PairAIREBOIntel::eval(
|
||||
args.skin = skin;
|
||||
args.neigh_lmp.num = const_cast<int*>(numneigh);
|
||||
args.neigh_lmp.num_half = const_cast<int*>(numneighhalf);
|
||||
args.neigh_lmp.offset = const_cast<int*>(cnumneigh);
|
||||
args.neigh_lmp.entries = const_cast<int*>(firstneigh);
|
||||
args.neigh_lmp.entries = const_cast<int**>(firstneigh);
|
||||
args.neigh_rebo.num = REBO_numneigh;
|
||||
args.neigh_rebo.num_half = REBO_num_skin;
|
||||
args.neigh_rebo.offset = REBO_cnumneigh;
|
||||
@ -543,18 +550,14 @@ void PairAIREBOIntel::eval(
|
||||
IP_PRE_fdotr_reduce(1, nall, nthreads, f_stride, vflag,
|
||||
ov0, ov1, ov2, ov3, ov4, ov5);
|
||||
if (EVFLAG) {
|
||||
if (EFLAG) {
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = oecoul;
|
||||
}
|
||||
if (vflag) {
|
||||
ev_global[2] = ov0;
|
||||
ev_global[3] = ov1;
|
||||
ev_global[4] = ov2;
|
||||
ev_global[5] = ov3;
|
||||
ev_global[6] = ov4;
|
||||
ev_global[7] = ov5;
|
||||
}
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = oecoul;
|
||||
ev_global[2] = ov0;
|
||||
ev_global[3] = ov1;
|
||||
ev_global[4] = ov2;
|
||||
ev_global[5] = ov3;
|
||||
ev_global[6] = ov4;
|
||||
ev_global[7] = ov5;
|
||||
}
|
||||
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
|
||||
*timer_compute = MIC_Wtime() - *timer_compute;
|
||||
@ -578,28 +581,20 @@ template<class flt_t, class acc_t>
|
||||
void PairAIREBOIntel::pack_force_const(IntelBuffers<flt_t,acc_t> * buffers) {
|
||||
int tp1 = atom->ntypes + 1;
|
||||
|
||||
buffers->set_ntypes(tp1,1);
|
||||
flt_t **cutneighsq = buffers->get_cutneighsq();
|
||||
flt_t **cutneighghostsq = buffers->get_cutneighghostsq();
|
||||
|
||||
// Repeat cutsq calculation because done after call to init_style
|
||||
double cut, cutneigh;
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||
double cut;
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0))
|
||||
cut = init_one(i, j);
|
||||
cutneigh = cut + neighbor->skin;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
|
||||
cut = cutghost[i][j] + neighbor->skin;
|
||||
cutneighghostsq[i][j] = cutneighghostsq[j][i] = cut*cut;
|
||||
}
|
||||
else
|
||||
cut = 0.0;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_cop < 0) return;
|
||||
flt_t * ocutneighsq = cutneighsq[0];
|
||||
size_t VL = 512 / 8 / sizeof(flt_t);
|
||||
int ntypes = tp1;
|
||||
int tp1sq = tp1 * tp1;
|
||||
@ -607,7 +602,6 @@ void PairAIREBOIntel::pack_force_const(IntelBuffers<flt_t,acc_t> * buffers) {
|
||||
// it might not be freed if this method is called more than once
|
||||
int * map = this->map;
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
in(ocutneighsq: length(tp1sq) alloc_if(0) free_if(0)) \
|
||||
in(map: length(tp1) alloc_if(1) free_if(0))
|
||||
#endif
|
||||
|
||||
@ -1580,7 +1574,7 @@ void ref_rebo_neigh(KernelArgsAIREBOT<flt_t,acc_t> * ka) {
|
||||
ka->nC[i] = 0;
|
||||
ka->nH[i] = 0;
|
||||
for (int j = 0; j < ka->neigh_lmp.num[i]; j++) {
|
||||
int ji = ka->neigh_lmp.entries[ka->neigh_lmp.offset[i] + j];
|
||||
int ji = ka->neigh_lmp.entries[i][j];
|
||||
flt_t delx = ka->x[i].x - ka->x[ji].x;
|
||||
flt_t dely = ka->x[i].y - ka->x[ji].y;
|
||||
flt_t delz = ka->x[i].z - ka->x[ji].z;
|
||||
@ -2168,7 +2162,7 @@ void ref_lennard_jones_single_atom(KernelArgsAIREBOT<flt_t,acc_t> * ka, int i,
|
||||
int morseflag) {
|
||||
AtomAIREBOT<flt_t> * x = ka->x;
|
||||
int jj;
|
||||
int * neighs = ka->neigh_lmp.entries + ka->neigh_lmp.offset[i];
|
||||
int * neighs = ka->neigh_lmp.entries[i];
|
||||
int jnum = ka->neigh_lmp.num_half[i];
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
int j = neighs[jj];
|
||||
@ -2332,7 +2326,7 @@ static void aut_rebo_neigh(KernelArgsAIREBOT<flt_t,acc_t> * ka) {
|
||||
int jnum = ka->rebuild_flag ? ka->neigh_lmp.num[i] :
|
||||
ka->neigh_rebo.num_half[i];
|
||||
int * neighs = ka->rebuild_flag ?
|
||||
&ka->neigh_lmp.entries[ka->neigh_lmp.offset[i]] :
|
||||
ka->neigh_lmp.entries[i] :
|
||||
&ka->neigh_rebo.entries[ka->neigh_rebo.offset[i]+jnum];
|
||||
int * skin_target = &ka->neigh_rebo.entries[offset+ka->num_neighs_per_atom];
|
||||
int n = 0;
|
||||
@ -4554,7 +4548,7 @@ static void aut_lennard_jones(KernelArgsAIREBOT<flt_t,acc_t> * ka) {
|
||||
fvec p_lj40 = fvec::set1(ka->params.lj4[itype][0]);
|
||||
fvec p_lj41 = fvec::set1(ka->params.lj4[itype][1]);
|
||||
|
||||
int * neighs = ka->neigh_lmp.entries + ka->neigh_lmp.offset[i];
|
||||
int * neighs = ka->neigh_lmp.entries[i];
|
||||
int jnum = ka->neigh_lmp.num_half[i];
|
||||
|
||||
bool tap_success = aut_airebo_lj_test_all_paths(ka, i, &test_path_result);
|
||||
|
||||
@ -107,4 +107,10 @@ E: Cannot open AIREBO potential file %s
|
||||
The specified AIREBO potential file cannot be opened. Check that the
|
||||
path and name are correct.
|
||||
|
||||
E: Cannot yet use airebo/intel with hybrid.
|
||||
|
||||
Pair style airebo/intel cannot currently be used as part of a hybrid
|
||||
pair style (with the exception of hybrid/overlay).
|
||||
|
||||
|
||||
*/
|
||||
|
||||
@ -144,8 +144,7 @@ void PairBuckCoulCutIntel::eval(const int offload, const int vflag,
|
||||
|
||||
const int * _noalias const ilist = list->ilist;
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const int ** _noalias const firstneigh = (const int **)list->firstneigh;
|
||||
|
||||
const flt_t * _noalias const special_coul = fc.special_coul;
|
||||
const flt_t * _noalias const special_lj = fc.special_lj;
|
||||
@ -182,7 +181,6 @@ void PairBuckCoulCutIntel::eval(const int offload, const int vflag,
|
||||
in(special_lj,special_coul:length(0) alloc_if(0) free_if(0)) \
|
||||
in(c_force, c_energy, c_cut:length(0) alloc_if(0) free_if(0)) \
|
||||
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(numneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(x:length(x_size) alloc_if(0) free_if(0)) \
|
||||
in(q:length(q_size) alloc_if(0) free_if(0)) \
|
||||
@ -204,8 +202,10 @@ void PairBuckCoulCutIntel::eval(const int offload, const int vflag,
|
||||
f_stride, x, q);
|
||||
|
||||
acc_t oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||
if (EFLAG) oevdwl = oecoul = (acc_t)0;
|
||||
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
if (EFLAG || vflag)
|
||||
oevdwl = oecoul = ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
if (NEWTON_PAIR == 0 && inum != nlocal)
|
||||
memset(f_start, 0, f_stride * sizeof(FORCE_T));
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
#if defined(_OPENMP)
|
||||
@ -231,8 +231,8 @@ void PairBuckCoulCutIntel::eval(const int offload, const int vflag,
|
||||
const C_FORCE_T * _noalias const c_forcei = c_force + ptr_off;
|
||||
const C_ENERGY_T * _noalias const c_energyi = c_energy + ptr_off;
|
||||
const C_CUT_T * _noalias const c_cuti = c_cut + ptr_off;
|
||||
const int * _noalias const jlist = firstneigh + cnumneigh[ii];
|
||||
int jnum = numneigh[ii];
|
||||
const int * _noalias const jlist = firstneigh[i];
|
||||
int jnum = numneigh[i];
|
||||
IP_PRE_neighbor_pad(jnum, offload);
|
||||
|
||||
acc_t fxtmp,fytmp,fztmp,fwtmp;
|
||||
@ -361,13 +361,10 @@ void PairBuckCoulCutIntel::eval(const int offload, const int vflag,
|
||||
IP_PRE_fdotr_reduce(NEWTON_PAIR, nall, nthreads, f_stride, vflag,
|
||||
ov0, ov1, ov2, ov3, ov4, ov5);
|
||||
|
||||
if (EFLAG) {
|
||||
if (NEWTON_PAIR == 0) oevdwl *= (acc_t)0.5;
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = oecoul;
|
||||
}
|
||||
if (vflag) {
|
||||
if (EFLAG || vflag) {
|
||||
if (NEWTON_PAIR == 0) {
|
||||
oevdwl *= (acc_t)0.5;
|
||||
oecoul *= (acc_t)0.5;
|
||||
ov0 *= (acc_t)0.5;
|
||||
ov1 *= (acc_t)0.5;
|
||||
ov2 *= (acc_t)0.5;
|
||||
@ -375,6 +372,8 @@ void PairBuckCoulCutIntel::eval(const int offload, const int vflag,
|
||||
ov4 *= (acc_t)0.5;
|
||||
ov5 *= (acc_t)0.5;
|
||||
}
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = oecoul;
|
||||
ev_global[2] = ov0;
|
||||
ev_global[3] = ov1;
|
||||
ev_global[4] = ov2;
|
||||
@ -439,19 +438,16 @@ void PairBuckCoulCutIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
for (int i = 0; i < ncoultablebits; i++) ntable *= 2;
|
||||
|
||||
fc.set_ntypes(tp1, ntable, memory, _cop);
|
||||
buffers->set_ntypes(tp1);
|
||||
flt_t **cutneighsq = buffers->get_cutneighsq();
|
||||
|
||||
// Repeat cutsq calculation because done after call to init_style
|
||||
double cut, cutneigh;
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||
double cut;
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0))
|
||||
cut = init_one(i, j);
|
||||
cutneigh = cut + neighbor->skin;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
|
||||
}
|
||||
else
|
||||
cut = 0.0;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
}
|
||||
}
|
||||
|
||||
@ -483,12 +479,10 @@ void PairBuckCoulCutIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
C_FORCE_T * c_force = fc.c_force[0];
|
||||
C_ENERGY_T * c_energy = fc.c_energy[0];
|
||||
C_CUT_T * c_cut = fc.c_cut[0];
|
||||
flt_t * ocutneighsq = cutneighsq[0];
|
||||
int tp1sq = tp1 * tp1;
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
in(special_lj, special_coul: length(4) alloc_if(0) free_if(0)) \
|
||||
in(c_force, c_energy, c_cut: length(tp1sq) alloc_if(0) free_if(0)) \
|
||||
in(ocutneighsq: length(tp1sq) alloc_if(0) free_if(0))
|
||||
in(c_force, c_energy, c_cut: length(tp1sq) alloc_if(0) free_if(0))
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -144,8 +144,7 @@ void PairBuckCoulLongIntel::eval(const int offload, const int vflag,
|
||||
|
||||
const int * _noalias const ilist = list->ilist;
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const int ** _noalias const firstneigh = (const int **)list->firstneigh;
|
||||
|
||||
const flt_t * _noalias const special_coul = fc.special_coul;
|
||||
const flt_t * _noalias const special_lj = fc.special_lj;
|
||||
@ -206,7 +205,6 @@ void PairBuckCoulLongIntel::eval(const int offload, const int vflag,
|
||||
in(c_force, c_energy:length(0) alloc_if(0) free_if(0)) \
|
||||
in(rho_inv:length(0) alloc_if(0) free_if(0)) \
|
||||
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(numneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(x:length(x_size) alloc_if(0) free_if(0)) \
|
||||
in(q:length(q_size) alloc_if(0) free_if(0)) \
|
||||
@ -230,8 +228,10 @@ void PairBuckCoulLongIntel::eval(const int offload, const int vflag,
|
||||
f_stride, x, q);
|
||||
|
||||
acc_t oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||
if (EFLAG) oevdwl = oecoul = (acc_t)0;
|
||||
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
if (EFLAG || vflag)
|
||||
oevdwl = oecoul = ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
if (NEWTON_PAIR == 0 && inum != nlocal)
|
||||
memset(f_start, 0, f_stride * sizeof(FORCE_T));
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
#if defined(_OPENMP)
|
||||
@ -265,8 +265,8 @@ void PairBuckCoulLongIntel::eval(const int offload, const int vflag,
|
||||
const C_ENERGY_T * _noalias const c_energyi = c_energy + ptr_off;
|
||||
const flt_t * _noalias const rho_invi = rho_inv + ptr_off;
|
||||
|
||||
const int * _noalias const jlist = firstneigh + cnumneigh[ii];
|
||||
int jnum = numneigh[ii];
|
||||
const int * _noalias const jlist = firstneigh[i];
|
||||
int jnum = numneigh[i];
|
||||
IP_PRE_neighbor_pad(jnum, offload);
|
||||
|
||||
acc_t fxtmp,fytmp,fztmp,fwtmp;
|
||||
@ -437,13 +437,10 @@ void PairBuckCoulLongIntel::eval(const int offload, const int vflag,
|
||||
IP_PRE_fdotr_reduce(NEWTON_PAIR, nall, nthreads, f_stride, vflag,
|
||||
ov0, ov1, ov2, ov3, ov4, ov5);
|
||||
|
||||
if (EFLAG) {
|
||||
if (NEWTON_PAIR == 0) oevdwl *= (acc_t)0.5;
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = oecoul;
|
||||
}
|
||||
if (vflag) {
|
||||
if (EFLAG || vflag) {
|
||||
if (NEWTON_PAIR == 0) {
|
||||
oevdwl *= (acc_t)0.5;
|
||||
oecoul *= (acc_t)0.5;
|
||||
ov0 *= (acc_t)0.5;
|
||||
ov1 *= (acc_t)0.5;
|
||||
ov2 *= (acc_t)0.5;
|
||||
@ -451,6 +448,8 @@ void PairBuckCoulLongIntel::eval(const int offload, const int vflag,
|
||||
ov4 *= (acc_t)0.5;
|
||||
ov5 *= (acc_t)0.5;
|
||||
}
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = oecoul;
|
||||
ev_global[2] = ov0;
|
||||
ev_global[3] = ov1;
|
||||
ev_global[4] = ov2;
|
||||
@ -523,19 +522,16 @@ void PairBuckCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
for (int i = 0; i < ncoultablebits; i++) ntable *= 2;
|
||||
|
||||
fc.set_ntypes(tp1, ntable, memory, _cop);
|
||||
buffers->set_ntypes(tp1);
|
||||
flt_t **cutneighsq = buffers->get_cutneighsq();
|
||||
|
||||
// Repeat cutsq calculation because done after call to init_style
|
||||
double cut, cutneigh;
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||
double cut;
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0))
|
||||
cut = init_one(i, j);
|
||||
cutneigh = cut + neighbor->skin;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
|
||||
}
|
||||
else
|
||||
cut = 0.0;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
}
|
||||
}
|
||||
|
||||
@ -591,15 +587,13 @@ void PairBuckCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
flt_t * detable = fc.detable;
|
||||
flt_t * ctable = fc.ctable;
|
||||
flt_t * dctable = fc.dctable;
|
||||
flt_t * ocutneighsq = cutneighsq[0];
|
||||
int tp1sq = tp1 * tp1;
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
in(special_lj, special_coul: length(4) alloc_if(0) free_if(0)) \
|
||||
in(c_force, c_energy: length(tp1sq) alloc_if(0) free_if(0)) \
|
||||
in(rho_inv: length(tp1sq) alloc_if(0) free_if(0)) \
|
||||
in(table: length(ntable) alloc_if(0) free_if(0)) \
|
||||
in(etable,detable,ctable,dctable: length(ntable) alloc_if(0) free_if(0)) \
|
||||
in(ocutneighsq: length(tp1sq) alloc_if(0) free_if(0))
|
||||
in(etable,detable,ctable,dctable: length(ntable) alloc_if(0) free_if(0))
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -135,8 +135,7 @@ void PairBuckIntel::eval(const int offload, const int vflag,
|
||||
|
||||
const int * _noalias const ilist = list->ilist;
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const int ** _noalias const firstneigh = (const int **)list->firstneigh;
|
||||
|
||||
const flt_t * _noalias const special_lj = fc.special_lj;
|
||||
const C_FORCE_T * _noalias const c_force = fc.c_force[0];
|
||||
@ -167,7 +166,6 @@ void PairBuckIntel::eval(const int offload, const int vflag,
|
||||
in(special_lj:length(0) alloc_if(0) free_if(0)) \
|
||||
in(c_force, c_energy:length(0) alloc_if(0) free_if(0)) \
|
||||
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(numneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(x:length(x_size) alloc_if(0) free_if(0)) \
|
||||
in(ilist:length(0) alloc_if(0) free_if(0)) \
|
||||
@ -188,8 +186,10 @@ void PairBuckIntel::eval(const int offload, const int vflag,
|
||||
f_stride, x, 0);
|
||||
|
||||
acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||
if (EFLAG) oevdwl = (acc_t)0;
|
||||
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
if (EFLAG || vflag)
|
||||
oevdwl = ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
if (NEWTON_PAIR == 0 && inum != nlocal)
|
||||
memset(f_start, 0, f_stride * sizeof(FORCE_T));
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
#if defined(_OPENMP)
|
||||
@ -214,8 +214,8 @@ void PairBuckIntel::eval(const int offload, const int vflag,
|
||||
const int ptr_off = itype * ntypes;
|
||||
const C_FORCE_T * _noalias const c_forcei = c_force + ptr_off;
|
||||
const C_ENERGY_T * _noalias const c_energyi = c_energy + ptr_off;
|
||||
const int * _noalias const jlist = firstneigh + cnumneigh[ii];
|
||||
int jnum = numneigh[ii];
|
||||
const int * _noalias const jlist = firstneigh[i];
|
||||
int jnum = numneigh[i];
|
||||
IP_PRE_neighbor_pad(jnum, offload);
|
||||
|
||||
acc_t fxtmp,fytmp,fztmp,fwtmp;
|
||||
@ -324,13 +324,9 @@ void PairBuckIntel::eval(const int offload, const int vflag,
|
||||
IP_PRE_fdotr_reduce(NEWTON_PAIR, nall, nthreads, f_stride, vflag,
|
||||
ov0, ov1, ov2, ov3, ov4, ov5);
|
||||
|
||||
if (EFLAG) {
|
||||
if (NEWTON_PAIR == 0) oevdwl *= (acc_t)0.5;
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = (acc_t)0;
|
||||
}
|
||||
if (vflag) {
|
||||
if (EFLAG || vflag) {
|
||||
if (NEWTON_PAIR == 0) {
|
||||
oevdwl *= (acc_t)0.5;
|
||||
ov0 *= (acc_t)0.5;
|
||||
ov1 *= (acc_t)0.5;
|
||||
ov2 *= (acc_t)0.5;
|
||||
@ -338,6 +334,8 @@ void PairBuckIntel::eval(const int offload, const int vflag,
|
||||
ov4 *= (acc_t)0.5;
|
||||
ov5 *= (acc_t)0.5;
|
||||
}
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = (acc_t)0;
|
||||
ev_global[2] = ov0;
|
||||
ev_global[3] = ov1;
|
||||
ev_global[4] = ov2;
|
||||
@ -396,19 +394,16 @@ void PairBuckIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
int tp1 = atom->ntypes + 1;
|
||||
|
||||
fc.set_ntypes(tp1, memory, _cop);
|
||||
buffers->set_ntypes(tp1);
|
||||
flt_t **cutneighsq = buffers->get_cutneighsq();
|
||||
|
||||
// Repeat cutsq calculation because done after call to init_style
|
||||
double cut, cutneigh;
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||
double cut;
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0))
|
||||
cut = init_one(i, j);
|
||||
cutneigh = cut + neighbor->skin;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
|
||||
}
|
||||
else
|
||||
cut = 0.0;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
}
|
||||
}
|
||||
|
||||
@ -434,12 +429,10 @@ void PairBuckIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
flt_t * special_lj = fc.special_lj;
|
||||
C_FORCE_T * c_force = fc.c_force[0];
|
||||
C_ENERGY_T * c_energy = fc.c_energy[0];
|
||||
flt_t * ocutneighsq = cutneighsq[0];
|
||||
int tp1sq = tp1 * tp1;
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
in(special_lj: length(4) alloc_if(0) free_if(0)) \
|
||||
in(c_force, c_energy: length(tp1sq) alloc_if(0) free_if(0)) \
|
||||
in(ocutneighsq: length(tp1sq) alloc_if(0) free_if(0))
|
||||
in(c_force, c_energy: length(tp1sq) alloc_if(0) free_if(0))
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -173,8 +173,7 @@ void PairDPDIntel::eval(const int offload, const int vflag,
|
||||
|
||||
const int * _noalias const ilist = list->ilist;
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const int ** _noalias const firstneigh = (const int **)list->firstneigh;
|
||||
const FC_PACKED1_T * _noalias const param = fc.param[0];
|
||||
const flt_t * _noalias const special_lj = fc.special_lj;
|
||||
int * _noalias const rngi_thread = fc.rngi;
|
||||
@ -204,8 +203,10 @@ void PairDPDIntel::eval(const int offload, const int vflag,
|
||||
f_stride, x, 0);
|
||||
|
||||
acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||
if (EFLAG) oevdwl = (acc_t)0;
|
||||
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
if (EFLAG || vflag)
|
||||
oevdwl = ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
if (NEWTON_PAIR == 0 && inum != nlocal)
|
||||
memset(f_start, 0, f_stride * sizeof(FORCE_T));
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
#if defined(_OPENMP)
|
||||
@ -233,10 +234,10 @@ void PairDPDIntel::eval(const int offload, const int vflag,
|
||||
|
||||
flt_t icut, a0, gamma, sigma;
|
||||
if (ONETYPE) {
|
||||
icut = param[3].icut;
|
||||
a0 = param[3].a0;
|
||||
gamma = param[3].gamma;
|
||||
sigma = param[3].sigma;
|
||||
icut = param[_onetype].icut;
|
||||
a0 = param[_onetype].a0;
|
||||
gamma = param[_onetype].gamma;
|
||||
sigma = param[_onetype].sigma;
|
||||
}
|
||||
for (int ii = iifrom; ii < iito; ii += iip) {
|
||||
const int i = ilist[ii];
|
||||
@ -248,8 +249,8 @@ void PairDPDIntel::eval(const int offload, const int vflag,
|
||||
parami = param + ptr_off;
|
||||
}
|
||||
|
||||
const int * _noalias const jlist = firstneigh + cnumneigh[ii];
|
||||
int jnum = numneigh[ii];
|
||||
const int * _noalias const jlist = firstneigh[i];
|
||||
int jnum = numneigh[i];
|
||||
IP_PRE_neighbor_pad(jnum, offload);
|
||||
|
||||
acc_t fxtmp, fytmp, fztmp, fwtmp;
|
||||
@ -379,13 +380,9 @@ void PairDPDIntel::eval(const int offload, const int vflag,
|
||||
IP_PRE_fdotr_reduce(NEWTON_PAIR, nall, nthreads, f_stride, vflag,
|
||||
ov0, ov1, ov2, ov3, ov4, ov5);
|
||||
|
||||
if (EFLAG) {
|
||||
if (NEWTON_PAIR == 0) oevdwl *= (acc_t)0.5;
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = (acc_t)0.0;
|
||||
}
|
||||
if (vflag) {
|
||||
if (EFLAG || vflag) {
|
||||
if (NEWTON_PAIR == 0) {
|
||||
oevdwl *= (acc_t)0.5;
|
||||
ov0 *= (acc_t)0.5;
|
||||
ov1 *= (acc_t)0.5;
|
||||
ov2 *= (acc_t)0.5;
|
||||
@ -393,6 +390,8 @@ void PairDPDIntel::eval(const int offload, const int vflag,
|
||||
ov4 *= (acc_t)0.5;
|
||||
ov5 *= (acc_t)0.5;
|
||||
}
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = (acc_t)0.0;
|
||||
ev_global[2] = ov0;
|
||||
ev_global[3] = ov1;
|
||||
ev_global[4] = ov2;
|
||||
@ -503,32 +502,26 @@ void PairDPDIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
IntelBuffers<flt_t,acc_t> *buffers)
|
||||
{
|
||||
_onetype = 0;
|
||||
if (atom->ntypes == 1 && !atom->molecular) _onetype = 1;
|
||||
|
||||
int tp1 = atom->ntypes + 1;
|
||||
fc.set_ntypes(tp1,comm->nthreads,buffers->get_max_nbors(),memory,_cop);
|
||||
buffers->set_ntypes(tp1);
|
||||
flt_t **cutneighsq = buffers->get_cutneighsq();
|
||||
|
||||
// Repeat cutsq calculation because done after call to init_style
|
||||
double cut, cutneigh;
|
||||
int mytypes = 0;
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||
cut = init_one(i,j);
|
||||
cutneigh = cut + neighbor->skin;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
|
||||
double icut = 1.0 / cut;
|
||||
fc.param[i][j].icut = fc.param[j][i].icut = icut;
|
||||
} else {
|
||||
cut = init_one(i,j);
|
||||
double icut = 1.0 / cut;
|
||||
fc.param[i][j].icut = fc.param[j][i].icut = icut;
|
||||
mytypes++;
|
||||
_onetype = i * tp1 + j;
|
||||
}
|
||||
double cut = init_one(i,j);
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
double icut = 1.0 / cut;
|
||||
fc.param[i][j].icut = fc.param[j][i].icut = icut;
|
||||
}
|
||||
}
|
||||
|
||||
if (mytypes > 1 || atom->molecular) _onetype = 0;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
fc.special_lj[i] = force->special_lj[i];
|
||||
fc.special_lj[0] = 1.0;
|
||||
|
||||
@ -193,8 +193,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
|
||||
|
||||
const int * _noalias const ilist = list->ilist;
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const int ** _noalias const firstneigh = (const int **)list->firstneigh;
|
||||
const FC_PACKED1_T * _noalias const rhor_spline_f = fc.rhor_spline_f;
|
||||
const FC_PACKED1_T * _noalias const rhor_spline_e = fc.rhor_spline_e;
|
||||
const FC_PACKED2_T * _noalias const z2r_spline_t = fc.z2r_spline_t;
|
||||
@ -243,8 +242,10 @@ void PairEAMIntel::eval(const int offload, const int vflag,
|
||||
f_stride, x, 0);
|
||||
|
||||
acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||
if (EFLAG) oevdwl = (acc_t)0;
|
||||
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
if (EFLAG || vflag)
|
||||
oevdwl = ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
if (NEWTON_PAIR == 0 && inum != nlocal)
|
||||
memset(f_start, 0, f_stride * sizeof(FORCE_T));
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
#if defined(_OPENMP)
|
||||
@ -293,8 +294,8 @@ void PairEAMIntel::eval(const int offload, const int vflag,
|
||||
itype = x[i].w;
|
||||
rhor_ioff = istride * itype;
|
||||
}
|
||||
const int * _noalias const jlist = firstneigh + cnumneigh[ii];
|
||||
int jnum = numneigh[ii];
|
||||
const int * _noalias const jlist = firstneigh[i];
|
||||
int jnum = numneigh[i];
|
||||
IP_PRE_neighbor_pad(jnum, offload);
|
||||
|
||||
const flt_t xtmp = x[i].x;
|
||||
@ -450,8 +451,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
|
||||
|
||||
if (tid == 0)
|
||||
comm->forward_comm_pair(this);
|
||||
if (NEWTON_PAIR)
|
||||
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
|
||||
if (NEWTON_PAIR) memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp barrier
|
||||
@ -469,8 +469,8 @@ void PairEAMIntel::eval(const int offload, const int vflag,
|
||||
rhor_ioff = istride * itype;
|
||||
scale_fi = scale_f + itype*ntypes;
|
||||
}
|
||||
const int * _noalias const jlist = firstneigh + cnumneigh[ii];
|
||||
int jnum = numneigh[ii];
|
||||
const int * _noalias const jlist = firstneigh[i];
|
||||
int jnum = numneigh[i];
|
||||
IP_PRE_neighbor_pad(jnum, offload);
|
||||
|
||||
acc_t fxtmp, fytmp, fztmp, fwtmp;
|
||||
@ -597,11 +597,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
|
||||
IP_PRE_fdotr_reduce(NEWTON_PAIR, nall, nthreads, f_stride, vflag,
|
||||
ov0, ov1, ov2, ov3, ov4, ov5);
|
||||
|
||||
if (EFLAG) {
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = (acc_t)0.0;
|
||||
}
|
||||
if (vflag) {
|
||||
if (EFLAG || vflag) {
|
||||
if (NEWTON_PAIR == 0) {
|
||||
ov0 *= (acc_t)0.5;
|
||||
ov1 *= (acc_t)0.5;
|
||||
@ -610,6 +606,8 @@ void PairEAMIntel::eval(const int offload, const int vflag,
|
||||
ov4 *= (acc_t)0.5;
|
||||
ov5 *= (acc_t)0.5;
|
||||
}
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = (acc_t)0.0;
|
||||
ev_global[2] = ov0;
|
||||
ev_global[3] = ov1;
|
||||
ev_global[4] = ov2;
|
||||
@ -682,19 +680,16 @@ void PairEAMIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
|
||||
int tp1 = atom->ntypes + 1;
|
||||
fc.set_ntypes(tp1,nr,nrho,memory,_cop);
|
||||
buffers->set_ntypes(tp1);
|
||||
flt_t **cutneighsq = buffers->get_cutneighsq();
|
||||
|
||||
// Repeat cutsq calculation because done after call to init_style
|
||||
double cut, cutneigh;
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||
double cut;
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0))
|
||||
cut = init_one(i,j);
|
||||
cutneigh = cut + neighbor->skin;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
|
||||
}
|
||||
else
|
||||
cut = 0.0;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
}
|
||||
}
|
||||
|
||||
@ -733,13 +728,13 @@ void PairEAMIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
fc.rhor_spline_e[joff + k].b=rhor_spline[type2rhor[j][i]][k][4];
|
||||
fc.rhor_spline_e[joff + k].c=rhor_spline[type2rhor[j][i]][k][5];
|
||||
fc.rhor_spline_e[joff + k].d=rhor_spline[type2rhor[j][i]][k][6];
|
||||
fc.z2r_spline_t[joff + k].a=z2r_spline[type2rhor[j][i]][k][0];
|
||||
fc.z2r_spline_t[joff + k].b=z2r_spline[type2rhor[j][i]][k][1];
|
||||
fc.z2r_spline_t[joff + k].c=z2r_spline[type2rhor[j][i]][k][2];
|
||||
fc.z2r_spline_t[joff + k].d=z2r_spline[type2rhor[j][i]][k][3];
|
||||
fc.z2r_spline_t[joff + k].e=z2r_spline[type2rhor[j][i]][k][4];
|
||||
fc.z2r_spline_t[joff + k].f=z2r_spline[type2rhor[j][i]][k][5];
|
||||
fc.z2r_spline_t[joff + k].g=z2r_spline[type2rhor[j][i]][k][6];
|
||||
fc.z2r_spline_t[joff + k].a=z2r_spline[type2z2r[j][i]][k][0];
|
||||
fc.z2r_spline_t[joff + k].b=z2r_spline[type2z2r[j][i]][k][1];
|
||||
fc.z2r_spline_t[joff + k].c=z2r_spline[type2z2r[j][i]][k][2];
|
||||
fc.z2r_spline_t[joff + k].d=z2r_spline[type2z2r[j][i]][k][3];
|
||||
fc.z2r_spline_t[joff + k].e=z2r_spline[type2z2r[j][i]][k][4];
|
||||
fc.z2r_spline_t[joff + k].f=z2r_spline[type2z2r[j][i]][k][5];
|
||||
fc.z2r_spline_t[joff + k].g=z2r_spline[type2z2r[j][i]][k][6];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -754,7 +749,7 @@ void PairEAMIntel::ForceConst<flt_t>::set_ntypes(const int ntypes,
|
||||
const int nr, const int nrho,
|
||||
Memory *memory,
|
||||
const int cop) {
|
||||
if (ntypes != _ntypes || nr > _nr || nrho > _nrho) {
|
||||
if (ntypes != _ntypes || nr + 1 > _nr || nrho + 1 > _nrho) {
|
||||
if (_ntypes > 0) {
|
||||
_memory->destroy(rhor_spline_f);
|
||||
_memory->destroy(rhor_spline_e);
|
||||
|
||||
@ -228,8 +228,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
||||
|
||||
const int * _noalias const ilist = list->ilist;
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const int ** _noalias const firstneigh = (const int **)list->firstneigh;
|
||||
const flt_t * _noalias const special_lj = fc.special_lj;
|
||||
|
||||
const FC_PACKED1_T * _noalias const ijc = fc.ijc[0];
|
||||
@ -283,7 +282,6 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
||||
in(rsq_formi, delx_formi, dely_formi: length(0) alloc_if(0) free_if(0)) \
|
||||
in(delz_formi, jtype_formi, jlist_formi: length(0) alloc_if(0) free_if(0))\
|
||||
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(numneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(x:length(x_size) alloc_if(0) free_if(0)) \
|
||||
in(quat:length(nall+1) alloc_if(0) free_if(0)) \
|
||||
@ -337,9 +335,11 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
||||
#endif
|
||||
|
||||
acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||
if (EFLAG) oevdwl = (acc_t)0.0;
|
||||
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
|
||||
if (EFLAG || vflag)
|
||||
oevdwl = ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
|
||||
if (NEWTON_PAIR == 0) f_start[1].w = 0;
|
||||
if (NEWTON_PAIR == 0 && inum != nlocal)
|
||||
memset(f_start, 0, f_stride * sizeof(FORCE_T));
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
#if defined(_OPENMP)
|
||||
@ -372,8 +372,8 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
||||
const FC_PACKED1_T * _noalias const ijci = ijc + ptr_off;
|
||||
const FC_PACKED2_T * _noalias const lj34i = lj34 + ptr_off;
|
||||
|
||||
const int * _noalias const jlist = firstneigh + cnumneigh[ii];
|
||||
int jnum = numneigh[ii];
|
||||
const int * _noalias const jlist = firstneigh[i];
|
||||
int jnum = numneigh[i];
|
||||
IP_PRE_neighbor_pad(jnum, offload);
|
||||
|
||||
const flt_t xtmp = x[i].x;
|
||||
@ -833,13 +833,9 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
||||
f_start[1].w = ierror;
|
||||
} // omp
|
||||
|
||||
if (EFLAG) {
|
||||
if (NEWTON_PAIR == 0) oevdwl *= (acc_t)0.5;
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = (acc_t)0.0;
|
||||
}
|
||||
if (vflag) {
|
||||
if (EFLAG || vflag) {
|
||||
if (NEWTON_PAIR == 0) {
|
||||
oevdwl *= (acc_t)0.5;
|
||||
ov0 *= (acc_t)-0.5;
|
||||
ov1 *= (acc_t)-0.5;
|
||||
ov2 *= (acc_t)-0.5;
|
||||
@ -847,6 +843,8 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
||||
ov4 *= (acc_t)-0.5;
|
||||
ov5 *= (acc_t)-0.5;
|
||||
}
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = (acc_t)0.0;
|
||||
ev_global[2] = ov0;
|
||||
ev_global[3] = ov1;
|
||||
ev_global[4] = ov2;
|
||||
@ -914,19 +912,16 @@ void PairGayBerneIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
if (mthreads < buffers->get_off_threads())
|
||||
mthreads = buffers->get_off_threads();
|
||||
fc.set_ntypes(tp1, _max_nbors, mthreads, memory, _cop);
|
||||
buffers->set_ntypes(tp1);
|
||||
flt_t **cutneighsq = buffers->get_cutneighsq();
|
||||
|
||||
// Repeat cutsq calculation because done after call to init_style
|
||||
double cut, cutneigh;
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||
double cut;
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0))
|
||||
cut = init_one(i,j);
|
||||
cutneigh = cut + neighbor->skin;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
|
||||
}
|
||||
else
|
||||
cut = 0.0;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
}
|
||||
}
|
||||
|
||||
@ -963,14 +958,12 @@ void PairGayBerneIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
FC_PACKED1_T *oijc = fc.ijc[0];
|
||||
FC_PACKED2_T *olj34 = fc.lj34[0];
|
||||
FC_PACKED3_T *oic = fc.ic;
|
||||
flt_t * ocutneighsq = cutneighsq[0];
|
||||
int tp1sq = tp1 * tp1;
|
||||
if (oijc != NULL && oic != NULL) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
in(special_lj: length(4) alloc_if(0) free_if(0)) \
|
||||
in(oijc,olj34: length(tp1sq) alloc_if(0) free_if(0)) \
|
||||
in(oic: length(tp1) alloc_if(0) free_if(0)) \
|
||||
in(ocutneighsq: length(tp1sq))
|
||||
in(oic: length(tp1) alloc_if(0) free_if(0))
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -138,8 +138,7 @@ void PairLJCharmmCoulCharmmIntel::eval(const int offload, const int vflag,
|
||||
|
||||
const int * _noalias const ilist = list->ilist;
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const int ** _noalias const firstneigh = (const int **)list->firstneigh;
|
||||
|
||||
const flt_t * _noalias const special_coul = fc.special_coul;
|
||||
const flt_t * _noalias const special_lj = fc.special_lj;
|
||||
@ -186,7 +185,6 @@ void PairLJCharmmCoulCharmmIntel::eval(const int offload, const int vflag,
|
||||
in(special_lj,special_coul:length(0) alloc_if(0) free_if(0)) \
|
||||
in(cutsq,lj:length(0) alloc_if(0) free_if(0)) \
|
||||
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(numneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(x:length(x_size) alloc_if(0) free_if(0)) \
|
||||
in(q:length(q_size) alloc_if(0) free_if(0)) \
|
||||
@ -212,8 +210,10 @@ void PairLJCharmmCoulCharmmIntel::eval(const int offload, const int vflag,
|
||||
f_stride, x, q);
|
||||
|
||||
acc_t oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||
if (EFLAG) oevdwl = oecoul = (acc_t)0;
|
||||
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
if (EFLAG || vflag)
|
||||
oevdwl = oecoul = ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
if (NEWTON_PAIR == 0 && inum != nlocal)
|
||||
memset(f_start, 0, f_stride * sizeof(FORCE_T));
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
#if defined(_OPENMP)
|
||||
@ -248,8 +248,8 @@ void PairLJCharmmCoulCharmmIntel::eval(const int offload, const int vflag,
|
||||
const flt_t * _noalias const cutsqi = cutsq + ptr_off;
|
||||
const LJ_T * _noalias const lji = lj + ptr_off;
|
||||
|
||||
const int * _noalias const jlist = firstneigh + cnumneigh[ii];
|
||||
int jnum = numneigh[ii];
|
||||
const int * _noalias const jlist = firstneigh[i];
|
||||
int jnum = numneigh[i];
|
||||
IP_PRE_neighbor_pad(jnum, offload);
|
||||
|
||||
acc_t fxtmp,fytmp,fztmp,fwtmp;
|
||||
@ -403,16 +403,10 @@ void PairLJCharmmCoulCharmmIntel::eval(const int offload, const int vflag,
|
||||
IP_PRE_fdotr_reduce(NEWTON_PAIR, nall, nthreads, f_stride, vflag,
|
||||
ov0, ov1, ov2, ov3, ov4, ov5);
|
||||
|
||||
if (EFLAG) {
|
||||
if (EFLAG || vflag) {
|
||||
if (NEWTON_PAIR == 0) {
|
||||
oevdwl *= (acc_t)0.5;
|
||||
oecoul *= (acc_t)0.5;
|
||||
}
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = oecoul;
|
||||
}
|
||||
if (vflag) {
|
||||
if (NEWTON_PAIR == 0) {
|
||||
ov0 *= (acc_t)0.5;
|
||||
ov1 *= (acc_t)0.5;
|
||||
ov2 *= (acc_t)0.5;
|
||||
@ -420,6 +414,8 @@ void PairLJCharmmCoulCharmmIntel::eval(const int offload, const int vflag,
|
||||
ov4 *= (acc_t)0.5;
|
||||
ov5 *= (acc_t)0.5;
|
||||
}
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = oecoul;
|
||||
ev_global[2] = ov0;
|
||||
ev_global[3] = ov1;
|
||||
ev_global[4] = ov2;
|
||||
@ -487,22 +483,19 @@ void PairLJCharmmCoulCharmmIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
int tp1 = atom->ntypes + 1;
|
||||
|
||||
fc.set_ntypes(tp1, memory, _cop);
|
||||
buffers->set_ntypes(tp1);
|
||||
flt_t **cutneighsq = buffers->get_cutneighsq();
|
||||
|
||||
// Repeat cutsq calculation because done after call to init_style
|
||||
double cut, cutneigh;
|
||||
if (cut_lj > cut_coul)
|
||||
error->all(FLERR,
|
||||
"Intel varient of lj/charmm/coul/long expects lj cutoff<=coulombic");
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||
double cut;
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0))
|
||||
cut = init_one(i, j);
|
||||
cutneigh = cut + neighbor->skin;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
|
||||
}
|
||||
else
|
||||
cut = 0.0;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
}
|
||||
}
|
||||
|
||||
@ -540,12 +533,10 @@ void PairLJCharmmCoulCharmmIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
flt_t * special_coul = fc.special_coul;
|
||||
flt_t * cutsq = fc.cutsq[0];
|
||||
LJ_T * lj = fc.lj[0];
|
||||
flt_t * ocutneighsq = cutneighsq[0];
|
||||
int tp1sq = tp1 * tp1;
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
in(special_lj, special_coul: length(4) alloc_if(0) free_if(0)) \
|
||||
in(cutsq,lj: length(tp1sq) alloc_if(0) free_if(0)) \
|
||||
in(ocutneighsq: length(tp1sq) alloc_if(0) free_if(0))
|
||||
in(cutsq,lj: length(tp1sq) alloc_if(0) free_if(0))
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -28,7 +28,7 @@
|
||||
#include "suffix.h"
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define LJ_T typename IntelBuffers<flt_t,flt_t>::vec4_t
|
||||
#define LJ_T typename IntelBuffers<flt_t,flt_t>::vec2_t
|
||||
#define TABLE_T typename ForceConst<flt_t>::table_t
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -142,8 +142,7 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
|
||||
|
||||
const int * _noalias const ilist = list->ilist;
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const int ** _noalias const firstneigh = (const int **)list->firstneigh;
|
||||
|
||||
const flt_t * _noalias const special_coul = fc.special_coul;
|
||||
const flt_t * _noalias const special_lj = fc.special_lj;
|
||||
@ -207,7 +206,6 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
|
||||
in(special_lj,special_coul:length(0) alloc_if(0) free_if(0)) \
|
||||
in(cutsq,lj:length(0) alloc_if(0) free_if(0)) \
|
||||
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(numneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(x:length(x_size) alloc_if(0) free_if(0)) \
|
||||
in(q:length(q_size) alloc_if(0) free_if(0)) \
|
||||
@ -232,8 +230,10 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
|
||||
f_stride, x, q);
|
||||
|
||||
acc_t oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||
if (EFLAG) oevdwl = oecoul = (acc_t)0;
|
||||
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
if (EFLAG || vflag)
|
||||
oevdwl = oecoul = ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
if (NEWTON_PAIR == 0 && inum != nlocal)
|
||||
memset(f_start, 0, f_stride * sizeof(FORCE_T));
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
#if defined(_OPENMP)
|
||||
@ -268,8 +268,8 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
|
||||
const flt_t * _noalias const cutsqi = cutsq + ptr_off;
|
||||
const LJ_T * _noalias const lji = lj + ptr_off;
|
||||
|
||||
const int * _noalias const jlist = firstneigh + cnumneigh[ii];
|
||||
int jnum = numneigh[ii];
|
||||
const int * _noalias const jlist = firstneigh[i];
|
||||
int jnum = numneigh[i];
|
||||
IP_PRE_neighbor_pad(jnum, offload);
|
||||
|
||||
acc_t fxtmp,fytmp,fztmp,fwtmp;
|
||||
@ -376,8 +376,14 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
|
||||
if (rsq < cut_ljsq) {
|
||||
#endif
|
||||
flt_t r6inv = r2inv * r2inv * r2inv;
|
||||
forcelj = r6inv * (lji[jtype].x * r6inv - lji[jtype].y);
|
||||
if (EFLAG) evdwl = r6inv*(lji[jtype].z * r6inv - lji[jtype].w);
|
||||
flt_t eps4 = lji[jtype].x;
|
||||
flt_t sigp6 = lji[jtype].y;
|
||||
flt_t lj4 = eps4 * sigp6;
|
||||
flt_t lj3 = lj4 * sigp6;
|
||||
flt_t lj2 = (flt_t)6.0 * lj4;
|
||||
flt_t lj1 = (flt_t)12.0 * lj3;
|
||||
forcelj = r6inv * (lj1 * r6inv - lj2);
|
||||
if (EFLAG) evdwl = r6inv*(lj3 * r6inv - lj4);
|
||||
|
||||
#ifdef INTEL_VMASK
|
||||
if (rsq > cut_lj_innersq) {
|
||||
@ -397,8 +403,7 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
const flt_t philj = r6inv * (lji[jtype].z*r6inv -
|
||||
lji[jtype].w);
|
||||
const flt_t philj = r6inv * (lj3 * r6inv - lj4);
|
||||
#ifndef INTEL_VMASK
|
||||
if (rsq > cut_lj_innersq)
|
||||
#endif
|
||||
@ -463,16 +468,10 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
|
||||
IP_PRE_fdotr_reduce(NEWTON_PAIR, nall, nthreads, f_stride, vflag,
|
||||
ov0, ov1, ov2, ov3, ov4, ov5);
|
||||
|
||||
if (EFLAG) {
|
||||
if (EFLAG || vflag) {
|
||||
if (NEWTON_PAIR == 0) {
|
||||
oevdwl *= (acc_t)0.5;
|
||||
oecoul *= (acc_t)0.5;
|
||||
}
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = oecoul;
|
||||
}
|
||||
if (vflag) {
|
||||
if (NEWTON_PAIR == 0) {
|
||||
ov0 *= (acc_t)0.5;
|
||||
ov1 *= (acc_t)0.5;
|
||||
ov2 *= (acc_t)0.5;
|
||||
@ -480,6 +479,8 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
|
||||
ov4 *= (acc_t)0.5;
|
||||
ov5 *= (acc_t)0.5;
|
||||
}
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = oecoul;
|
||||
ev_global[2] = ov0;
|
||||
ev_global[3] = ov1;
|
||||
ev_global[4] = ov2;
|
||||
@ -552,22 +553,19 @@ void PairLJCharmmCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
for (int i = 0; i < ncoultablebits; i++) ntable *= 2;
|
||||
|
||||
fc.set_ntypes(tp1, ntable, memory, _cop);
|
||||
buffers->set_ntypes(tp1);
|
||||
flt_t **cutneighsq = buffers->get_cutneighsq();
|
||||
|
||||
// Repeat cutsq calculation because done after call to init_style
|
||||
double cut, cutneigh;
|
||||
if (cut_lj > cut_coul)
|
||||
error->all(FLERR,
|
||||
"Intel varient of lj/charmm/coul/long expects lj cutoff<=coulombic");
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||
double cut;
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0))
|
||||
cut = init_one(i, j);
|
||||
cutneigh = cut + neighbor->skin;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
|
||||
}
|
||||
else
|
||||
cut = 0.0;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
}
|
||||
}
|
||||
|
||||
@ -591,10 +589,13 @@ void PairLJCharmmCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
|
||||
for (int i = 1; i < tp1; i++) {
|
||||
for (int j = 1; j < tp1; j++) {
|
||||
fc.lj[i][j].x = lj1[i][j];
|
||||
fc.lj[i][j].y = lj2[i][j];
|
||||
fc.lj[i][j].z = lj3[i][j];
|
||||
fc.lj[i][j].w = lj4[i][j];
|
||||
if (i <= j) {
|
||||
fc.lj[i][j].x = epsilon[i][j] * 4.0;
|
||||
fc.lj[i][j].y = pow(sigma[i][j],6.0);
|
||||
} else {
|
||||
fc.lj[i][j].x = epsilon[j][i] * 4.0;
|
||||
fc.lj[i][j].y = pow(sigma[j][i],6.0);
|
||||
}
|
||||
fc.cutsq[i][j] = cutsq[i][j];
|
||||
}
|
||||
}
|
||||
@ -623,14 +624,12 @@ void PairLJCharmmCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
flt_t * detable = fc.detable;
|
||||
flt_t * ctable = fc.ctable;
|
||||
flt_t * dctable = fc.dctable;
|
||||
flt_t * ocutneighsq = cutneighsq[0];
|
||||
int tp1sq = tp1 * tp1;
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
in(special_lj, special_coul: length(4) alloc_if(0) free_if(0)) \
|
||||
in(cutsq,lj: length(tp1sq) alloc_if(0) free_if(0)) \
|
||||
in(table: length(ntable) alloc_if(0) free_if(0)) \
|
||||
in(etable,detable,ctable,dctable: length(ntable) alloc_if(0) free_if(0)) \
|
||||
in(ocutneighsq: length(tp1sq) alloc_if(0) free_if(0))
|
||||
in(etable,detable,ctable,dctable: length(ntable) alloc_if(0) free_if(0))
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -647,7 +646,7 @@ void PairLJCharmmCoulLongIntel::ForceConst<flt_t>::set_ntypes(const int ntypes,
|
||||
flt_t * ospecial_lj = special_lj;
|
||||
flt_t * ospecial_coul = special_coul;
|
||||
flt_t * ocutsq = cutsq[0];
|
||||
typename IntelBuffers<flt_t,flt_t>::vec4_t * olj = lj[0];
|
||||
typename IntelBuffers<flt_t,flt_t>::vec2_t * olj = lj[0];
|
||||
table_t * otable = table;
|
||||
flt_t * oetable = etable;
|
||||
flt_t * odetable = detable;
|
||||
@ -687,7 +686,7 @@ void PairLJCharmmCoulLongIntel::ForceConst<flt_t>::set_ntypes(const int ntypes,
|
||||
flt_t * ospecial_lj = special_lj;
|
||||
flt_t * ospecial_coul = special_coul;
|
||||
flt_t * ocutsq = cutsq[0];
|
||||
typename IntelBuffers<flt_t,flt_t>::vec4_t * olj = lj[0];
|
||||
typename IntelBuffers<flt_t,flt_t>::vec2_t * olj = lj[0];
|
||||
table_t * otable = table;
|
||||
flt_t * oetable = etable;
|
||||
flt_t * odetable = detable;
|
||||
|
||||
@ -69,7 +69,7 @@ class PairLJCharmmCoulLongIntel : public PairLJCharmmCoulLong {
|
||||
flt_t cut_lj_innersq;
|
||||
table_t *table;
|
||||
flt_t *etable, *detable, *ctable, *dctable;
|
||||
typename IntelBuffers<flt_t,flt_t>::vec4_t **lj;
|
||||
typename IntelBuffers<flt_t,flt_t>::vec2_t **lj;
|
||||
|
||||
ForceConst() : _ntypes(0), _ntable(0) {}
|
||||
~ForceConst() { set_ntypes(0,0,NULL,_cop); }
|
||||
|
||||
@ -141,8 +141,7 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
|
||||
|
||||
const int * _noalias const ilist = list->ilist;
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const int ** _noalias const firstneigh = (const int **)list->firstneigh;
|
||||
|
||||
const flt_t * _noalias const special_coul = fc.special_coul;
|
||||
const flt_t * _noalias const special_lj = fc.special_lj;
|
||||
@ -201,7 +200,6 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
|
||||
in(special_lj,special_coul:length(0) alloc_if(0) free_if(0)) \
|
||||
in(c_force, c_energy:length(0) alloc_if(0) free_if(0)) \
|
||||
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(numneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(x:length(x_size) alloc_if(0) free_if(0)) \
|
||||
in(q:length(q_size) alloc_if(0) free_if(0)) \
|
||||
@ -225,8 +223,10 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
|
||||
f_stride, x, q);
|
||||
|
||||
acc_t oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||
if (EFLAG) oevdwl = oecoul = (acc_t)0;
|
||||
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
if (EFLAG || vflag)
|
||||
oevdwl = oecoul = ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
if (NEWTON_PAIR == 0 && inum != nlocal)
|
||||
memset(f_start, 0, f_stride * sizeof(FORCE_T));
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
#if defined(_OPENMP)
|
||||
@ -260,8 +260,8 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
|
||||
const C_FORCE_T * _noalias const c_forcei = c_force + ptr_off;
|
||||
const C_ENERGY_T * _noalias const c_energyi = c_energy + ptr_off;
|
||||
|
||||
const int * _noalias const jlist = firstneigh + cnumneigh[ii];
|
||||
int jnum = numneigh[ii];
|
||||
const int * _noalias const jlist = firstneigh[i];
|
||||
int jnum = numneigh[i];
|
||||
IP_PRE_neighbor_pad(jnum, offload);
|
||||
|
||||
acc_t fxtmp,fytmp,fztmp,fwtmp;
|
||||
@ -433,16 +433,10 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
|
||||
IP_PRE_fdotr_reduce(NEWTON_PAIR, nall, nthreads, f_stride, vflag,
|
||||
ov0, ov1, ov2, ov3, ov4, ov5);
|
||||
|
||||
if (EFLAG) {
|
||||
if (EFLAG || vflag) {
|
||||
if (NEWTON_PAIR == 0) {
|
||||
oevdwl *= (acc_t)0.5;
|
||||
oecoul *= (acc_t)0.5;
|
||||
}
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = oecoul;
|
||||
}
|
||||
if (vflag) {
|
||||
if (NEWTON_PAIR == 0) {
|
||||
ov0 *= (acc_t)0.5;
|
||||
ov1 *= (acc_t)0.5;
|
||||
ov2 *= (acc_t)0.5;
|
||||
@ -450,6 +444,8 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
|
||||
ov4 *= (acc_t)0.5;
|
||||
ov5 *= (acc_t)0.5;
|
||||
}
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = oecoul;
|
||||
ev_global[2] = ov0;
|
||||
ev_global[3] = ov1;
|
||||
ev_global[4] = ov2;
|
||||
@ -522,19 +518,16 @@ void PairLJCutCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
for (int i = 0; i < ncoultablebits; i++) ntable *= 2;
|
||||
|
||||
fc.set_ntypes(tp1, ntable, memory, _cop);
|
||||
buffers->set_ntypes(tp1);
|
||||
flt_t **cutneighsq = buffers->get_cutneighsq();
|
||||
|
||||
// Repeat cutsq calculation because done after call to init_style
|
||||
double cut, cutneigh;
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||
double cut;
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0))
|
||||
cut = init_one(i, j);
|
||||
cutneigh = cut + neighbor->skin;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
|
||||
}
|
||||
else
|
||||
cut = 0.0;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
}
|
||||
}
|
||||
|
||||
@ -587,14 +580,12 @@ void PairLJCutCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
flt_t * detable = fc.detable;
|
||||
flt_t * ctable = fc.ctable;
|
||||
flt_t * dctable = fc.dctable;
|
||||
flt_t * ocutneighsq = cutneighsq[0];
|
||||
int tp1sq = tp1 * tp1;
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
in(special_lj, special_coul: length(4) alloc_if(0) free_if(0)) \
|
||||
in(c_force, c_energy: length(tp1sq) alloc_if(0) free_if(0)) \
|
||||
in(table: length(ntable) alloc_if(0) free_if(0)) \
|
||||
in(etable,detable,ctable,dctable: length(ntable) alloc_if(0) free_if(0)) \
|
||||
in(ocutneighsq: length(tp1sq) alloc_if(0) free_if(0))
|
||||
in(etable,detable,ctable,dctable: length(ntable) alloc_if(0) free_if(0))
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -150,8 +150,7 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
|
||||
|
||||
const int * _noalias const ilist = list->ilist;
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const int ** _noalias const firstneigh = (const int **)list->firstneigh;
|
||||
const flt_t * _noalias const special_lj = fc.special_lj;
|
||||
const FC_PACKED1_T * _noalias const ljc12o = fc.ljc12o[0];
|
||||
const FC_PACKED2_T * _noalias const lj34 = fc.lj34[0];
|
||||
@ -180,8 +179,10 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
|
||||
f_stride, x, 0);
|
||||
|
||||
acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||
if (EFLAG) oevdwl = (acc_t)0;
|
||||
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
if (EFLAG || vflag)
|
||||
oevdwl = ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
if (NEWTON_PAIR == 0 && inum != nlocal)
|
||||
memset(f_start, 0, f_stride * sizeof(FORCE_T));
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
#if defined(_OPENMP)
|
||||
@ -201,12 +202,12 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
|
||||
|
||||
flt_t cutsq, lj1, lj2, lj3, lj4, offset;
|
||||
if (ONETYPE) {
|
||||
cutsq = ljc12o[3].cutsq;
|
||||
lj1 = ljc12o[3].lj1;
|
||||
lj2 = ljc12o[3].lj2;
|
||||
lj3 = lj34[3].lj3;
|
||||
lj4 = lj34[3].lj4;
|
||||
offset = ljc12o[3].offset;
|
||||
cutsq = ljc12o[_onetype].cutsq;
|
||||
lj1 = ljc12o[_onetype].lj1;
|
||||
lj2 = ljc12o[_onetype].lj2;
|
||||
lj3 = lj34[_onetype].lj3;
|
||||
lj4 = lj34[_onetype].lj4;
|
||||
offset = ljc12o[_onetype].offset;
|
||||
}
|
||||
for (int ii = iifrom; ii < iito; ii += iip) {
|
||||
const int i = ilist[ii];
|
||||
@ -219,8 +220,8 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
|
||||
ljc12oi = ljc12o + ptr_off;
|
||||
lj34i = lj34 + ptr_off;
|
||||
}
|
||||
const int * _noalias const jlist = firstneigh + cnumneigh[ii];
|
||||
int jnum = numneigh[ii];
|
||||
const int * _noalias const jlist = firstneigh[i];
|
||||
int jnum = numneigh[i];
|
||||
IP_PRE_neighbor_pad(jnum, offload);
|
||||
|
||||
acc_t fxtmp, fytmp, fztmp, fwtmp;
|
||||
@ -338,13 +339,9 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
|
||||
IP_PRE_fdotr_reduce(NEWTON_PAIR, nall, nthreads, f_stride, vflag,
|
||||
ov0, ov1, ov2, ov3, ov4, ov5);
|
||||
|
||||
if (EFLAG) {
|
||||
if (NEWTON_PAIR == 0) oevdwl *= (acc_t)0.5;
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = (acc_t)0.0;
|
||||
}
|
||||
if (vflag) {
|
||||
if (EFLAG || vflag) {
|
||||
if (NEWTON_PAIR == 0) {
|
||||
oevdwl *= (acc_t)0.5;
|
||||
ov0 *= (acc_t)0.5;
|
||||
ov1 *= (acc_t)0.5;
|
||||
ov2 *= (acc_t)0.5;
|
||||
@ -352,6 +349,8 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
|
||||
ov4 *= (acc_t)0.5;
|
||||
ov5 *= (acc_t)0.5;
|
||||
}
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = (acc_t)0.0;
|
||||
ev_global[2] = ov0;
|
||||
ev_global[3] = ov1;
|
||||
ev_global[4] = ov2;
|
||||
@ -414,25 +413,26 @@ void PairLJCutIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
IntelBuffers<flt_t,acc_t> *buffers)
|
||||
{
|
||||
_onetype = 0;
|
||||
if (atom->ntypes == 1 && !atom->molecular) _onetype = 1;
|
||||
|
||||
int tp1 = atom->ntypes + 1;
|
||||
fc.set_ntypes(tp1,memory,_cop);
|
||||
buffers->set_ntypes(tp1);
|
||||
flt_t **cutneighsq = buffers->get_cutneighsq();
|
||||
|
||||
// Repeat cutsq calculation because done after call to init_style
|
||||
double cut, cutneigh;
|
||||
int mytypes = 0;
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
double cut;
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||
cut = init_one(i,j);
|
||||
cutneigh = cut + neighbor->skin;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
|
||||
mytypes++;
|
||||
_onetype = i * tp1 + j;
|
||||
} else {
|
||||
cut = 0.0;
|
||||
}
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
}
|
||||
}
|
||||
if (mytypes > 1 || atom->molecular) _onetype = 0;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
fc.special_lj[i] = force->special_lj[i];
|
||||
|
||||
@ -130,37 +130,37 @@ void PairSWIntel::compute(int eflag, int vflag,
|
||||
if (_onetype) {
|
||||
if (_spq) {
|
||||
if (eflag) {
|
||||
eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad);
|
||||
eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum, _host_pad);
|
||||
eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end);
|
||||
eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum);
|
||||
} else {
|
||||
eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad);
|
||||
eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum, _host_pad);
|
||||
eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end);
|
||||
eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum);
|
||||
}
|
||||
} else {
|
||||
if (eflag) {
|
||||
eval<0,1,1>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad);
|
||||
eval<0,1,1>(0, ovflag, buffers, fc, host_start, inum, _host_pad);
|
||||
eval<0,1,1>(1, ovflag, buffers, fc, 0, offload_end);
|
||||
eval<0,1,1>(0, ovflag, buffers, fc, host_start, inum);
|
||||
} else {
|
||||
eval<0,1,0>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad);
|
||||
eval<0,1,0>(0, ovflag, buffers, fc, host_start, inum, _host_pad);
|
||||
eval<0,1,0>(1, ovflag, buffers, fc, 0, offload_end);
|
||||
eval<0,1,0>(0, ovflag, buffers, fc, host_start, inum);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (_spq) {
|
||||
if (eflag) {
|
||||
eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad);
|
||||
eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum, _host_pad);
|
||||
eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end);
|
||||
eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum);
|
||||
} else {
|
||||
eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad);
|
||||
eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum, _host_pad);
|
||||
eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end);
|
||||
eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum);
|
||||
}
|
||||
} else {
|
||||
if (eflag) {
|
||||
eval<0,0,1>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad);
|
||||
eval<0,0,1>(0, ovflag, buffers, fc, host_start, inum, _host_pad);
|
||||
eval<0,0,1>(1, ovflag, buffers, fc, 0, offload_end);
|
||||
eval<0,0,1>(0, ovflag, buffers, fc, host_start, inum);
|
||||
} else {
|
||||
eval<0,0,0>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad);
|
||||
eval<0,0,0>(0, ovflag, buffers, fc, host_start, inum, _host_pad);
|
||||
eval<0,0,0>(1, ovflag, buffers, fc, 0, offload_end);
|
||||
eval<0,0,0>(0, ovflag, buffers, fc, host_start, inum);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -173,7 +173,7 @@ template <int SPQ,int ONETYPE,int EFLAG,class flt_t,class acc_t>
|
||||
void PairSWIntel::eval(const int offload, const int vflag,
|
||||
IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const ForceConst<flt_t> &fc, const int astart,
|
||||
const int aend, const int /*pad_width*/)
|
||||
const int aend)
|
||||
{
|
||||
const int inum = aend - astart;
|
||||
if (inum == 0) return;
|
||||
@ -185,10 +185,13 @@ void PairSWIntel::eval(const int offload, const int vflag,
|
||||
|
||||
ATOM_T * _noalias const x = buffers->get_x(offload);
|
||||
const int * _noalias const ilist = list->ilist;
|
||||
const int * _noalias const numneighhalf = buffers->get_atombin();
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const int * _noalias const firstneigh = list->firstneigh[ilist[0]];
|
||||
|
||||
int *nhalf, *cnum;
|
||||
buffers->get_list_data3(list, nhalf, cnum);
|
||||
const int * _noalias const numneighhalf = nhalf;
|
||||
const int * _noalias const cnumneigh = cnum;
|
||||
|
||||
const FC_PACKED0_T * _noalias const p2 = fc.p2[0];
|
||||
const FC_PACKED1_T * _noalias const p2f = fc.p2f[0];
|
||||
@ -206,6 +209,8 @@ void PairSWIntel::eval(const int offload, const int vflag,
|
||||
|
||||
const int ntypes = atom->ntypes + 1;
|
||||
const int eatom = this->eflag_atom;
|
||||
const int onetype = _onetype;
|
||||
const int onetype3 = _onetype3;
|
||||
|
||||
// Determine how much data to transfer
|
||||
int x_size, q_size, f_stride, ev_size, separate_flag;
|
||||
@ -235,8 +240,8 @@ void PairSWIntel::eval(const int offload, const int vflag,
|
||||
in(overflow:length(0) alloc_if(0) free_if(0)) \
|
||||
in(ccachex,ccachey,ccachez,ccachew:length(0) alloc_if(0) free_if(0)) \
|
||||
in(ccachei,ccachej:length(0) alloc_if(0) free_if(0)) \
|
||||
in(ccache_stride,nthreads,inum,nall,ntypes,vflag,eatom,offload) \
|
||||
in(astart,nlocal,f_stride,minlocal,separate_flag,pad_width) \
|
||||
in(ccache_stride,nthreads,inum,nall,ntypes,vflag,eatom,offload,onetype3) \
|
||||
in(astart,nlocal,f_stride,minlocal,separate_flag,onetype) \
|
||||
out(f_start:length(f_stride) alloc_if(0) free_if(0)) \
|
||||
out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \
|
||||
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
|
||||
@ -251,8 +256,8 @@ void PairSWIntel::eval(const int offload, const int vflag,
|
||||
f_stride, x, 0);
|
||||
|
||||
acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||
if (EFLAG) oevdwl = (acc_t)0;
|
||||
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
if (EFLAG || vflag)
|
||||
oevdwl = ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5)
|
||||
@ -278,24 +283,24 @@ void PairSWIntel::eval(const int offload, const int vflag,
|
||||
flt_t cutsq, cut, powerp, powerq, sigma, c1, c2, c3, c4, c5, c6;
|
||||
flt_t sigma_gamma, costheta, lambda_epsilon, lambda_epsilon2;
|
||||
if (ONETYPE) {
|
||||
cutsq = p2[3].cutsq;
|
||||
cut = p2f[3].cut;
|
||||
sigma = p2f[3].sigma;
|
||||
c1 = p2f2[3].c1;
|
||||
c2 = p2f2[3].c2;
|
||||
c3 = p2f2[3].c3;
|
||||
c4 = p2f2[3].c4;
|
||||
sigma_gamma = p2[3].sigma_gamma;
|
||||
costheta = p3[7].costheta;
|
||||
lambda_epsilon = p3[7].lambda_epsilon;
|
||||
lambda_epsilon2 = p3[7].lambda_epsilon2;
|
||||
cutsq = p2[onetype].cutsq;
|
||||
cut = p2f[onetype].cut;
|
||||
sigma = p2f[onetype].sigma;
|
||||
c1 = p2f2[onetype].c1;
|
||||
c2 = p2f2[onetype].c2;
|
||||
c3 = p2f2[onetype].c3;
|
||||
c4 = p2f2[onetype].c4;
|
||||
sigma_gamma = p2[onetype].sigma_gamma;
|
||||
costheta = p3[onetype3].costheta;
|
||||
lambda_epsilon = p3[onetype3].lambda_epsilon;
|
||||
lambda_epsilon2 = p3[onetype3].lambda_epsilon2;
|
||||
if (SPQ == 0) {
|
||||
powerp = p2f[3].powerp;
|
||||
powerq = p2f[3].powerq;
|
||||
powerp = p2f[onetype].powerp;
|
||||
powerq = p2f[onetype].powerq;
|
||||
}
|
||||
if (EFLAG) {
|
||||
c5 = p2e[3].c5;
|
||||
c6 = p2e[3].c6;
|
||||
c5 = p2e[onetype].c5;
|
||||
c6 = p2e[onetype].c6;
|
||||
}
|
||||
}
|
||||
|
||||
@ -312,7 +317,7 @@ void PairSWIntel::eval(const int offload, const int vflag,
|
||||
}
|
||||
|
||||
const int * _noalias const jlist = firstneigh + cnumneigh[ii];
|
||||
const int jnum = numneigh[ii];
|
||||
const int jnum = numneigh[i];
|
||||
const int jnumhalf = numneighhalf[ii];
|
||||
|
||||
acc_t fxtmp, fytmp, fztmp, fwtmp;
|
||||
@ -541,11 +546,9 @@ void PairSWIntel::eval(const int offload, const int vflag,
|
||||
IP_PRE_fdotr_reduce(1, nall, nthreads, f_stride, vflag,
|
||||
ov0, ov1, ov2, ov3, ov4, ov5);
|
||||
|
||||
if (EFLAG) {
|
||||
if (EFLAG || vflag) {
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = (acc_t)0.0;
|
||||
}
|
||||
if (vflag) {
|
||||
ev_global[2] = ov0;
|
||||
ev_global[3] = ov1;
|
||||
ev_global[4] = ov2;
|
||||
@ -585,7 +588,7 @@ template <int SPQ,int ONETYPE,int EFLAG,class flt_t,class acc_t>
|
||||
void PairSWIntel::eval(const int offload, const int vflag,
|
||||
IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const ForceConst<flt_t> &fc, const int astart,
|
||||
const int aend, const int pad_width)
|
||||
const int aend)
|
||||
{
|
||||
typedef typename SIMD_type<flt_t>::SIMD_vec SIMD_flt_t;
|
||||
typedef typename SIMD_type<acc_t>::SIMD_vec SIMD_acc_t;
|
||||
@ -601,10 +604,13 @@ void PairSWIntel::eval(const int offload, const int vflag,
|
||||
|
||||
ATOM_T * _noalias const x = buffers->get_x(offload);
|
||||
const int * _noalias const ilist = list->ilist;
|
||||
const int * _noalias const numneighhalf = buffers->get_atombin();
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const int * _noalias const firstneigh = list->firstneigh[ilist[0]];
|
||||
|
||||
int *nhalf, *cnum;
|
||||
buffers->get_list_data3(list, nhalf, cnum);
|
||||
const int * _noalias const numneighhalf = nhalf;
|
||||
const int * _noalias const cnumneigh = cnum;
|
||||
|
||||
const FC_PACKED0_T * _noalias const p2 = fc.p2[0];
|
||||
const FC_PACKED1_T * _noalias const p2f = fc.p2f[0];
|
||||
@ -654,7 +660,7 @@ void PairSWIntel::eval(const int offload, const int vflag,
|
||||
in(ccachex,ccachey,ccachez,ccachew:length(0) alloc_if(0) free_if(0)) \
|
||||
in(ccachei,ccachej,ccachef:length(0) alloc_if(0) free_if(0)) \
|
||||
in(ccache_stride,nthreads,inum,nall,ntypes,vflag,eatom,offload) \
|
||||
in(astart,nlocal,f_stride,minlocal,separate_flag,pad_width) \
|
||||
in(astart,nlocal,f_stride,minlocal,separate_flag) \
|
||||
in(ccache_stride3) \
|
||||
out(f_start:length(f_stride) alloc_if(0) free_if(0)) \
|
||||
out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \
|
||||
@ -670,8 +676,8 @@ void PairSWIntel::eval(const int offload, const int vflag,
|
||||
f_stride, x, 0);
|
||||
|
||||
acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||
if (EFLAG) oevdwl = (acc_t)0;
|
||||
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
if (EFLAG || vflag)
|
||||
oevdwl = ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5)
|
||||
@ -701,34 +707,38 @@ void PairSWIntel::eval(const int offload, const int vflag,
|
||||
SIMD_flt_t cutsq, cut, powerp, powerq, sigma, c1, c2, c3,c4, c5, c6;
|
||||
SIMD_flt_t sigma_gamma, costheta, lambda_epsilon, lambda_epsilon2;
|
||||
if (ONETYPE) {
|
||||
cutsq = SIMD_set(p2[3].cutsq);
|
||||
cut = SIMD_set(p2f[3].cut);
|
||||
sigma = SIMD_set(p2f[3].sigma);
|
||||
c1 = SIMD_set(p2f2[3].c1);
|
||||
c2 = SIMD_set(p2f2[3].c2);
|
||||
c3 = SIMD_set(p2f2[3].c3);
|
||||
c4 = SIMD_set(p2f2[3].c4);
|
||||
sigma_gamma = SIMD_set(p2[3].sigma_gamma);
|
||||
costheta = SIMD_set(p3[7].costheta);
|
||||
lambda_epsilon = SIMD_set(p3[7].lambda_epsilon);
|
||||
lambda_epsilon2 = SIMD_set(p3[7].lambda_epsilon2);
|
||||
cutsq = SIMD_set(p2[_onetype].cutsq);
|
||||
cut = SIMD_set(p2f[_onetype].cut);
|
||||
sigma = SIMD_set(p2f[_onetype].sigma);
|
||||
c1 = SIMD_set(p2f2[_onetype].c1);
|
||||
c2 = SIMD_set(p2f2[_onetype].c2);
|
||||
c3 = SIMD_set(p2f2[_onetype].c3);
|
||||
c4 = SIMD_set(p2f2[_onetype].c4);
|
||||
sigma_gamma = SIMD_set(p2[_onetype].sigma_gamma);
|
||||
costheta = SIMD_set(p3[_onetype3].costheta);
|
||||
lambda_epsilon = SIMD_set(p3[_onetype3].lambda_epsilon);
|
||||
lambda_epsilon2 = SIMD_set(p3[_onetype3].lambda_epsilon2);
|
||||
if (SPQ == 0) {
|
||||
powerp = SIMD_set(p2f[3].powerp);
|
||||
powerq = SIMD_set(p2f[3].powerq);
|
||||
powerp = SIMD_set(p2f[_onetype].powerp);
|
||||
powerq = SIMD_set(p2f[_onetype].powerq);
|
||||
}
|
||||
if (EFLAG) {
|
||||
c5 = SIMD_set(p2e[3].c5);
|
||||
c6 = SIMD_set(p2e[3].c6);
|
||||
c5 = SIMD_set(p2e[_onetype].c5);
|
||||
c6 = SIMD_set(p2e[_onetype].c6);
|
||||
}
|
||||
}
|
||||
|
||||
const SIMD_int goffset = SIMD_set(0,16,32,48,64,80,96,112,128,
|
||||
144,160,176,192,208,224,240);
|
||||
acc_t * const dforce = &(f[0].x);
|
||||
for (int i = iifrom; i < iito; i += iip) {
|
||||
SIMD_int ilistv = SIMD_load(ilist + i);
|
||||
SIMD_int goffset = ilistv * 16;
|
||||
SIMD_mask imask = ilistv < iito;
|
||||
SIMD_mask imask;
|
||||
if (swidth == 16)
|
||||
imask = 0xFFFF;
|
||||
else
|
||||
imask = 0xFF;
|
||||
const int rem = iito - i;
|
||||
if (rem < swidth) imask = imask >> (swidth - rem);
|
||||
SIMD_flt_t xtmp, ytmp, ztmp;
|
||||
SIMD_int itype, itype_offset;
|
||||
|
||||
@ -741,11 +751,12 @@ void PairSWIntel::eval(const int offload, const int vflag,
|
||||
|
||||
#ifdef LMP_INTEL_3BODY_FAST
|
||||
const int* ng = firstneigh + cnumneigh[i] - swidth;
|
||||
const SIMD_int jnum = SIMD_loadz(imask, numneigh + i);
|
||||
#else
|
||||
SIMD_int ng = SIMD_load(cnumneigh + i);
|
||||
const SIMD_int jnum = SIMD_gatherz(imask, numneigh, ilistv);
|
||||
ng = ng - 1;
|
||||
#endif
|
||||
const SIMD_int jnum = SIMD_loadz(imask, numneigh + i);
|
||||
const SIMD_int jnumhalf = SIMD_loadz(imask, numneighhalf + i);
|
||||
const int jnum_max = SIMD_max(jnum);
|
||||
|
||||
@ -1051,11 +1062,9 @@ void PairSWIntel::eval(const int offload, const int vflag,
|
||||
IP_PRE_fdotr_reduce(1, nall, nthreads, f_stride, vflag,
|
||||
ov0, ov1, ov2, ov3, ov4, ov5);
|
||||
|
||||
if (EFLAG) {
|
||||
if (EFLAG || vflag) {
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = (acc_t)0.0;
|
||||
}
|
||||
if (vflag) {
|
||||
ev_global[2] = ov0;
|
||||
ev_global[3] = ov1;
|
||||
ev_global[4] = ov2;
|
||||
@ -1162,26 +1171,22 @@ void PairSWIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
|
||||
int tp1 = atom->ntypes + 1;
|
||||
fc.set_ntypes(tp1,memory,_cop);
|
||||
buffers->set_ntypes(tp1);
|
||||
flt_t **cutneighsq = buffers->get_cutneighsq();
|
||||
|
||||
// Repeat cutsq calculation because done after call to init_style
|
||||
double cut, cutneigh;
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||
double cut;
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0))
|
||||
cut = init_one(i,j);
|
||||
cutneigh = cut + neighbor->skin;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
|
||||
}
|
||||
else
|
||||
cut = 0.0;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
}
|
||||
}
|
||||
|
||||
_onetype = 0;
|
||||
if (atom->ntypes == 1) _onetype = 1;
|
||||
|
||||
_spq = 1;
|
||||
int mytypes = 0;
|
||||
for (int ii = 0; ii < tp1; ii++) {
|
||||
int i = map[ii];
|
||||
for (int jj = 0; jj < tp1; jj++) {
|
||||
@ -1231,6 +1236,9 @@ void PairSWIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
fc.p3[ii][jj][kk].lambda_epsilon = 0;
|
||||
fc.p3[ii][jj][kk].lambda_epsilon2 = 0;
|
||||
} else {
|
||||
mytypes++;
|
||||
_onetype = ii * tp1 + jj;
|
||||
_onetype3 = ii * tp1 * tp1 + jj * tp1 + kk;
|
||||
int ijkparam = elem2param[i][j][k];
|
||||
fc.p3[ii][jj][kk].costheta = params[ijkparam].costheta;
|
||||
fc.p3[ii][jj][kk].lambda_epsilon = params[ijkparam].lambda_epsilon;
|
||||
@ -1239,12 +1247,7 @@ void PairSWIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_host_pad = 1;
|
||||
_offload_pad = 1;
|
||||
|
||||
if (INTEL_NBOR_PAD > 1)
|
||||
_host_pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t);
|
||||
if (mytypes > 1) _onetype = 0;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_cop < 0) return;
|
||||
@ -1253,16 +1256,11 @@ void PairSWIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
FC_PACKED1p2_T *op2f2 = fc.p2f2[0];
|
||||
FC_PACKED2_T *op2e = fc.p2e[0];
|
||||
FC_PACKED3_T *op3 = fc.p3[0][0];
|
||||
flt_t * ocutneighsq = cutneighsq[0];
|
||||
int tp1sq = tp1 * tp1;
|
||||
int tp1cu = tp1sq * tp1;
|
||||
if (op2 != NULL && op2f != NULL && op2f2 != NULL && op2e != NULL &&
|
||||
op3 != NULL && ocutneighsq != NULL) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
in(op2,op2f,op2f2,op2e: length(tp1sq) alloc_if(0) free_if(0)) \
|
||||
in(op3: length(tp1cu) alloc_if(0) free_if(0)) \
|
||||
in(ocutneighsq: length(tp1sq))
|
||||
}
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
in(op2,op2f,op2f2,op2e: length(tp1sq) alloc_if(0) free_if(0)) \
|
||||
in(op3: length(tp1cu) alloc_if(0) free_if(0))
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -49,13 +49,13 @@ class PairSWIntel : public PairSW {
|
||||
template <int SPQ, int ONETYPE, int EFLAG, class flt_t, class acc_t>
|
||||
void eval(const int offload, const int vflag,
|
||||
IntelBuffers<flt_t,acc_t> * buffers, const ForceConst<flt_t> &fc,
|
||||
const int astart, const int aend, const int pad_width);
|
||||
const int astart, const int aend);
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void pack_force_const(ForceConst<flt_t> &fc,
|
||||
IntelBuffers<flt_t, acc_t> *buffers);
|
||||
|
||||
int _ccache_stride, _host_pad, _offload_pad, _spq, _onetype;
|
||||
int _ccache_stride, _spq, _onetype, _onetype3;
|
||||
#ifdef LMP_USE_AVXCD
|
||||
int _ccache_stride3;
|
||||
#endif
|
||||
@ -75,7 +75,7 @@ class PairSWIntel : public PairSW {
|
||||
flt_t c1, c2, c3, c4;
|
||||
} fc_packed1p2;
|
||||
typedef struct {
|
||||
flt_t c5, c6;
|
||||
flt_t c5, c6, d1, d2;
|
||||
} fc_packed2;
|
||||
typedef struct {
|
||||
flt_t costheta, lambda_epsilon, lambda_epsilon2, pad;
|
||||
|
||||
@ -220,7 +220,7 @@ struct IntelKernelTersoff : public lmp_intel::vector_routines<flt_t, acc_t, mic>
|
||||
static void kernel_step(
|
||||
int eatom, int vflag,
|
||||
const int * _noalias const numneigh,
|
||||
const int * _noalias const cnumneigh,
|
||||
iarr vcnumneigh,
|
||||
const int * _noalias const firstneigh,
|
||||
int ntypes,
|
||||
typename IntelBuffers<flt_t,acc_t>::atom_t * _noalias const x,
|
||||
@ -241,7 +241,8 @@ struct IntelKernelTersoff : public lmp_intel::vector_routines<flt_t, acc_t, mic>
|
||||
const c_inner_t * _noalias const c_inner,
|
||||
const c_outer_t * _noalias const c_outer,
|
||||
typename IntelBuffers<flt_t,acc_t>::vec3_acc_t * _noalias const f,
|
||||
avec *vsevdwl, int compress_idx, int i, iarr js, bvec vmask_repulsive
|
||||
avec *vsevdwl, int compress_idx, int ii, int i, iarr js,
|
||||
bvec vmask_repulsive
|
||||
);
|
||||
};
|
||||
|
||||
@ -274,9 +275,12 @@ void PairTersoffIntel::eval(const int offload, const int vflag,
|
||||
|
||||
const int * _noalias const ilist = list->ilist;
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const numneighhalf = buffers->get_atombin();
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const int * _noalias const firstneigh = list->firstneigh[ilist[0]];
|
||||
|
||||
int *nhalf, *cnum;
|
||||
buffers->get_list_data3(list, nhalf, cnum);
|
||||
const int * _noalias const numneighhalf = nhalf;
|
||||
const int * _noalias const cnumneigh = cnum;
|
||||
|
||||
typedef typename ForceConst<flt_t>::c_inner_t c_inner_t;
|
||||
typedef typename ForceConst<flt_t>::c_outer_t c_outer_t;
|
||||
@ -332,13 +336,13 @@ void PairTersoffIntel::eval(const int offload, const int vflag,
|
||||
IP_PRE_repack_for_offload(1, separate_flag, nlocal, nall,
|
||||
f_stride, x, 0);
|
||||
|
||||
acc_t oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||
if (EFLAG) oevdwl = oecoul = (acc_t)0;
|
||||
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||
if (EFLAG || vflag)
|
||||
oevdwl = ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel reduction(+:oevdwl,oecoul,ov0,ov1,ov2,ov3,ov4,ov5)
|
||||
#pragma omp parallel reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5)
|
||||
#endif
|
||||
{
|
||||
int iifrom, iito, tid;
|
||||
@ -379,11 +383,9 @@ void PairTersoffIntel::eval(const int offload, const int vflag,
|
||||
IP_PRE_fdotr_reduce(1, nall, nthreads, f_stride, vflag,
|
||||
ov0, ov1, ov2, ov3, ov4, ov5);
|
||||
|
||||
if (EFLAG) {
|
||||
if (EFLAG || vflag) {
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = 0.0;
|
||||
}
|
||||
if (vflag) {
|
||||
ev_global[2] = ov0;
|
||||
ev_global[3] = ov1;
|
||||
ev_global[4] = ov2;
|
||||
@ -461,19 +463,16 @@ void PairTersoffIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
int tp1 = atom->ntypes + 1;
|
||||
|
||||
fc.set_ntypes(tp1, memory, _cop);
|
||||
buffers->set_ntypes(tp1);
|
||||
flt_t **cutneighsq = buffers->get_cutneighsq();
|
||||
|
||||
// Repeat cutsq calculation because done after call to init_style
|
||||
double cut, cutneigh;
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||
double cut;
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0))
|
||||
cut = init_one(i, j);
|
||||
cutneigh = cut + neighbor->skin;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
|
||||
}
|
||||
else
|
||||
cut = 0.0;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
}
|
||||
}
|
||||
|
||||
@ -547,7 +546,6 @@ void PairTersoffIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
typename ForceConst<flt_t>::c_inner_loop_t * c_inner_loop = fc.c_inner_loop[0][0];
|
||||
typename ForceConst<flt_t>::c_cutoff_t * c_cutoff_inner = fc.c_cutoff_inner[0][0];
|
||||
typename ForceConst<flt_t>::c_inner_t * c_inner = fc.c_inner[0][0];
|
||||
flt_t * ocutneighsq = cutneighsq[0];
|
||||
size_t VL = 512 / 8 / sizeof(flt_t);
|
||||
int ntypes = tp1;
|
||||
int ntypes_pad = ntypes + VL - ntypes % VL;
|
||||
@ -557,8 +555,7 @@ void PairTersoffIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
in(c_first_loop, c_second_loop, c_cutoff_outer, c_outer : length(tp1sq) alloc_if(0) free_if(0)) \
|
||||
in(c_inner : length(tp1cb) alloc_if(0) free_if(0)) \
|
||||
in(c_cutoff_inner : length(tp1cb_pad) alloc_if(0) free_if(0)) \
|
||||
in(ocutneighsq: length(tp1sq) alloc_if(0) free_if(0))
|
||||
in(c_cutoff_inner : length(tp1cb_pad) alloc_if(0) free_if(0))
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -647,7 +644,7 @@ template<class flt_t, class acc_t, lmp_intel::CalculationMode mic, bool pack_i>
|
||||
template<int EFLAG>
|
||||
void IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::kernel_step(
|
||||
int eatom, int vflag,
|
||||
const int * _noalias const numneigh, const int * _noalias const cnumneigh,
|
||||
const int * _noalias const numneigh, iarr vcnumneigh,
|
||||
const int * _noalias const firstneigh, int ntypes,
|
||||
typename IntelBuffers<flt_t,acc_t>::atom_t * _noalias const x,
|
||||
const typename PairTersoffIntel::ForceConst<flt_t>::c_inner_t * _noalias const c_inner,
|
||||
@ -681,6 +678,7 @@ void IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::kernel_step(
|
||||
// TDO: We could extract this from the driver routine
|
||||
ivec vis = v::int_mullo(v_i4floats, v::int_load_vl(is));
|
||||
ivec vjs = v::int_mullo(v_i4floats, v::int_load_vl(js));
|
||||
ivec vcnumneigh_i = v::int_load_vl(vcnumneigh);
|
||||
bvec vmask = v::mask_enable_lower(compress_idx);
|
||||
fvec vx_i = v::zero(), vy_i = v::zero(), vz_i = v::zero();
|
||||
ivec vw_i = v_i0;
|
||||
@ -692,7 +690,6 @@ void IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::kernel_step(
|
||||
fvec vrijsq = vdx_ij * vdx_ij + vdy_ij * vdy_ij + vdz_ij * vdz_ij;
|
||||
fvec vrij = sqrt(vrijsq);
|
||||
ivec vis_orig = v::int_load_vl(is);
|
||||
ivec vcnumneigh_i = v::int_gather<4>(v_i0, vmask, vis_orig, cnumneigh);
|
||||
ivec vnumneigh_i = v::int_gather<4>(v_i0, vmask, vis_orig, numneigh);
|
||||
ivec vc_idx_ij = v::int_mullo(v_i4floats, vw_j + v::int_mullo(v_i_ntypes, vw_i));
|
||||
|
||||
@ -930,6 +927,7 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel_step_const_i(
|
||||
typename IntelBuffers<flt_t,acc_t>::vec3_acc_t * _noalias const f,
|
||||
avec *vsevdwl,
|
||||
int compress_idx,
|
||||
int ii,
|
||||
int i,
|
||||
iarr js,
|
||||
bvec vmask_repulsive
|
||||
@ -970,7 +968,7 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel_step_const_i(
|
||||
fvec vrijsq = vdx_ij * vdx_ij + vdy_ij * vdy_ij + vdz_ij * vdz_ij;
|
||||
fvec vrij = sqrt(vrijsq);
|
||||
|
||||
int cnumneigh_i = cnumneigh[i];
|
||||
int cnumneigh_i = cnumneigh[ii];
|
||||
int numneigh_i = numneigh[i];
|
||||
ivec vc_idx_j = v::int_mullo(v_i4floats, vw_j);
|
||||
ivec vc_idx_j_ntypes = v::int_mullo(v_i_ntypes, vc_idx_j);
|
||||
@ -1147,7 +1145,7 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel(
|
||||
) {
|
||||
int compress_idx = 0;
|
||||
int ii, jj;
|
||||
iarr is, js;
|
||||
iarr is, js, vc;
|
||||
avec vsevdwl = v::acc_zero();
|
||||
ivec v_i4floats(static_cast<int>(sizeof(typename v::fscal) * 4));
|
||||
ivec vj, v_NEIGHMASK(NEIGHMASK);
|
||||
@ -1166,7 +1164,7 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel(
|
||||
flt_t y_i = x[i].y;
|
||||
flt_t z_i = x[i].z;
|
||||
int jlist_off_i = cnumneigh[ii];
|
||||
int jnum = numneigh[ii];
|
||||
int jnum = numneigh[i];
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
int j = firstneigh[jlist_off_i + jj] & NEIGHMASK;
|
||||
int w_j = x[j].w;
|
||||
@ -1178,6 +1176,7 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel(
|
||||
if (rsq < cutsq) {
|
||||
is[compress_idx] = ii;
|
||||
js[compress_idx] = j;
|
||||
vc[compress_idx] = jlist_off_i;
|
||||
if (jj < numneighhalf[ii])
|
||||
repulsive_flag[compress_idx] = 1;
|
||||
compress_idx += 1;
|
||||
@ -1187,7 +1186,7 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel(
|
||||
vmask_repulsive = v::int_cmpneq(v::int_load_vl(repulsive_flag), ivec(0));
|
||||
kernel_step<EFLAG>(
|
||||
eatom, vflag,
|
||||
numneigh, cnumneigh, firstneigh, ntypes,
|
||||
numneigh, vc, firstneigh, ntypes,
|
||||
x, c_inner, c_outer, f,
|
||||
&vsevdwl, compress_idx,
|
||||
is, js, vmask_repulsive
|
||||
@ -1203,7 +1202,7 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel(
|
||||
numneigh, cnumneigh, firstneigh, ntypes,
|
||||
x, c_inner, c_outer, f,
|
||||
&vsevdwl, compress_idx,
|
||||
i, js, vmask_repulsive
|
||||
ii, i, js, vmask_repulsive
|
||||
);
|
||||
compress_idx = 0;
|
||||
v::int_clear_arr(repulsive_flag);
|
||||
@ -1215,7 +1214,7 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel(
|
||||
vmask_repulsive = v::int_cmpneq(v::int_load_vl(repulsive_flag), ivec(0));
|
||||
IntelKernelTersoff::kernel_step<EFLAG>(
|
||||
eatom, vflag,
|
||||
numneigh, cnumneigh, firstneigh, ntypes,
|
||||
numneigh, vc, firstneigh, ntypes,
|
||||
x, c_inner, c_outer, f,
|
||||
&vsevdwl, compress_idx,
|
||||
is, js, vmask_repulsive
|
||||
|
||||
@ -581,6 +581,12 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers)
|
||||
else
|
||||
nthr = comm->nthreads;
|
||||
|
||||
if (fix->need_zero(0)) {
|
||||
int zl = nlocal;
|
||||
if (force->newton_pair) zl += atom->nghost;
|
||||
memset(f, 0, zl * sizeof(FORCE_T));
|
||||
}
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) \
|
||||
shared(nlocal, nthr) if(!_use_lrt)
|
||||
@ -726,6 +732,12 @@ void PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers)
|
||||
FFT_SCALAR * _noalias const particle_eky = this->particle_eky;
|
||||
FFT_SCALAR * _noalias const particle_ekz = this->particle_ekz;
|
||||
|
||||
if (fix->need_zero(0)) {
|
||||
int zl = nlocal;
|
||||
if (force->newton_pair) zl += atom->nghost;
|
||||
memset(f, 0, zl * sizeof(FORCE_T));
|
||||
}
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) \
|
||||
shared(nlocal, nthr) if(!_use_lrt)
|
||||
|
||||
@ -79,7 +79,9 @@ void *Memory::srealloc(void *ptr, bigint nbytes, const char *name)
|
||||
|
||||
#if defined(LMP_USE_TBB_ALLOCATOR)
|
||||
ptr = scalable_aligned_realloc(ptr, nbytes, LAMMPS_MEMALIGN);
|
||||
#elif defined(LMP_INTEL_NO_TBB) && defined(LAMMPS_MEMALIGN)
|
||||
#elif defined(LMP_INTEL_NO_TBB) && defined(LAMMPS_MEMALIGN) && \
|
||||
defined(__INTEL_COMPILER)
|
||||
|
||||
ptr = realloc(ptr, nbytes);
|
||||
uintptr_t offset = ((uintptr_t)(const void *)(ptr)) % LAMMPS_MEMALIGN;
|
||||
if (offset) {
|
||||
|
||||
@ -207,6 +207,10 @@ class Pair : protected Pointers {
|
||||
|
||||
typedef union {int i; float f;} union_int_float_t;
|
||||
|
||||
// Accessor for the user-intel package to determine virial calc for hybrid
|
||||
|
||||
inline int fdotr_is_set() const { return vflag_fdotr; }
|
||||
|
||||
protected:
|
||||
int vflag_fdotr;
|
||||
int maxeatom,maxvatom;
|
||||
|
||||
Reference in New Issue
Block a user