Fixes to the Intel package to remove errors and warnings with clang-based compilers and to support alternate preprocessor defines for the next-generation Intel compiler (currently not recommended for performance).
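Most of the hunks below adjust code that assumed the classic Intel compiler. As a minimal sketch of the macro situation being handled (assuming the usual predefined macros of the two compiler families; the feature-test name is hypothetical and not part of this patch): classic icc/icpc defines __INTEL_COMPILER, while the clang-based icx/icpx defines __INTEL_LLVM_COMPILER (and __clang__) instead, so existing guards have to accept either.

// Sketch only; LMP_INTEL_ANY is a hypothetical macro, not something this patch adds.
#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
#define LMP_INTEL_ANY 1   // some flavor of the Intel compiler is in use
#else
#define LMP_INTEL_ANY 0
#endif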
@@ -85,7 +85,11 @@ struct vector_ops {};
 // Intrinsic routines for IMCI and AVX-512
 #if defined(__MIC__) || defined(__AVX512F__)
 // Integer vector class
+#ifdef __INTEL_LLVM_COMPILER
+#pragma pack(push,16)
+#else
 #pragma pack(push,64)
+#endif
 struct ivec32x16 {
   __m512i vec;
   ivec32x16() {}
@@ -113,7 +117,7 @@ struct vector_ops<double, KNC> {
   typedef double fscal;
   typedef F64vec8 fvec;
   typedef ivec32x16 ivec;
-  typedef __mmask bvec;
+  typedef __mmask16 bvec;
   typedef double farr[8] __attribute__((aligned(64)));
   typedef int iarr[16] __attribute__((aligned(64)));
   static fvec recip(const fvec &a) { return _mm512_recip_pd(a); }
@@ -250,7 +254,7 @@ struct vector_ops<float, KNC> {
   typedef float fscal;
   typedef F32vec16 fvec;
   typedef ivec32x16 ivec;
-  typedef __mmask bvec;
+  typedef __mmask16 bvec;
   typedef float farr[16] __attribute__((aligned(64)));
   typedef int iarr[16] __attribute__((aligned(64)));
   static const bvec full_mask = 0xFFFF;
@@ -380,16 +384,18 @@ struct vector_ops<float, KNC> {
     *r3 = gather<4>(*r3, mask, idxs, reinterpret_cast<const char *>(base) + 12);
   }
   // Additional routines needed for the implementation of mixed precision
-  static fvec cvtdown(const vector_ops<double,KNC>::fvec &lo, const vector_ops<double,KNC>::fvec &hi) {
+  static fvec cvtdown(const vector_ops<double,KNC>::fvec &lo,
+                      const vector_ops<double,KNC>::fvec &hi) {
     __m512 t1 = _mm512_cvtpd_pslo(lo);
     __m512 t2 = _mm512_cvtpd_pslo(hi);
-    return _mm512_mask_permute4f128_ps(t1, 0xFF00, t2, _MM_PERM_BADC);
+    return _mm512_mask_shuffle_f32x4(_mm512_undefined_ps(), 0xFF00, t2, t2,
+                                     0x4E);
   }
   static vector_ops<double,KNC>::fvec cvtup_lo(const fvec &a) {
     return _mm512_cvtpslo_pd(a);
   }
   static vector_ops<double,KNC>::fvec cvtup_hi(const fvec &a) {
-    return _mm512_cvtpslo_pd(_mm512_permute4f128_ps(a, _MM_PERM_BADC)); // permute DCBA -> BADC
+    return _mm512_cvtpslo_pd(_mm512_shuffle_f32x4(a, a, 0x4E));
   }
   static void mask_cvtup(const bvec &a, vector_ops<double,KNC>::bvec *blo, vector_ops<double,KNC>::bvec *bhi) {
     *blo = a & 0xFF;
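The hunk above (and the avec16pd change further down) replaces KNC-era permute intrinsics with their AVX-512 counterparts. The immediate 0x4E encodes the same half swap as _MM_PERM_BADC, so the substitution is value-for-value; a small check, assuming <immintrin.h> and an AVX-512 target (the helper name is illustrative, not from the patch):

#include <immintrin.h>

// _MM_PERM_BADC swaps the two 256-bit halves of a 512-bit vector; written as
// a raw immediate, that lane pattern is 0b01001110 == 0x4E.
static_assert(_MM_PERM_BADC == 0x4E, "lane-order immediates should agree");

// Illustrative helper: the AVX-512 shuffle used above, applied to a single
// source, performs the same half swap as the retired KNC permute.
static inline __m512 swap_256bit_halves(__m512 a) {
  return _mm512_shuffle_f32x4(a, a, 0x4E);
}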
@@ -1692,7 +1698,7 @@ struct vector_ops<flt_t, NONE> {
   typedef flt_t fscal;
   typedef flt_t fvec;
   typedef int ivec;
-  typedef bool bvec;
+  typedef int bvec;
   typedef flt_t farr[1];
   typedef int iarr[1];
   static fvec recip(const fvec &a) {
@@ -651,12 +651,12 @@ class avec16pd {
     return a >> 8;
   }
   VEC_INLINE static __m512i get_ivec_hi(__m512i a) {
-    return _mm512_permute4f128_epi32(a, _MM_PERM_BADC);
+    return _mm512_shuffle_i32x4(a, a, 0x4E);
  }
 public:
   VEC_INLINE avec16pd(const FVEC_NAME &a) {
     lo_ = _mm512_cvtpslo_pd(a.val_);
-    hi_ = _mm512_cvtpslo_pd(_mm512_permute4f128_ps(a.val_, _MM_PERM_BADC));
+    hi_ = _mm512_cvtpslo_pd(_mm512_shuffle_f32x4(a.val_, a.val_, 0x4E));
   }
   VEC_INLINE static avec16pd undefined() {
     return avec16pd(_mm512_undefined_pd(), _mm512_undefined_pd());
@@ -16,6 +16,11 @@
    Contributing author: W. Michael Brown (Intel)
 ------------------------------------------------------------------------- */
 
+#ifdef __INTEL_LLVM_COMPILER
+#define __INTEL_COMPILER __INTEL_LLVM_COMPILER
+#define __INTEL_COMPILER_BUILD_DATE __INTEL_LLVM_COMPILER
+#endif
+
 #ifdef __INTEL_COMPILER
 #define LMP_SIMD_COMPILER
 #if (__INTEL_COMPILER_BUILD_DATE > 20160720)
@@ -2332,7 +2332,7 @@ static void aut_rebo_neigh(KernelArgsAIREBOT<flt_t,acc_t> * ka) {
     int n_skin = 0;
 
     int lowest_idx;
-    #pragma unroll(4)
+    //#pragma unroll(4)
     for (lowest_idx = 0; lowest_idx < jnum; lowest_idx += fvec::VL) {
       bvec j_mask = bvec::full();
       if (lowest_idx + fvec::VL > jnum) j_mask = bvec::only(jnum - lowest_idx);
@@ -284,7 +284,7 @@ void PairDPDIntel::eval(const int offload, const int vflag,
       }
 
       #if defined(LMP_SIMD_COMPILER)
-      #pragma vector aligned nog2s
+      #pragma vector aligned
       #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
                              sv0, sv1, sv2, sv3, sv4, sv5)
       #endif
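The nog2s clause on #pragma vector appears to be understood only by the classic Intel compiler, which is presumably why this hunk and the analogous ones below keep just the portable aligned clause. If the clause were ever wanted again on classic icc only, one hypothetical way to confine it (not what this commit does) would be a _Pragma-based macro:

// Hypothetical guard, not part of this commit.
#if defined(__INTEL_COMPILER) && !defined(__INTEL_LLVM_COMPILER)
#define LMP_PRAGMA_VECTOR_ALIGNED _Pragma("vector aligned nog2s")
#else
#define LMP_PRAGMA_VECTOR_ALIGNED _Pragma("vector aligned")
#endif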
@@ -306,7 +306,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
       acc_t rhoi = (acc_t)0.0;
       int ej = 0;
       #if defined(LMP_SIMD_COMPILER)
-      #pragma vector aligned nog2s
+      #pragma vector aligned
       #pragma ivdep
       #endif
       for (int jj = 0; jj < jnum; jj++) {
@@ -325,7 +325,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
       }
 
       #if defined(LMP_SIMD_COMPILER)
-      #pragma vector aligned nog2s
+      #pragma vector aligned
       #pragma simd reduction(+:rhoi)
       #endif
       for (int jj = 0; jj < ej; jj++) {
@@ -412,7 +412,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
       if (EFLAG) tevdwl = (acc_t)0.0;
 
       #if defined(LMP_SIMD_COMPILER)
-      #pragma vector aligned nog2s
+      #pragma vector aligned
       #pragma simd reduction(+:tevdwl)
       #endif
       for (int ii = iifrom; ii < iito; ++ii) {
@@ -486,7 +486,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
 
       int ej = 0;
       #if defined(LMP_SIMD_COMPILER)
-      #pragma vector aligned nog2s
+      #pragma vector aligned
       #pragma ivdep
       #endif
       for (int jj = 0; jj < jnum; jj++) {
@@ -508,7 +508,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
       }
 
       #if defined(LMP_SIMD_COMPILER)
-      #pragma vector aligned nog2s
+      #pragma vector aligned
       #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
                              sv0, sv1, sv2, sv3, sv4, sv5)
       #endif
@@ -237,7 +237,7 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
       if (vflag == VIRIAL_PAIR) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
 
       #if defined(LMP_SIMD_COMPILER)
-      #pragma vector aligned nog2s
+      #pragma vector aligned
       #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
                              sv0, sv1, sv2, sv3, sv4, sv5)
       #endif
@@ -367,11 +367,11 @@ void PairTersoffIntel::eval(const int offload, const int vflag,
         lmp_intel::vector_traits<lmp_intel::mode>::support_integer_and_gather_ops;
       bool use_scalar = VL < 4;
       if (use_scalar) {
-        IntelKernelTersoff<flt_t,acc_t,lmp_intel::NONE,false>::kernel<EFLAG>(ARGS);
+        IntelKernelTersoff<flt_t,acc_t,lmp_intel::NONE,false>::template kernel<EFLAG>(ARGS);
       } else if (pack_i) {
-        IntelKernelTersoff<flt_t,acc_t,lmp_intel::mode,true >::kernel<EFLAG>(ARGS);
+        IntelKernelTersoff<flt_t,acc_t,lmp_intel::mode,true >::template kernel<EFLAG>(ARGS);
       } else {
-        IntelKernelTersoff<flt_t,acc_t,lmp_intel::mode,false>::kernel<EFLAG>(ARGS);
+        IntelKernelTersoff<flt_t,acc_t,lmp_intel::mode,false>::template kernel<EFLAG>(ARGS);
       }
       if (EFLAG) oevdwl += sevdwl;
     }
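Clang-based front ends enforce the C++ rule that a member template of a dependent type must be named with the template keyword; the EDG-based classic compiler accepted the old spelling. That is what the ::template kernel<EFLAG> changes above and the v::template int_gather<4> / v::template gather<4> changes in the following hunks address. A self-contained illustration with hypothetical names:

// Illustration only: why the "template" disambiguator is required.
struct Ops {
  template <int EFLAG> static void kernel() {}
};

template <class V>           // V is a dependent type inside caller()
void caller() {
  // V::kernel<0>();         // ill-formed: parsed as (V::kernel < 0) > ()
  V::template kernel<0>();   // OK: marks kernel as a member template
}

int main() { caller<Ops>(); }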
@@ -691,7 +691,8 @@ void IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::kernel_step(
     fvec vrijsq = vdx_ij * vdx_ij + vdy_ij * vdy_ij + vdz_ij * vdz_ij;
     fvec vrij = sqrt(vrijsq);
     ivec vis_orig = v::int_load_vl(is);
-    ivec vnumneigh_i = v::int_gather<4>(v_i0, vmask, vis_orig, numneigh);
+    ivec vnumneigh_i = v::template int_gather<4>(v_i0, vmask, vis_orig,
+                                                 numneigh);
     ivec vc_idx_ij = v::int_mullo(v_i4floats, vw_j + v::int_mullo(v_i_ntypes, vw_i));
 
     fvec vzeta = v::zero();
@@ -718,14 +719,16 @@ void IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::kernel_step(
     while (! v::mask_testz(vactive_mask) && cache_idx < N_CACHE) {
       bvec vnew_mask = vactive_mask & ~ veff_old_mask;
       vks = v::int_mullo(v_i4floats, v_i_NEIGHMASK &
-          v::int_gather<4>(vks, vactive_mask, vkks + vcnumneigh_i, firstneigh));
+          (v::template int_gather<4>(vks, vactive_mask,
+                                     vkks + vcnumneigh_i,
+                                     firstneigh)));
       v::gather_x(vks, vnew_mask, x, &vx_k, &vy_k, &vz_k, &vw_k);
       fvec vdx_ik = (vx_k - vx_i);
       fvec vdy_ik = (vy_k - vy_i);
       fvec vdz_ik = (vz_k - vz_i);
       fvec vrsq = vdx_ik * vdx_ik + vdy_ik * vdy_ik + vdz_ik * vdz_ik;
       ivec vc_idx = v::int_mullo(v_i4floats, vw_k) + v::int_mullo(v_i_ntypes, vc_idx_ij);
-      vcutsq = v::gather<4>(vcutsq, vnew_mask, vc_idx, c_inner);
+      vcutsq = v::template gather<4>(vcutsq, vnew_mask, vc_idx, c_inner);
       bvec vcutoff_mask = v::cmplt(vrsq, vcutsq);
       bvec vsame_mask = v::int_cmpneq(vjs, vks);
       bvec veff_mask = vcutoff_mask & vsame_mask & vactive_mask;
@@ -769,14 +772,16 @@ void IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::kernel_step(
     while (! v::mask_testz(vactive_mask)) {
       bvec vnew_mask = vactive_mask & ~ veff_old_mask;
       vks = v::int_mullo(v_i4floats, v_i_NEIGHMASK &
-          v::int_gather<4>(vks, vactive_mask, vkks + vcnumneigh_i, firstneigh));
+          (v::template int_gather<4>(vks, vactive_mask,
+                                     vkks + vcnumneigh_i,
+                                     firstneigh)));
       v::gather_x(vks, vnew_mask, x, &vx_k, &vy_k, &vz_k, &vw_k);
       fvec vdx_ik = (vx_k - vx_i);
       fvec vdy_ik = (vy_k - vy_i);
       fvec vdz_ik = (vz_k - vz_i);
       fvec vrsq = vdx_ik * vdx_ik + vdy_ik * vdy_ik + vdz_ik * vdz_ik;
       ivec vc_idx = v::int_mullo(v_i4floats, vw_k) + v::int_mullo(v_i_ntypes, vc_idx_ij);
-      vcutsq = v::gather<4>(vcutsq, vnew_mask, vc_idx, c_inner);
+      vcutsq = v::template gather<4>(vcutsq, vnew_mask, vc_idx, c_inner);
       bvec vcutoff_mask = v::cmplt(vrsq, vcutsq);
       bvec vsame_mask = v::int_cmpneq(vjs, vks);
       bvec veff_mask = vcutoff_mask & vsame_mask & vactive_mask;
@@ -836,14 +841,16 @@ void IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::kernel_step(
     while (! v::mask_testz(vactive_mask)) {
       bvec vnew_mask = vactive_mask & ~ veff_old_mask;
       vks = v::int_mullo(v_i4floats, v_i_NEIGHMASK &
-          v::int_gather<4>(vks, vactive_mask, vkks + vcnumneigh_i, firstneigh));
+          (v::template int_gather<4>(vks, vactive_mask,
+                                     vkks + vcnumneigh_i,
+                                     firstneigh)));
       v::gather_x(vks, vnew_mask, x, &vx_k, &vy_k, &vz_k, &vw_k);
       fvec vdx_ik = vx_k - vx_i;
       fvec vdy_ik = vy_k - vy_i;
       fvec vdz_ik = vz_k - vz_i;
       fvec vrsq = vdx_ik * vdx_ik + vdy_ik * vdy_ik + vdz_ik * vdz_ik;
       ivec vc_idx = v::int_mullo(v_i4floats, vw_k) + v::int_mullo(v_i_ntypes, vc_idx_ij);
-      vcutsq = v::gather<4>(vcutsq, vnew_mask, vc_idx, c_inner);
+      vcutsq = v::template gather<4>(vcutsq, vnew_mask, vc_idx, c_inner);
       bvec vcutoff_mask = v::cmplt(vrsq, vcutsq);
       bvec vsame_mask = v::int_cmpneq(vjs, vks);
       bvec veff_mask = vcutoff_mask & vsame_mask & vactive_mask;
@@ -991,7 +998,7 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel_step_const_i(
     fvec vdy_ik = vy_k - vy_i;
     fvec vdz_ik = vz_k - vz_i;
     fvec vrsq = vdx_ik * vdx_ik + vdy_ik * vdy_ik + vdz_ik * vdz_ik;
-    fvec vcutsq = v::gather<4>(v::zero(), vmask, vc_idx_j_ntypes, &c_inner[ntypes * ntypes * w_i + w_k]);
+    fvec vcutsq = v::template gather<4>(v::zero(), vmask, vc_idx_j_ntypes, &c_inner[ntypes * ntypes * w_i + w_k]);
     bvec vcutoff_mask = v::cmplt(vrsq, vcutsq);
     bvec vsame_mask = v::int_cmpneq(vjs, ivec(static_cast<int>(4 * sizeof(typename v::fscal) * k)));
     bvec veff_mask = vcutoff_mask & vsame_mask & vmask;
@@ -1035,7 +1042,7 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel_step_const_i(
     fvec vdy_ik = vy_k - vy_i;
     fvec vdz_ik = vz_k - vz_i;
     fvec vrsq = vdx_ik * vdx_ik + vdy_ik * vdy_ik + vdz_ik * vdz_ik;
-    fvec vcutsq = v::gather<4>(v::zero(), vmask, vc_idx_j_ntypes, &c_inner[ntypes * ntypes * w_i + w_k]);
+    fvec vcutsq = v::template gather<4>(v::zero(), vmask, vc_idx_j_ntypes, &c_inner[ntypes * ntypes * w_i + w_k]);
     bvec vcutoff_mask = v::cmplt(vrsq, vcutsq);
     bvec vsame_mask = v::int_cmpneq(vjs, ivec(static_cast<int>(4 * sizeof(typename v::fscal) * k)));
     bvec veff_mask = vcutoff_mask & vsame_mask & vmask;
@@ -1082,7 +1089,7 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel_step_const_i(
     fvec vdy_ik = vy_k - vy_i;
     fvec vdz_ik = vz_k - vz_i;
     fvec vrsq = vdx_ik * vdx_ik + vdy_ik * vdy_ik + vdz_ik * vdz_ik;
-    fvec vcutsq = v::gather<4>(v::zero(), vmask, vc_idx_j_ntypes, &c_inner[ntypes * ntypes * w_i + w_k].cutsq);
+    fvec vcutsq = v::template gather<4>(v::zero(), vmask, vc_idx_j_ntypes, &c_inner[ntypes * ntypes * w_i + w_k].cutsq);
     bvec vcutoff_mask = v::cmplt(vrsq, vcutsq);
     bvec vsame_mask = v::int_cmpneq(vjs, ivec(static_cast<int>(4 * sizeof(typename v::fscal) * k)));
     bvec veff_mask = vcutoff_mask & vsame_mask & vmask;
@@ -1228,7 +1235,7 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel(
 
 
 template<class flt_t, class acc_t, lmp_intel::CalculationMode mic, bool pack_i>
-IntelKernelTersoff<flt_t,acc_t,mic,pack_i>::fvec IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::zeta_vector(
+typename IntelKernelTersoff<flt_t,acc_t,mic,pack_i>::fvec IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::zeta_vector(
   const c_inner_t * param,
   ivec xjw, bvec mask,
   fvec vrij, fvec rsq2,
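The same strictness applies to dependent type names: the return type IntelKernelTersoff<...>::fvec depends on the template parameters, so the out-of-line definition must spell it with typename, as the hunk above now does. A minimal parallel with hypothetical names:

// Illustration only: dependent nested types need "typename".
template <class T>
struct Kernel {
  typedef T fvec;
  static fvec zeta();
};

template <class T>
typename Kernel<T>::fvec Kernel<T>::zeta() {  // "typename" required here
  return fvec();
}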
@@ -1354,6 +1361,8 @@ void IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::force_zeta_vector(
   }
 }
 
+#define BCF lmp_intel::vector_routines<flt_t, acc_t, mic>
+
 template<class flt_t, class acc_t, lmp_intel::CalculationMode mic, bool pack_i>
 template<bool ZETA>
 void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::attractive_vector(
@@ -1393,7 +1402,7 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::attractive_vector(
   fvec varg3 = varg1 * varg1 * varg1;
   bvec mask_ex = v::cmpeq(vppowermint, fvec(3.));
   fvec varg = v::blend(mask_ex, varg1, varg3);
-  fvec vex_delr = min(fvec(1.e30), exp(varg));
+  fvec vex_delr = BCF::min(fvec(1.e30), exp(varg));
   fvec vex_delr_d_factor = v::blend(mask_ex, v_1_0, fvec(3.0) * varg1 * varg1);
   fvec vex_delr_d = vplam3 * vex_delr_d_factor * vex_delr;
   bvec vmask_need_sine = v::cmpnle(vrik, vpbigr - vpbigd) & mask;
@@ -1413,12 +1422,12 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::attractive_vector(
   if (ZETA) *zeta = vfc * vgijk * vex_delr;
 
   fvec vminus_costheta = - vcostheta;
-  fvec vdcosdrjx = vrijinv * fmadd(vminus_costheta, vrij_hatx, rik_hatx);
-  fvec vdcosdrjy = vrijinv * fmadd(vminus_costheta, vrij_haty, rik_haty);
-  fvec vdcosdrjz = vrijinv * fmadd(vminus_costheta, vrij_hatz, rik_hatz);
-  fvec vdcosdrkx = rikinv * fmadd(vminus_costheta, rik_hatx, vrij_hatx);
-  fvec vdcosdrky = rikinv * fmadd(vminus_costheta, rik_haty, vrij_haty);
-  fvec vdcosdrkz = rikinv * fmadd(vminus_costheta, rik_hatz, vrij_hatz);
+  fvec vdcosdrjx = vrijinv * BCF::fmadd(vminus_costheta, vrij_hatx, rik_hatx);
+  fvec vdcosdrjy = vrijinv * BCF::fmadd(vminus_costheta, vrij_haty, rik_haty);
+  fvec vdcosdrjz = vrijinv * BCF::fmadd(vminus_costheta, vrij_hatz, rik_hatz);
+  fvec vdcosdrkx = rikinv * BCF::fmadd(vminus_costheta, rik_hatx, vrij_hatx);
+  fvec vdcosdrky = rikinv * BCF::fmadd(vminus_costheta, rik_haty, vrij_haty);
+  fvec vdcosdrkz = rikinv * BCF::fmadd(vminus_costheta, rik_hatz, vrij_hatz);
   fvec vdcosdrix = -(vdcosdrjx + vdcosdrkx);
   fvec vdcosdriy = -(vdcosdrjy + vdcosdrky);
   fvec vdcosdriz = -(vdcosdrjz + vdcosdrkz);
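The switch from unqualified min and fmadd to BCF::min and BCF::fmadd (BCF being the vector_routines alias introduced a few hunks earlier) presumably works around strict two-phase name lookup: in a template, an unqualified call is resolved at definition time plus argument-dependent lookup, so a helper that is meant to come from the vector-routines class is either not found or can bind to an unintended overload under clang, even though the classic compiler tolerated the old spelling. Roughly, with hypothetical names:

// Illustration only: qualify calls that should resolve to a helper class.
template <class T>
struct routines {
  static T fmadd(T a, T b, T c) { return a * b + c; }
};

template <class T>
T axpy(T a, T x, T y) {
  // return fmadd(a, x, y);            // not found under strict two-phase lookup
  return routines<T>::fmadd(a, x, y);  // qualified call always resolves
}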
@@ -242,9 +242,9 @@ union ubuf {
 
 // define stack variable alignment
 
-#if defined(__INTEL_LLVM_COMPILER) || defined(__INTEL_COMPILER)
+#if defined(__INTEL_COMPILER)
 #define _alignvar(expr, val) __declspec(align(val)) expr
-#elif defined(__GNUC__) || defined(__PGI)
+#elif defined(__GNUC__) || defined(__PGI) || defined(__INTEL_LLVM_COMPILER)
 #define _alignvar(expr, val) expr __attribute((aligned(val)))
 #else
 #define _alignvar(expr, val) expr
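With this change the clang-based Intel compiler takes the GCC-style branch, so a stack variable declared through _alignvar expands to the __attribute form instead of __declspec(align(...)). For example (the buffer name is illustrative):

// Expansion of the macro above for a hypothetical buffer:
_alignvar(double scratch[8], 64);
// classic icc     ->  __declspec(align(64)) double scratch[8];
// gcc/clang/icpx  ->  double scratch[8] __attribute((aligned(64)));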
@@ -266,7 +266,7 @@ union ubuf {
 
 #if defined(__clang__)
 #define _noopt __attribute__((optnone))
-#elif defined(__INTEL_COMPILER)
+#elif defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
 #define _noopt
 #elif defined(__PGI)
 #define _noopt
@@ -16,7 +16,8 @@
 
 #include "error.h"
 
-#if defined(LMP_USER_INTEL) && defined(__INTEL_COMPILER)
+#if defined(LMP_USER_INTEL) && \
+    ((defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)))
 #ifndef LMP_INTEL_NO_TBB
 #define LMP_USE_TBB_ALLOCATOR
 #include "tbb/scalable_allocator.h"
@@ -81,7 +82,7 @@ void *Memory::srealloc(void *ptr, bigint nbytes, const char *name)
 #if defined(LMP_USE_TBB_ALLOCATOR)
   ptr = scalable_aligned_realloc(ptr, nbytes, LAMMPS_MEMALIGN);
 #elif defined(LMP_INTEL_NO_TBB) && defined(LAMMPS_MEMALIGN) && \
-  defined(__INTEL_COMPILER)
+  (defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER))
 
   ptr = realloc(ptr, nbytes);
   uintptr_t offset = ((uintptr_t)(const void *)(ptr)) % LAMMPS_MEMALIGN;