Merge pull request #2854 from akohlmey/final-fixes-for-patch-release

Final fixes for the patch release
This commit is contained in:
Axel Kohlmeyer
2021-07-27 20:31:52 -04:00
committed by GitHub
11 changed files with 59 additions and 34 deletions

View File

@ -34,6 +34,10 @@ if (test $1 = 1) then
echo "Must install KSPACE package with DIELECTRIC package"
exit 1
fi
+if (test ! -e ../pair_lj_cut_coul_debye.cpp) then
+echo "Must install EXTRA-PAIR package with DIELECTRIC package"
+exit 1
+fi
fi
for file in *.cpp *.h; do

View File

@ -623,7 +623,7 @@ public:
) {
assert(scale == sizeof(FVEC_SCAL_T));
# if FVEC_LEN==8
FVEC_SUFFIX(_mm512_mask_i32scatter_)(mem, mask.val_,
FVEC_SUFFIX(_mm512_mask_i32scatter_)(mem, mask.val_,
_mm512_castsi512_si256(idx.val_),
a.val_, sizeof(FVEC_SCAL_T));
# else
@ -668,7 +668,7 @@ public:
const double * mem, const int scale
) {
assert(scale == sizeof(double));
__m512d lo = _mm512_mask_i32gather_pd(src.lo_, mask.val_,
__m512d lo = _mm512_mask_i32gather_pd(src.lo_, mask.val_,
_mm512_castsi512_si256(idx.val_),
mem, sizeof(double));
__m512d hi = _mm512_mask_i32gather_pd(src.hi_, get_bvec_hi(mask.val_),

View File

@ -338,7 +338,7 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
#endif
// TO BE DEPRECATED
#ifndef USE_OMP_SIMD
#ifndef USE_OMP_SIMD
#define IP_PRE_fdotr_acc_force_l5(lf, lt, minlocal, nthreads, f_start, \
f_stride, pos, ov0, ov1, ov2, \

View File

@ -838,10 +838,10 @@ namespace ip_simd {
x = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), atom,
_MM_SCALE_2);
y = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
y = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), atom+1,
_MM_SCALE_2);
z = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
z = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), atom+2,
_MM_SCALE_2);
}
@ -852,10 +852,10 @@ namespace ip_simd {
x = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), atom,
_MM_SCALE_2);
y = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
y = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), atom+1,
_MM_SCALE_2);
z = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
z = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), atom+2,
_MM_SCALE_2);
type = _mm512_mask_i32gather_epi32(_mm512_undefined_epi32(), m, i, atom+3,
@ -895,7 +895,7 @@ namespace ip_simd {
const SIMD_int &joffset, SIMD_double &eng) {
SIMD_double jeng;
SIMD_conflict_pi_reduce1(rmask, joffset, eng);
jeng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
jeng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
_mm512_castsi512_si256(joffset),
force, _MM_SCALE_2);
jeng = jeng + eng;
@ -908,7 +908,7 @@ namespace ip_simd {
SIMD_double engd, jeng;
engd = _mm512_cvtps_pd(_mm512_castps512_ps256(eng));
SIMD_conflict_pi_reduce1(rmask, joffset, engd);
jeng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
jeng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
_mm512_castsi512_si256(joffset),
force, _MM_SCALE_2);
jeng = jeng + engd;
@ -920,7 +920,7 @@ namespace ip_simd {
_mm512_shuffle_f32x4(eng,eng,238)));
SIMD_int joffset2 = _mm512_shuffle_i32x4(joffset, joffset, 238);
SIMD_conflict_pi_reduce1(rmask2, joffset2, engd);
jeng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask2,
jeng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask2,
_mm512_castsi512_si256(joffset2),
force, _MM_SCALE_2);
jeng = jeng + engd;
@ -939,7 +939,7 @@ namespace ip_simd {
SIMD_double jeng;
SIMD_conflict_pi_reduce1(rmask, joffset, eng);
jeng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
jeng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
_mm512_castsi512_si256(joffset),
force, _MM_SCALE_2);
jeng = jeng + eng;
@ -971,7 +971,7 @@ namespace ip_simd {
SIMD_double &fy, SIMD_double &fz) {
SIMD_conflict_pi_reduce3(m, i, fx, fy, fz);
SIMD_double jfrc;
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), force,
_MM_SCALE_2);
jfrc = jfrc + fx;
@ -983,7 +983,7 @@ namespace ip_simd {
jfrc = jfrc + fy;
_mm512_mask_i32scatter_pd(force+1, m, _mm512_castsi512_si256(i), jfrc,
_MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), force + 2,
_MM_SCALE_2);
jfrc = jfrc + fz;
@ -1000,13 +1000,13 @@ namespace ip_simd {
amzd = _mm512_cvtps_pd(_mm512_castps512_ps256(amz));
SIMD_conflict_pi_reduce3(rmask, joffset, amxd, amyd, amzd);
SIMD_double jfrc;
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
_mm512_castsi512_si256(joffset),
force, _MM_SCALE_2);
jfrc = jfrc + amxd;
_mm512_mask_i32scatter_pd(force, rmask, _mm512_castsi512_si256(joffset),
jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
_mm512_castsi512_si256(joffset),
force + 1, _MM_SCALE_2);
jfrc = jfrc + amyd;
@ -1028,13 +1028,13 @@ namespace ip_simd {
_mm512_shuffle_f32x4(amz,amz,238)));
SIMD_int joffset2 = _mm512_shuffle_i32x4(joffset, joffset, 238);
SIMD_conflict_pi_reduce3(rmask2, joffset2, amxd, amyd, amzd);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask2,
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask2,
_mm512_castsi512_si256(joffset2),
force, _MM_SCALE_2);
jfrc = jfrc + amxd;
_mm512_mask_i32scatter_pd(force, rmask2, _mm512_castsi512_si256(joffset2),
jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask2,
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask2,
_mm512_castsi512_si256(joffset2),
force + 1, _MM_SCALE_2);
jfrc = jfrc + amyd;
@ -1045,7 +1045,7 @@ namespace ip_simd {
_mm512_castsi512_si256(joffset2),
force + 2, _MM_SCALE_2);
jfrc = jfrc + amzd;
_mm512_mask_i32scatter_pd(force+2, rmask2,
_mm512_mask_i32scatter_pd(force+2, rmask2,
_mm512_castsi512_si256(joffset2), jfrc,
_MM_SCALE_2);
}
@ -1099,7 +1099,7 @@ namespace ip_simd {
const SIMD_int &i, const SIMD_double &fx,
const SIMD_double &fy, const SIMD_double &fz) {
SIMD_double jfrc;
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), force,
_MM_SCALE_2);
jfrc = jfrc - fx;
@ -1866,7 +1866,7 @@ namespace ip_simd {
jfrc = jfrc + fx;
_mm512_mask_i32scatter_pd(force, m, _mm512_castsi512_si256(i), jfrc,
_MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), force + 1,
_MM_SCALE_2);
jfrc = jfrc + fy;
@ -1880,7 +1880,7 @@ namespace ip_simd {
_MM_SCALE_2);
if (EFLAG) {
if (eatom) {
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i),
force + 3, _MM_SCALE_2);
jfrc = jfrc + fwtmp;

View File

@ -231,7 +231,7 @@ void PairBuckIntel::eval(const int offload, const int vflag,
fxtmp = fytmp = fztmp = (acc_t)0;
if (EFLAG) fwtmp = sevdwl = (acc_t)0;
if (NEWTON_PAIR == 0)
if (vflag == VIRIAL_PAIR)
if (vflag == VIRIAL_PAIR)
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
#if defined(LMP_SIMD_COMPILER)

View File

@ -251,20 +251,20 @@ action pair_lj_class2_kokkos.cpp pair_lj_class2.cpp
action pair_lj_class2_kokkos.h pair_lj_class2.h
action pair_lj_cut_coul_cut_kokkos.cpp
action pair_lj_cut_coul_cut_kokkos.h
-action pair_lj_cut_coul_debye_kokkos.cpp
-action pair_lj_cut_coul_debye_kokkos.h
-action pair_lj_cut_coul_dsf_kokkos.cpp
-action pair_lj_cut_coul_dsf_kokkos.h
+action pair_lj_cut_coul_debye_kokkos.cpp pair_lj_cut_coul_debye.cpp
+action pair_lj_cut_coul_debye_kokkos.h pair_lj_cut_coul_debye.h
+action pair_lj_cut_coul_dsf_kokkos.cpp pair_lj_cut_coul_dsf.cpp
+action pair_lj_cut_coul_dsf_kokkos.h pair_lj_cut_coul_dsf.h
action pair_lj_cut_coul_long_kokkos.cpp pair_lj_cut_coul_long.cpp
action pair_lj_cut_coul_long_kokkos.h pair_lj_cut_coul_long.h
action pair_lj_cut_kokkos.cpp
action pair_lj_cut_kokkos.h
action pair_lj_expand_kokkos.cpp
action pair_lj_expand_kokkos.h
-action pair_lj_gromacs_coul_gromacs_kokkos.cpp
-action pair_lj_gromacs_coul_gromacs_kokkos.h
-action pair_lj_gromacs_kokkos.cpp
-action pair_lj_gromacs_kokkos.h
+action pair_lj_gromacs_coul_gromacs_kokkos.cpp pair_lj_gromacs_coul_gromacs.cpp
+action pair_lj_gromacs_coul_gromacs_kokkos.h pair_lj_gromacs_coul_gromacs.h
+action pair_lj_gromacs_kokkos.cpp pair_lj_gromacs.cpp
+action pair_lj_gromacs_kokkos.h pair_lj_gromacs.h
action pair_lj_sdk_kokkos.cpp pair_lj_sdk.cpp
action pair_lj_sdk_kokkos.h pair_lj_sdk.h
action pair_morse_kokkos.cpp