Converting cilk vectorization directives to the openmp standard and changing some more depracated vector intrinsics. Data alignment directives for compiler vectorization are still mostly intel specific.

This commit is contained in:
Mike Brown
2021-07-26 11:22:21 -07:00
parent 1345c25f41
commit fefcd0e2af
33 changed files with 1013 additions and 189 deletions

View File

@ -394,8 +394,12 @@ void PPPMIntel::particle_map(IntelBuffers<flt_t,acc_t> *buffers)
IP_PRE_omp_range_id_align(iifrom, iito, tid, nlocal, nthr, sizeof(ATOM_T));
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:flag)
#else
#pragma simd reduction(+:flag)
#endif
#pragma vector aligned
#endif
for (int i = iifrom; i < iito; i++) {
@ -500,7 +504,11 @@ void PPPMIntel::make_rho(IntelBuffers<flt_t,acc_t> *buffers)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho_lookup[idx][k];
@ -509,7 +517,11 @@ void PPPMIntel::make_rho(IntelBuffers<flt_t,acc_t> *buffers)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower; k <= nupper; k++) {
FFT_SCALAR r1,r2,r3;
@ -541,7 +553,11 @@ void PPPMIntel::make_rho(IntelBuffers<flt_t,acc_t> *buffers)
int mzy = m*nix + mz;
FFT_SCALAR x0 = y0*rho[1][m];
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) {
int mzyx = l + mzy;
@ -563,7 +579,11 @@ void PPPMIntel::make_rho(IntelBuffers<flt_t,acc_t> *buffers)
IP_PRE_omp_range_id(ifrom, ito, tid, ngrid, nthr);
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int i = ifrom; i < ito; i++) {
for (int j = 1; j < nthr; j++) {
@ -645,7 +665,11 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho0[k] = rho_lookup[idx][k];
@ -654,7 +678,11 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower; k <= nupper; k++) {
FFT_SCALAR r1 = rho_coeff[order-1][k];
@ -690,7 +718,11 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers)
int my = m+nysum;
FFT_SCALAR y0 = z0*rho1[m];
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) {
int mx = l+nxsum;
@ -813,7 +845,11 @@ void PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho_lookup[idx][k];
@ -825,7 +861,11 @@ void PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower; k <= nupper; k++) {
FFT_SCALAR r1,r2,r3,dr1,dr2,dr3;
@ -871,7 +911,11 @@ void PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers)
FFT_SCALAR eky_p = drho[1][m] * rho[2][n];
FFT_SCALAR ekz_p = rho[1][m] * drho[2][n];
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) {
int mx = l + nxsum;
@ -893,7 +937,11 @@ void PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers)
}
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int i = ifrom; i < ito; i++) {
particle_ekx[i] *= hx_inv;
@ -942,7 +990,11 @@ void PPPMIntel::precompute_rho()
for (int i = 0; i < rho_points; i++) {
FFT_SCALAR dx = -1. + 1./half_rho_scale * (FFT_SCALAR)i;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k=nlower; k<=nupper;k++) {
FFT_SCALAR r1 = ZEROF;
@ -956,7 +1008,11 @@ void PPPMIntel::precompute_rho()
}
if (differentiation_flag == 1) {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k=nlower; k<=nupper;k++) {
FFT_SCALAR r1 = ZEROF;