Misc Improvements to GPU Package

- Optimizations for molecular systems
-   Improved kernel performance and greater CPU overlap
- Reduced GPU to CPU communications for discrete devices
- Switch classic Intel makefiles to use LLVM-based compilers
- Prefetch optimizations supported for OpenCL
- Optimized data repack for quaternions
This commit is contained in:
W. Michael Brown
2023-03-05 21:03:12 -08:00
parent 142876a59b
commit 37f22c8627
151 changed files with 1085 additions and 617 deletions

View File

@ -217,7 +217,7 @@ __kernel void interp(const __global numtyp4 *restrict x_,
const grdtyp delxinv, const grdtyp delyinv,
const grdtyp delzinv, const int order,
const int order2, const grdtyp qqrd2e_scale,
__global acctyp4 *restrict ans) {
__global acctyp3 *restrict ans) {
__local grdtyp rho_coeff[PPPM_MAX_SPLINE*PPPM_MAX_SPLINE];
__local grdtyp rho1d_0[PPPM_MAX_SPLINE][PPPM_BLOCK_1D];
__local grdtyp rho1d_1[PPPM_MAX_SPLINE][PPPM_BLOCK_1D];
@ -239,7 +239,7 @@ __kernel void interp(const __global numtyp4 *restrict x_,
fetch(qs,ii,q_tex);
qs*=qqrd2e_scale;
acctyp4 ek;
acctyp3 ek;
ek.x=(acctyp)0.0;
ek.y=(acctyp)0.0;
ek.z=(acctyp)0.0;