Feb2021 GPU Package Update - GPU Package Files
This commit is contained in:
@ -24,6 +24,9 @@
|
||||
#include "geryon/ocl_mat.h"
|
||||
#include "geryon/ocl_kernel.h"
|
||||
using namespace ucl_opencl;
|
||||
#ifndef LAL_NO_OCL_EV_JIT
|
||||
#define LAL_OCL_EV_JIT
|
||||
#endif
|
||||
#elif defined(USE_CUDART)
|
||||
#include "geryon/nvc_timer.h"
|
||||
#include "geryon/nvc_mat.h"
|
||||
@ -178,7 +181,7 @@ class Atom {
|
||||
ii+=m_size-n;
|
||||
}
|
||||
UCL_H_Vec<dev_typ> view;
|
||||
view.view((dev_typ*)buffer.begin(),m_size*m_size,*dev);
|
||||
view.view_offset(0,buffer,m_size*m_size);
|
||||
ucl_copy(dev_v,view,false);
|
||||
}
|
||||
|
||||
@ -197,7 +200,26 @@ class Atom {
|
||||
ii+=m_size-n;
|
||||
}
|
||||
UCL_H_Vec<dev_typ> view;
|
||||
view.view((dev_typ*)buffer.begin(),m_size*m_size,*dev);
|
||||
view.view_offset(0,buffer,m_size*m_size);
|
||||
ucl_copy(dev_v,view,false);
|
||||
}
|
||||
|
||||
/// Pack LAMMPS atom type constants into 2 vectors and copy to device
|
||||
template <class dev_typ, class t1, class t2>
|
||||
inline void type_pack2(const int n, UCL_D_Vec<dev_typ> &dev_v,
|
||||
UCL_H_Vec<numtyp> &buffer, t1 ***one, t2 ***two) {
|
||||
int ii=0;
|
||||
for (int i=0; i<n; i++) {
|
||||
for (int j=0; j<n; j++) {
|
||||
for (int k=0; k<n; k++) {
|
||||
buffer[ii*2]=static_cast<numtyp>(one[i][j][k]);
|
||||
buffer[ii*2+1]=static_cast<numtyp>(two[i][j][k]);
|
||||
ii++;
|
||||
}
|
||||
}
|
||||
}
|
||||
UCL_H_Vec<dev_typ> view;
|
||||
view.view_offset(0,buffer,n*n*n);
|
||||
ucl_copy(dev_v,view,false);
|
||||
}
|
||||
|
||||
@ -217,7 +239,7 @@ class Atom {
|
||||
ii+=m_size-n;
|
||||
}
|
||||
UCL_H_Vec<dev_typ> view;
|
||||
view.view((dev_typ*)buffer.begin(),m_size*m_size,*dev);
|
||||
view.view_offset(0,buffer,m_size*m_size);
|
||||
ucl_copy(dev_v,view,false);
|
||||
}
|
||||
|
||||
@ -238,7 +260,7 @@ class Atom {
|
||||
ii+=m_size-n;
|
||||
}
|
||||
UCL_H_Vec<dev_typ> view;
|
||||
view.view((dev_typ*)buffer.begin(),m_size*m_size,*dev);
|
||||
view.view_offset(0,buffer,m_size*m_size);
|
||||
ucl_copy(dev_v,view,false);
|
||||
}
|
||||
|
||||
@ -251,7 +273,7 @@ class Atom {
|
||||
buffer[i*2+1]=static_cast<numtyp>(two[i][i]);
|
||||
}
|
||||
UCL_H_Vec<dev_typ> view;
|
||||
view.view((dev_typ*)buffer.begin(),n,*dev);
|
||||
view.view_offset(0,buffer,n);
|
||||
ucl_copy(dev_v,view,false);
|
||||
}
|
||||
|
||||
@ -261,6 +283,9 @@ class Atom {
|
||||
inline void data_unavail()
|
||||
{ _x_avail=false; _q_avail=false; _quat_avail=false; _v_avail=false; _resized=false; }
|
||||
|
||||
typedef struct { double x,y,z; } vec3d;
|
||||
typedef struct { numtyp x,y,z,w; } vec4d_t;
|
||||
|
||||
/// Cast positions and types to write buffer
|
||||
inline void cast_x_data(double **host_ptr, const int *host_type) {
|
||||
if (_x_avail==false) {
|
||||
@ -269,13 +294,16 @@ class Atom {
|
||||
memcpy(host_x_cast.begin(),host_ptr[0],_nall*3*sizeof(double));
|
||||
memcpy(host_type_cast.begin(),host_type,_nall*sizeof(int));
|
||||
#else
|
||||
int wl=0;
|
||||
vec3d *host_p=reinterpret_cast<vec3d*>(&(host_ptr[0][0]));
|
||||
vec4d_t *xp=reinterpret_cast<vec4d_t*>(&(x[0]));
|
||||
#if (LAL_USE_OMP == 1)
|
||||
#pragma omp parallel for schedule(static)
|
||||
#endif
|
||||
for (int i=0; i<_nall; i++) {
|
||||
x[wl]=host_ptr[i][0];
|
||||
x[wl+1]=host_ptr[i][1];
|
||||
x[wl+2]=host_ptr[i][2];
|
||||
x[wl+3]=host_type[i];
|
||||
wl+=4;
|
||||
xp[i].x=host_p[i].x;
|
||||
xp[i].y=host_p[i].y;
|
||||
xp[i].z=host_p[i].z;
|
||||
xp[i].w=host_type[i];
|
||||
}
|
||||
#endif
|
||||
_time_cast+=MPI_Wtime()-t;
|
||||
@ -320,6 +348,11 @@ class Atom {
|
||||
} else if (sizeof(numtyp)==sizeof(double))
|
||||
memcpy(q.host.begin(),host_ptr,_nall*sizeof(numtyp));
|
||||
else
|
||||
#if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
|
||||
#pragma omp parallel for simd schedule(static)
|
||||
#elif (LAL_USE_OMP_SIMD == 1)
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (int i=0; i<_nall; i++) q[i]=host_ptr[i];
|
||||
_time_cast+=MPI_Wtime()-t;
|
||||
}
|
||||
@ -346,6 +379,11 @@ class Atom {
|
||||
} else if (sizeof(numtyp)==sizeof(double))
|
||||
memcpy(quat.host.begin(),host_ptr,_nall*4*sizeof(numtyp));
|
||||
else
|
||||
#if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
|
||||
#pragma omp parallel for simd schedule(static)
|
||||
#elif (LAL_USE_OMP_SIMD == 1)
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (int i=0; i<_nall*4; i++) quat[i]=host_ptr[i];
|
||||
_time_cast+=MPI_Wtime()-t;
|
||||
}
|
||||
@ -370,13 +408,16 @@ class Atom {
|
||||
memcpy(host_v_cast.begin(),host_ptr[0],_nall*3*sizeof(double));
|
||||
memcpy(host_tag_cast.begin(),host_tag,_nall*sizeof(int));
|
||||
#else
|
||||
int wl=0;
|
||||
vec3d *host_p=reinterpret_cast<vec3d*>(&(host_ptr[0][0]));
|
||||
vec4d_t *vp=reinterpret_cast<vec4d_t*>(&(v[0]));
|
||||
#if (LAL_USE_OMP == 1)
|
||||
#pragma omp parallel for schedule(static)
|
||||
#endif
|
||||
for (int i=0; i<_nall; i++) {
|
||||
v[wl]=host_ptr[i][0];
|
||||
v[wl+1]=host_ptr[i][1];
|
||||
v[wl+2]=host_ptr[i][2];
|
||||
v[wl+3]=host_tag[i];
|
||||
wl+=4;
|
||||
vp[i].x=host_p[i].x;
|
||||
vp[i].y=host_p[i].y;
|
||||
vp[i].z=host_p[i].z;
|
||||
vp[i].w=host_tag[i];
|
||||
}
|
||||
#endif
|
||||
_time_cast+=MPI_Wtime()-t;
|
||||
|
||||
Reference in New Issue
Block a user