Added fphi_mpole in amoeba/gpu, fixed a bug in the kernel when indexing grid
This commit is contained in:
@ -64,8 +64,8 @@ int AmoebaT::init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||
cell_size,gpu_split,_screen,amoeba,
|
||||
"k_amoeba_multipole", "k_amoeba_udirect2b",
|
||||
"k_amoeba_umutual2b", "k_amoeba_polar",
|
||||
"k_amoeba_fphi_uind", "k_amoeba_short_nbor",
|
||||
"k_amoeba_special15");
|
||||
"k_amoeba_fphi_uind", "k_amoeba_fphi_mpole",
|
||||
"k_amoeba_short_nbor", "k_amoeba_special15");
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
|
||||
@ -2026,7 +2026,7 @@ __kernel void k_amoeba_fphi_mpole(const __global numtyp4 *restrict thetai1,
|
||||
for (int ib = 0; ib < bsorder; ib++) {
|
||||
int i1 = istart + ib;
|
||||
numtyp4 tha1 = thetai1[i1];
|
||||
int gidx = 2*(k*ngridxy + j*ngridx + i);
|
||||
int gidx = k*ngridxy + j*ngridx + i;
|
||||
numtyp tq = grid[gidx];
|
||||
t0 += tq*tha1.x;
|
||||
t1 += tq*tha1.y;
|
||||
|
||||
@ -179,6 +179,10 @@ void amoeba_gpu_fphi_uind(double ****host_grid_brick, void **host_fdip_phi1,
|
||||
host_fdip_phi2, host_fdip_sum_phi);
|
||||
}
|
||||
|
||||
void amoeba_gpu_fphi_mpole(double ***host_grid_brick, void **host_fphi) {
|
||||
AMOEBAMF.compute_fphi_mpole(host_grid_brick, host_fphi);
|
||||
}
|
||||
|
||||
void amoeba_setup_fft(const int numel, const int element_type) {
|
||||
AMOEBAMF.setup_fft(numel, element_type);
|
||||
}
|
||||
|
||||
@ -38,6 +38,7 @@ BaseAmoebaT::~BaseAmoeba() {
|
||||
k_udirect2b.clear();
|
||||
k_umutual2b.clear();
|
||||
k_fphi_uind.clear();
|
||||
k_fphi_mpole.clear();
|
||||
k_polar.clear();
|
||||
k_special15.clear();
|
||||
k_short_nbor.clear();
|
||||
@ -66,6 +67,7 @@ int BaseAmoebaT::init_atomic(const int nlocal, const int nall,
|
||||
const char *k_name_umutual2b,
|
||||
const char *k_name_polar,
|
||||
const char *k_name_fphi_uind,
|
||||
const char *k_name_fphi_mpole,
|
||||
const char *k_name_short_nbor,
|
||||
const char* k_name_special15) {
|
||||
screen=_screen;
|
||||
@ -100,8 +102,9 @@ int BaseAmoebaT::init_atomic(const int nlocal, const int nall,
|
||||
_block_size=device->pair_block_size();
|
||||
_block_bio_size=device->block_bio_pair();
|
||||
compile_kernels(*ucl_device,pair_program,k_name_multipole,
|
||||
k_name_udirect2b, k_name_umutual2b,k_name_polar,
|
||||
k_name_fphi_uind, k_name_short_nbor, k_name_special15);
|
||||
k_name_udirect2b, k_name_umutual2b,k_name_polar,
|
||||
k_name_fphi_uind, k_name_fphi_mpole,
|
||||
k_name_short_nbor, k_name_special15);
|
||||
|
||||
if (_threads_per_atom>1 && gpu_nbor==0) {
|
||||
nbor->packing(true);
|
||||
@ -559,6 +562,7 @@ void BaseAmoebaT::compute_umutual2b(int *host_amtype, int *host_amgroup, double
|
||||
// host_thetai1, host_thetai2, host_thetai3 are allocated with nmax by bsordermax by 4
|
||||
// host_igrid is allocated with nmax by 4
|
||||
// - transfer extra data from host to device
|
||||
// NOTE: can be re-used for fphi_mpole() (already allocate 2x grid points)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
@ -568,7 +572,7 @@ void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
|
||||
const int nzlo_out, const int nzhi_out,
|
||||
const int nylo_out, const int nyhi_out,
|
||||
const int nxlo_out, const int nxhi_out) {
|
||||
|
||||
// update bsorder with that of the kspace solver
|
||||
_bsorder = bsorder;
|
||||
|
||||
// allocate or resize per-atom arrays
|
||||
@ -586,7 +590,7 @@ void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
|
||||
_fdip_phi2.alloc(_max_thetai_size*10,*(this->ucl_device),UCL_READ_WRITE);
|
||||
_fdip_sum_phi.alloc(_max_thetai_size*20,*(this->ucl_device),UCL_READ_WRITE);
|
||||
} else {
|
||||
if (inum_full>_max_thetai_size) {
|
||||
if (_thetai1.cols()<_max_thetai_size*bsorder) {
|
||||
_max_thetai_size=static_cast<int>(static_cast<double>(inum_full)*1.10);
|
||||
_thetai1.resize(_max_thetai_size*bsorder);
|
||||
_thetai2.resize(_max_thetai_size*bsorder);
|
||||
@ -667,6 +671,7 @@ void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
|
||||
// ---------------------------------------------------------------------------
|
||||
// fphi_uind = induced potential from grid
|
||||
// fphi_uind extracts the induced dipole potential from the particle mesh Ewald grid
|
||||
// NOTE: host_grid_brick is from ic_kspace post_convolution()
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
@ -687,7 +692,7 @@ void BaseAmoebaT::compute_fphi_uind(double ****host_grid_brick,
|
||||
_cgrid_brick[n+1] = host_grid_brick[iz][iy][ix][1];
|
||||
n += 2;
|
||||
}
|
||||
_cgrid_brick.update_device(false);
|
||||
_cgrid_brick.update_device(_num_grid_points*2, false);
|
||||
|
||||
const int red_blocks = fphi_uind();
|
||||
|
||||
@ -727,6 +732,63 @@ int BaseAmoebaT::fphi_uind() {
|
||||
return GX;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// fphi_mpole = multipole potential from grid (limited to polar_kspace for now)
|
||||
// fphi_mpole extracts the permanent multipole potential from
|
||||
// the particle mesh Ewald grid
|
||||
// NOTE: host_grid_brick is from p_kspace post_convolution()
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void BaseAmoebaT::compute_fphi_mpole(double ***host_grid_brick, void **host_fphi)
|
||||
{
|
||||
// TODO: grid brick[k][j][i] is a scalar
|
||||
UCL_H_Vec<numtyp> hdummy;
|
||||
hdummy.alloc(1, *(this->ucl_device), UCL_READ_ONLY);
|
||||
|
||||
int n = 0;
|
||||
for (int iz = _nzlo_out; iz <= _nzhi_out; iz++)
|
||||
for (int iy = _nylo_out; iy <= _nyhi_out; iy++)
|
||||
for (int ix = _nxlo_out; ix <= _nxhi_out; ix++) {
|
||||
_cgrid_brick[n] = host_grid_brick[iz][iy][ix];
|
||||
n++;
|
||||
}
|
||||
_cgrid_brick.update_device(_num_grid_points, false);
|
||||
|
||||
const int red_blocks = fphi_mpole();
|
||||
|
||||
_fdip_sum_phi.update_host(_max_thetai_size*20);
|
||||
|
||||
*host_fphi = _fdip_sum_phi.host.begin();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Interpolate the potential from the PME grid
|
||||
// ---------------------------------------------------------------------------
|
||||
template <class numtyp, class acctyp>
|
||||
int BaseAmoebaT::fphi_mpole() {
|
||||
int ainum=ans->inum();
|
||||
if (ainum == 0)
|
||||
return 0;
|
||||
|
||||
int _nall=atom->nall();
|
||||
int nbor_pitch=nbor->nbor_pitch();
|
||||
|
||||
// Compute the block size and grid size to keep all cores busy
|
||||
const int BX=block_size();
|
||||
int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/BX));
|
||||
|
||||
time_pair.start();
|
||||
int ngridxy = _ngridx * _ngridy;
|
||||
k_fphi_mpole.set_size(GX,BX);
|
||||
k_fphi_mpole.run(&_thetai1, &_thetai2, &_thetai3, &_igrid, &_cgrid_brick,
|
||||
&_fdip_sum_phi, &_bsorder, &ainum,
|
||||
&_nzlo_out, &_nylo_out, &_nxlo_out, &ngridxy, &_ngridx);
|
||||
time_pair.stop();
|
||||
|
||||
return GX;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Reneighbor on GPU if necessary, and then compute polar real-space
|
||||
// ---------------------------------------------------------------------------
|
||||
@ -920,6 +982,7 @@ void BaseAmoebaT::compile_kernels(UCL_Device &dev, const void *pair_str,
|
||||
const char *kname_umutual2b,
|
||||
const char *kname_polar,
|
||||
const char *kname_fphi_uind,
|
||||
const char *kname_fphi_mpole,
|
||||
const char *kname_short_nbor,
|
||||
const char* kname_special15) {
|
||||
if (_compiled)
|
||||
@ -935,6 +998,7 @@ void BaseAmoebaT::compile_kernels(UCL_Device &dev, const void *pair_str,
|
||||
k_umutual2b.set_function(*pair_program, kname_umutual2b);
|
||||
k_polar.set_function(*pair_program, kname_polar);
|
||||
k_fphi_uind.set_function(*pair_program, kname_fphi_uind);
|
||||
k_fphi_mpole.set_function(*pair_program, kname_fphi_mpole);
|
||||
k_short_nbor.set_function(*pair_program, kname_short_nbor);
|
||||
k_special15.set_function(*pair_program, kname_special15);
|
||||
pos_tex.get_texture(*pair_program, "pos_tex");
|
||||
|
||||
@ -64,8 +64,8 @@ class BaseAmoeba {
|
||||
const double gpu_split, FILE *screen, const void *pair_program,
|
||||
const char *kname_multipole, const char *kname_udirect2b,
|
||||
const char *kname_umutual2b, const char *kname_polar,
|
||||
const char *kname_fphi_uind, const char *kname_short_nbor,
|
||||
const char* kname_special15);
|
||||
const char *kname_fphi_uind, const char *kname_fphi_mpole,
|
||||
const char *kname_short_nbor, const char* kname_special15);
|
||||
|
||||
/// Estimate the overhead for GPU context changes and CPU driver
|
||||
void estimate_gpu_overhead(const int add_kernels=0);
|
||||
@ -185,6 +185,8 @@ class BaseAmoeba {
|
||||
void **host_fdip_phi1, void **host_fdip_phi2,
|
||||
void **host_fdip_sum_phi);
|
||||
|
||||
virtual void compute_fphi_mpole(double ***host_grid_brick, void **host_fphi);
|
||||
|
||||
/// Compute polar real-space with device neighboring
|
||||
virtual void compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole,
|
||||
double **host_uind, double **host_uinp, double *host_pval,
|
||||
@ -279,7 +281,8 @@ class BaseAmoeba {
|
||||
|
||||
// ------------------------- DEVICE KERNELS -------------------------
|
||||
UCL_Program *pair_program;
|
||||
UCL_Kernel k_multipole, k_udirect2b, k_umutual2b, k_polar, k_fphi_uind;
|
||||
UCL_Kernel k_multipole, k_udirect2b, k_umutual2b, k_polar;
|
||||
UCL_Kernel k_fphi_uind, k_fphi_mpole;
|
||||
UCL_Kernel k_special15, k_short_nbor;
|
||||
inline int block_size() { return _block_size; }
|
||||
inline void set_kernel(const int eflag, const int vflag) {}
|
||||
@ -305,13 +308,14 @@ class BaseAmoeba {
|
||||
void compile_kernels(UCL_Device &dev, const void *pair_string,
|
||||
const char *kname_multipole, const char *kname_udirect2b,
|
||||
const char *kname_umutual2b, const char *kname_polar,
|
||||
const char *kname_fphi_uind, const char *kname_short_nbor,
|
||||
const char* kname_special15);
|
||||
const char *kname_fphi_uind, const char *kname_fphi_mpole,
|
||||
const char *kname_short_nbor, const char* kname_special15);
|
||||
|
||||
virtual int multipole_real(const int eflag, const int vflag) = 0;
|
||||
virtual int udirect2b(const int eflag, const int vflag) = 0;
|
||||
virtual int umutual2b(const int eflag, const int vflag) = 0;
|
||||
virtual int fphi_uind();
|
||||
virtual int fphi_mpole();
|
||||
virtual int polar_real(const int eflag, const int vflag) = 0;
|
||||
|
||||
|
||||
|
||||
@ -67,8 +67,8 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||
cell_size,gpu_split,_screen,hippo,
|
||||
"k_hippo_multipole", "k_hippo_udirect2b",
|
||||
"k_hippo_umutual2b", "k_hippo_polar",
|
||||
"k_hippo_fphi_uind", "k_hippo_short_nbor",
|
||||
"k_hippo_special15");
|
||||
"k_hippo_fphi_uind", "k_hippo_fphi_mpole",
|
||||
"k_hippo_short_nbor", "k_hippo_special15");
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
|
||||
@ -2346,6 +2346,184 @@ __kernel void k_hippo_fphi_uind(const __global numtyp4 *restrict thetai1,
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
fphi_mpole = multipole potential from grid
|
||||
fphi_mpole extracts the permanent multipole potential from
|
||||
the particle mesh Ewald grid
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
__kernel void k_hippo_fphi_mpole(const __global numtyp4 *restrict thetai1,
|
||||
const __global numtyp4 *restrict thetai2,
|
||||
const __global numtyp4 *restrict thetai3,
|
||||
const __global int *restrict igrid,
|
||||
const __global numtyp *restrict grid,
|
||||
__global numtyp *restrict fphi,
|
||||
const int bsorder, const int inum,
|
||||
const int nzlo_out, const int nylo_out,
|
||||
const int nxlo_out, const int ngridxy,
|
||||
const int ngridx)
|
||||
{
|
||||
int tid=THREAD_ID_X;
|
||||
int ii=tid+BLOCK_ID_X*BLOCK_SIZE_X;
|
||||
|
||||
if (ii<inum) {
|
||||
|
||||
int nlpts = (bsorder-1) / 2;
|
||||
|
||||
int istart = fast_mul(ii,4);
|
||||
int igridx = igrid[istart];
|
||||
int igridy = igrid[istart+1];
|
||||
int igridz = igrid[istart+2];
|
||||
|
||||
// now istart is used to index thetai1, thetai2 and thetai3
|
||||
istart = fast_mul(ii,bsorder);
|
||||
|
||||
// extract the permanent multipole field at each site
|
||||
|
||||
numtyp tuv000 = (numtyp)0.0;
|
||||
numtyp tuv001 = (numtyp)0.0;
|
||||
numtyp tuv010 = (numtyp)0.0;
|
||||
numtyp tuv100 = (numtyp)0.0;
|
||||
numtyp tuv200 = (numtyp)0.0;
|
||||
numtyp tuv020 = (numtyp)0.0;
|
||||
numtyp tuv002 = (numtyp)0.0;
|
||||
numtyp tuv110 = (numtyp)0.0;
|
||||
numtyp tuv101 = (numtyp)0.0;
|
||||
numtyp tuv011 = (numtyp)0.0;
|
||||
numtyp tuv300 = (numtyp)0.0;
|
||||
numtyp tuv030 = (numtyp)0.0;
|
||||
numtyp tuv003 = (numtyp)0.0;
|
||||
numtyp tuv210 = (numtyp)0.0;
|
||||
numtyp tuv201 = (numtyp)0.0;
|
||||
numtyp tuv120 = (numtyp)0.0;
|
||||
numtyp tuv021 = (numtyp)0.0;
|
||||
numtyp tuv102 = (numtyp)0.0;
|
||||
numtyp tuv012 = (numtyp)0.0;
|
||||
numtyp tuv111 = (numtyp)0.0;
|
||||
|
||||
int k = (igridz - nzlo_out) - nlpts;
|
||||
for (int kb = 0; kb < bsorder; kb++) {
|
||||
/*
|
||||
v0 = thetai3[m][kb][0];
|
||||
v1 = thetai3[m][kb][1];
|
||||
v2 = thetai3[m][kb][2];
|
||||
v3 = thetai3[m][kb][3];
|
||||
*/
|
||||
int i3 = istart + kb;
|
||||
numtyp4 tha3 = thetai3[i3];
|
||||
numtyp v0 = tha3.x;
|
||||
numtyp v1 = tha3.y;
|
||||
numtyp v2 = tha3.z;
|
||||
numtyp v3 = tha3.w;
|
||||
numtyp tu00 = (numtyp)0.0;
|
||||
numtyp tu10 = (numtyp)0.0;
|
||||
numtyp tu01 = (numtyp)0.0;
|
||||
numtyp tu20 = (numtyp)0.0;
|
||||
numtyp tu11 = (numtyp)0.0;
|
||||
numtyp tu02 = (numtyp)0.0;
|
||||
numtyp tu30 = (numtyp)0.0;
|
||||
numtyp tu21 = (numtyp)0.0;
|
||||
numtyp tu12 = (numtyp)0.0;
|
||||
numtyp tu03 = (numtyp)0.0;
|
||||
|
||||
int j = (igridy - nylo_out) - nlpts;
|
||||
for (int jb = 0; jb < bsorder; jb++) {
|
||||
/*
|
||||
u0 = thetai2[m][jb][0];
|
||||
u1 = thetai2[m][jb][1];
|
||||
u2 = thetai2[m][jb][2];
|
||||
u3 = thetai2[m][jb][3];
|
||||
*/
|
||||
int i2 = istart + jb;
|
||||
numtyp4 tha2 = thetai2[i2];
|
||||
numtyp u0 = tha2.x;
|
||||
numtyp u1 = tha2.y;
|
||||
numtyp u2 = tha2.z;
|
||||
numtyp u3 = tha2.w;
|
||||
numtyp t0 = (numtyp)0.0;
|
||||
numtyp t1 = (numtyp)0.0;
|
||||
numtyp t2 = (numtyp)0.0;
|
||||
numtyp t3 = (numtyp)0.0;
|
||||
|
||||
int i = (igridx - nxlo_out) - nlpts;
|
||||
for (int ib = 0; ib < bsorder; ib++) {
|
||||
int i1 = istart + ib;
|
||||
numtyp4 tha1 = thetai1[i1];
|
||||
int gidx = k*ngridxy + j*ngridx + i;
|
||||
numtyp tq = grid[gidx];
|
||||
t0 += tq*tha1.x;
|
||||
t1 += tq*tha1.y;
|
||||
t2 += tq*tha1.z;
|
||||
t3 += tq*tha1.w;
|
||||
i++;
|
||||
}
|
||||
|
||||
tu00 += t0*u0;
|
||||
tu10 += t1*u0;
|
||||
tu01 += t0*u1;
|
||||
tu20 += t2*u0;
|
||||
tu11 += t1*u1;
|
||||
tu02 += t0*u2;
|
||||
tu30 += t3*u0;
|
||||
tu21 += t2*u1;
|
||||
tu12 += t1*u2;
|
||||
tu03 += t0*u3;
|
||||
j++;
|
||||
}
|
||||
|
||||
tuv000 += tu00*v0;
|
||||
tuv100 += tu10*v0;
|
||||
tuv010 += tu01*v0;
|
||||
tuv001 += tu00*v1;
|
||||
tuv200 += tu20*v0;
|
||||
tuv020 += tu02*v0;
|
||||
tuv002 += tu00*v2;
|
||||
tuv110 += tu11*v0;
|
||||
tuv101 += tu10*v1;
|
||||
tuv011 += tu01*v1;
|
||||
tuv300 += tu30*v0;
|
||||
tuv030 += tu03*v0;
|
||||
tuv003 += tu00*v3;
|
||||
tuv210 += tu21*v0;
|
||||
tuv201 += tu20*v1;
|
||||
tuv120 += tu12*v0;
|
||||
tuv021 += tu02*v1;
|
||||
tuv102 += tu10*v2;
|
||||
tuv012 += tu01*v2;
|
||||
tuv111 += tu11*v1;
|
||||
k++;
|
||||
}
|
||||
|
||||
numtyp buf[20];
|
||||
buf[0] = tuv000;
|
||||
buf[1] = tuv100;
|
||||
buf[2] = tuv010;
|
||||
buf[3] = tuv001;
|
||||
buf[4] = tuv200;
|
||||
buf[5] = tuv020;
|
||||
buf[6] = tuv002;
|
||||
buf[7] = tuv110;
|
||||
buf[8] = tuv101;
|
||||
buf[9] = tuv011;
|
||||
buf[10] = tuv300;
|
||||
buf[11] = tuv030;
|
||||
buf[12] = tuv003;
|
||||
buf[13] = tuv210;
|
||||
buf[14] = tuv201;
|
||||
buf[15] = tuv120;
|
||||
buf[16] = tuv021;
|
||||
buf[17] = tuv102;
|
||||
buf[18] = tuv012;
|
||||
buf[19] = tuv111;
|
||||
|
||||
int idx = ii;
|
||||
for (int m = 0; m < 20; m++) {
|
||||
fphi[idx] = buf[m];
|
||||
idx += inum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
scan standard neighbor list and make it compatible with 1-5 neighbors
|
||||
if IJ entry is a 1-2,1-3,1-4 neighbor then adjust offset to SBBITS15
|
||||
|
||||
@ -98,6 +98,8 @@ void amoeba_gpu_precompute_induce(const int inum_full, const int bsorder,
|
||||
void amoeba_gpu_fphi_uind(double ****host_grid_brick, void **host_fdip_phi1,
|
||||
void **host_fdip_phi2, void **host_fdip_sum_phi);
|
||||
|
||||
void amoeba_gpu_fphi_mpole(double ***host_grid_brick, void **host_fdip_sum_phi);
|
||||
|
||||
void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup,
|
||||
double **host_rpole, double **host_uind, double **host_uinp,
|
||||
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
|
||||
@ -339,6 +341,7 @@ void PairAmoebaGPU::induce()
|
||||
|
||||
// allocate memory and make early host-device transfers
|
||||
// must be done before the first ufield0c
|
||||
// NOTE: this is for ic_kspace, and thetai[1-3]
|
||||
|
||||
amoeba_gpu_precompute_induce(atom->nlocal, bsorder, thetai1, thetai2,
|
||||
thetai3, igrid,
|
||||
@ -1311,6 +1314,8 @@ void PairAmoebaGPU::polar_kspace()
|
||||
double cphid[4],cphip[4];
|
||||
double a[3][3]; // indices not flipped vs Fortran
|
||||
|
||||
bool gpu_fphi_mpole_ready = true;
|
||||
|
||||
// indices into the electrostatic field array
|
||||
// decremented by 1 versus Fortran
|
||||
|
||||
@ -1373,6 +1378,18 @@ void PairAmoebaGPU::polar_kspace()
|
||||
moduli();
|
||||
bspline_fill();
|
||||
|
||||
// allocate memory and make early host-device transfers
|
||||
|
||||
// NOTE: this is for p_kspace, and igrid and thetai[1-3] are filled by bpsline_fill
|
||||
if (gpu_fphi_mpole_ready) {
|
||||
amoeba_gpu_precompute_induce(atom->nlocal, bsorder, thetai1, thetai2,
|
||||
thetai3, igrid, p_kspace->nzlo_out,
|
||||
p_kspace->nzhi_out, p_kspace->nylo_out,
|
||||
p_kspace->nyhi_out, p_kspace->nxlo_out,
|
||||
p_kspace->nxhi_out);
|
||||
}
|
||||
|
||||
|
||||
// convert Cartesian multipoles to fractional coordinates
|
||||
|
||||
cmp_to_fmp(cmp,fmp);
|
||||
@ -1441,8 +1458,24 @@ void PairAmoebaGPU::polar_kspace()
|
||||
double ***gridpost = (double ***) p_kspace->post_convolution();
|
||||
|
||||
// get potential
|
||||
|
||||
fphi_mpole(gridpost,fphi);
|
||||
|
||||
if (!gpu_fphi_mpole_ready) {
|
||||
fphi_mpole(gridpost,fphi);
|
||||
//printf("cpu phi = %f %f %f\n", fphi[0][0],fphi[0][1],fphi[0][2]);
|
||||
} else {
|
||||
void* fphi_pinned = nullptr;
|
||||
amoeba_gpu_fphi_mpole(gridpost, &fphi_pinned);
|
||||
|
||||
double *_fphi_ptr = (double *)fphi_pinned;
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
int idx = i;
|
||||
for (int m = 0; m < 20; m++) {
|
||||
fphi[i][m] = _fphi_ptr[idx];
|
||||
idx += nlocal;
|
||||
}
|
||||
}
|
||||
//printf("gpu phi = %f %f %f\n", fphi[0][0],fphi[0][1],fphi[0][2]);
|
||||
}
|
||||
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
for (k = 0; k < 20; k++)
|
||||
|
||||
Reference in New Issue
Block a user