Made some progress with fphi_uind in the gpu pair style
This commit is contained in:
@ -14,7 +14,7 @@
|
||||
// ***************************************************************************
|
||||
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
//#include <stdio.h>
|
||||
#include <stdio.h>
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifdef LAMMPS_SMALLBIG
|
||||
#define tagint int
|
||||
@ -1630,14 +1630,19 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
||||
__global numtyp *restrict fdip_phi2,
|
||||
__global numtyp *restrict fdip_sum_phi,
|
||||
const int bsorder, const int inum,
|
||||
const int nyzgrid, const int nygrid,
|
||||
const int t_per_atom)
|
||||
const int nzlo_out, const int nzhi_out,
|
||||
const int nylo_out, const int nyhi_out,
|
||||
const int nxlo_out, const int nxhi_out,
|
||||
const int ngridxy, const int ngridx)
|
||||
{
|
||||
int tid, ii, offset, i, n_stride;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
//int tid, ii, offset, i, n_stride;
|
||||
//atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
int tid=THREAD_ID_X;
|
||||
int ii=tid+BLOCK_ID_X*BLOCK_SIZE_X;
|
||||
|
||||
if (ii<inum) {
|
||||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||
numtyp4 ix; fetch4(ix,ii,pos_tex); //x_[i];
|
||||
|
||||
int j,k,m;
|
||||
numtyp v0,v1,v2,v3;
|
||||
@ -1706,7 +1711,7 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
||||
tuv012 = (numtyp)0.0;
|
||||
tuv111 = (numtyp)0.0;
|
||||
|
||||
k = igrid[4*i+2] - nlpts;
|
||||
k = igrid[4*ii+2] - nzlo_out - nlpts;
|
||||
for (int kb = 0; kb < bsorder; kb++) {
|
||||
/*
|
||||
v0 = thetai3[m][kb][0];
|
||||
@ -1714,9 +1719,9 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
||||
v2 = thetai3[m][kb][2];
|
||||
v3 = thetai3[m][kb][3];
|
||||
*/
|
||||
int i3 = i*4*bsorder + 4*kb;
|
||||
int i3 = ii*4*bsorder + 4*kb;
|
||||
v0 = thetai3[i3];
|
||||
v1 = thetai3[i3]+1;
|
||||
v1 = thetai3[i3+1];
|
||||
v2 = thetai3[i3+2];
|
||||
v3 = thetai3[i3+3];
|
||||
tu00_1 = (numtyp)0.0;
|
||||
@ -1742,7 +1747,7 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
||||
tu12 = (numtyp)0.0;
|
||||
tu03 = (numtyp)0.0;
|
||||
|
||||
j = igrid[4*i+1] - nlpts;
|
||||
j = igrid[4*ii+1] - nylo_out - nlpts;
|
||||
for (int jb = 0; jb < bsorder; jb++) {
|
||||
/*
|
||||
u0 = thetai2[m][jb][0];
|
||||
@ -1750,7 +1755,7 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
||||
u2 = thetai2[m][jb][2];
|
||||
u3 = thetai2[m][jb][3];
|
||||
*/
|
||||
int i2 = i*4*bsorder+4*jb;
|
||||
int i2 = ii*4*bsorder+4*jb;
|
||||
u0 = thetai2[i2];
|
||||
u1 = thetai2[i2+1];
|
||||
u2 = thetai2[i2+2];
|
||||
@ -1763,11 +1768,11 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
||||
t2_2 = (numtyp)0.0;
|
||||
t3 = (numtyp)0.0;
|
||||
|
||||
int ii = igrid[4*i] - nlpts;
|
||||
int i = igrid[4*ii] - nxlo_out - nlpts;
|
||||
for (int ib = 0; ib < bsorder; ib++) {
|
||||
/*
|
||||
tq_1 = grid[k][j][ii][0];
|
||||
tq_2 = grid[k][j][ii][1];
|
||||
tq_1 = grid[k][j][i][0];
|
||||
tq_2 = grid[k][j][i][1];
|
||||
t0_1 += tq_1*thetai1[m][ib][0];
|
||||
t1_1 += tq_1*thetai1[m][ib][1];
|
||||
t2_1 += tq_1*thetai1[m][ib][2];
|
||||
@ -1776,14 +1781,19 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
||||
t2_2 += tq_2*thetai1[m][ib][2];
|
||||
t3 += (tq_1+tq_2)*thetai1[m][ib][3];
|
||||
*/
|
||||
int i1 = i*4*bsorder+4*ib;
|
||||
int i1 = ii*4*bsorder+4*ib;
|
||||
numtyp w0 = thetai1[i1];
|
||||
numtyp w1 = thetai1[i1+1];
|
||||
numtyp w2 = thetai1[i1+2];
|
||||
numtyp w3 = thetai1[i1+3];
|
||||
int gidx = 2*(k*nyzgrid + j*nygrid + ii);
|
||||
int gidx = 2*(k*ngridxy + j*ngridx + i);
|
||||
tq_1 = grid[gidx];
|
||||
tq_2 = grid[gidx+1];
|
||||
/*
|
||||
if (ii == 0 && jb == 0 && kb == 0)
|
||||
printf("ii = 0: igrid %d %d %d; grid %f %f; k = %d j = %d; i = %d; origin = %f %f; gidx = %d\n",
|
||||
igrid[4*ii+0], igrid[4*ii+1], igrid[4*ii+2], tq_1, tq_2, k, j, i, grid[0], grid[1], gidx);
|
||||
*/
|
||||
t0_1 += tq_1*w0;
|
||||
t1_1 += tq_1*w1;
|
||||
t2_1 += tq_1*w2;
|
||||
@ -1791,7 +1801,7 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
||||
t1_2 += tq_2*w1;
|
||||
t2_2 += tq_2*w2;
|
||||
t3 += (tq_1+tq_2)*w3;
|
||||
ii++;
|
||||
i++;
|
||||
}
|
||||
|
||||
tu00_1 += t0_1*u0;
|
||||
@ -1875,7 +1885,7 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
||||
fdip_phi1[m][8] = tuv101_1;
|
||||
fdip_phi1[m][9] = tuv011_1;
|
||||
*/
|
||||
int idx = 10*m;
|
||||
int idx = 10*ii;
|
||||
fdip_phi1[idx+0] = (numtyp)0.0;
|
||||
fdip_phi1[idx+1] = tuv100_1;
|
||||
fdip_phi1[idx+2] = tuv010_1;
|
||||
@ -1886,7 +1896,18 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
||||
fdip_phi1[idx+7] = tuv110_1;
|
||||
fdip_phi1[idx+8] = tuv101_1;
|
||||
fdip_phi1[idx+9] = tuv011_1;
|
||||
|
||||
/*
|
||||
fdip_phi2[m][0] = 0.0;
|
||||
fdip_phi2[m][1] = tuv100_2;
|
||||
fdip_phi2[m][2] = tuv010_2;
|
||||
fdip_phi2[m][3] = tuv001_2;
|
||||
fdip_phi2[m][4] = tuv200_2;
|
||||
fdip_phi2[m][5] = tuv020_2;
|
||||
fdip_phi2[m][6] = tuv002_2;
|
||||
fdip_phi2[m][7] = tuv110_2;
|
||||
fdip_phi2[m][8] = tuv101_2;
|
||||
fdip_phi2[m][9] = tuv011_2;
|
||||
*/
|
||||
fdip_phi2[idx+0] = (numtyp)0.0;
|
||||
fdip_phi2[idx+1] = tuv100_2;
|
||||
fdip_phi2[idx+2] = tuv010_2;
|
||||
@ -1898,7 +1919,7 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
||||
fdip_phi2[idx+8] = tuv101_2;
|
||||
fdip_phi2[idx+9] = tuv011_2;
|
||||
|
||||
idx = 20*m;
|
||||
idx = 20*ii;
|
||||
fdip_sum_phi[idx+0] = tuv000;
|
||||
fdip_sum_phi[idx+1] = tuv100;
|
||||
fdip_sum_phi[idx+2] = tuv010;
|
||||
|
||||
@ -165,12 +165,12 @@ void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double *
|
||||
void amoeba_gpu_fphi_uind(const int inum_full, const int bsorder,
|
||||
double ***host_thetai1, double ***host_thetai2,
|
||||
double ***host_thetai3, int** igrid,
|
||||
double *host_grid_brick_start, void **host_fdip_phi1,
|
||||
void **host_fdip_phi2, void **host_fdip_sum_phi,
|
||||
double *host_grid_brick_start, double ****host_grid_brick,
|
||||
void **host_fdip_phi1, void **host_fdip_phi2, void **host_fdip_sum_phi,
|
||||
int nzlo_out, int nzhi_out, int nylo_out, int nyhi_out,
|
||||
int nxlo_out, int nxhi_out, bool& first_iteration) {
|
||||
AMOEBAMF.compute_fphi_uind(inum_full, bsorder, host_thetai1, host_thetai2,
|
||||
host_thetai3, igrid, host_grid_brick_start, host_fdip_phi1,
|
||||
host_thetai3, igrid, host_grid_brick_start, host_grid_brick, host_fdip_phi1,
|
||||
host_fdip_phi2, host_fdip_sum_phi, nzlo_out, nzhi_out,
|
||||
nylo_out, nyhi_out, nxlo_out, nxhi_out, first_iteration);
|
||||
}
|
||||
|
||||
@ -555,7 +555,7 @@ void BaseAmoebaT::compute_umutual2b(int *host_amtype, int *host_amgroup, double
|
||||
// Prepare for umutual1() after bspline_fill() is done on host
|
||||
// - reallocate per-atom arrays, thetai1, thetai2, thetai3, and igrid if needed
|
||||
// host_thetai1, host_thetai2, host_thetai3 are allocated with nmax by bsordermax by 4
|
||||
// host_igrid is allocated with nmax by by 4
|
||||
// host_igrid is allocated with nmax by 4
|
||||
// - transfer extra data from host to device
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@ -563,8 +563,9 @@ template <class numtyp, class acctyp>
|
||||
void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
|
||||
double ***host_thetai1, double ***host_thetai2,
|
||||
double ***host_thetai3, int** host_igrid,
|
||||
double* host_grid_brick_start, int nzlo_out,
|
||||
int nzhi_out, int nylo_out, int nyhi_out,
|
||||
double* host_grid_brick_start, double**** host_grid_brick,
|
||||
int nzlo_out, int nzhi_out,
|
||||
int nylo_out, int nyhi_out,
|
||||
int nxlo_out, int nxhi_out) {
|
||||
|
||||
_bsorder = bsorder;
|
||||
@ -599,7 +600,7 @@ void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
|
||||
}
|
||||
|
||||
UCL_H_Vec<double> dview;
|
||||
dview.alloc(inum_full*bsorder*4,*(this->ucl_device));
|
||||
dview.alloc(_max_thetai_size*bsorder*4,*(this->ucl_device));
|
||||
|
||||
// pack host data to device
|
||||
|
||||
@ -634,7 +635,7 @@ void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
|
||||
ucl_copy(_thetai3,dview,false);
|
||||
|
||||
UCL_H_Vec<int> dview_int;
|
||||
dview_int.alloc(inum_full*4, *(this->ucl_device));
|
||||
dview_int.alloc(_max_thetai_size*4, *(this->ucl_device));
|
||||
for (int i = 0; i < inum_full; i++) {
|
||||
int idx = i*4;
|
||||
dview_int[idx+0] = host_igrid[i][0];
|
||||
@ -643,6 +644,33 @@ void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
|
||||
}
|
||||
ucl_copy(_igrid, dview_int, false);
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// fphi_uind = induced potential from grid
|
||||
// fphi_uind extracts the induced dipole potential from the particle mesh Ewald grid
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void BaseAmoebaT::compute_fphi_uind(const int inum_full, const int bsorder,
|
||||
double ***host_thetai1, double ***host_thetai2,
|
||||
double ***host_thetai3, int** igrid,
|
||||
double *host_grid_brick_start, double ****host_grid_brick,
|
||||
void** host_fdip_phi1, void **host_fdip_phi2, void **host_fdip_sum_phi,
|
||||
int nzlo_out, int nzhi_out, int nylo_out, int nyhi_out,
|
||||
int nxlo_out, int nxhi_out, bool& first_iteration)
|
||||
{
|
||||
// allocation/resize and transfers before the first iteration
|
||||
|
||||
if (first_iteration) {
|
||||
precompute_induce(inum_full, bsorder, host_thetai1, host_thetai2, host_thetai3,
|
||||
igrid, host_grid_brick_start, host_grid_brick, nzlo_out, nzhi_out,
|
||||
nylo_out, nyhi_out, nxlo_out, nxhi_out);
|
||||
if (first_iteration) first_iteration = false;
|
||||
}
|
||||
|
||||
// update the cgrid_brick with data from the host
|
||||
|
||||
_nzlo_out = nzlo_out;
|
||||
@ -656,36 +684,27 @@ void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
|
||||
_ngridx = nxhi_out - nxlo_out + 1;
|
||||
_num_grid_points = _ngridx * _ngridy * _ngridz;
|
||||
|
||||
UCL_H_Vec<double> dview_cgrid;
|
||||
dview_cgrid.view(host_grid_brick_start, _num_grid_points*2, *(this->ucl_device));
|
||||
UCL_H_Vec<double> hview_cgrid;
|
||||
hview_cgrid.alloc(_num_grid_points*2, *(this->ucl_device), UCL_READ_WRITE);
|
||||
int n = 0;
|
||||
for (int iz = nzlo_out; iz <= nzhi_out; iz++)
|
||||
for (int iy = nylo_out; iy <= nyhi_out; iy++)
|
||||
for (int ix = nxlo_out; ix <= nxhi_out; ix++) {
|
||||
/*
|
||||
if (iz == nzlo_out && iy == nylo_out && ix == nxlo_out) {
|
||||
printf("origin = %d %d %d: grid = %f %f %f\n", iz, iy, ix, host_grid_brick[iz][iy][ix][0], host_grid_brick[iz][iy][ix][1]);
|
||||
}
|
||||
if (iz == -2 && iy == 4 && ix == 8) printf("ixyz = %d %d %d: grid = %f %f %f; n = %d\n", iz, iy, ix, host_grid_brick[iz][iy][ix][0], host_grid_brick[iz][iy][ix][1], n);
|
||||
*/
|
||||
hview_cgrid[n] = host_grid_brick[iz][iy][ix][0];
|
||||
hview_cgrid[n+1] = host_grid_brick[iz][iy][ix][1];
|
||||
n += 2;
|
||||
}
|
||||
//hview_cgrid.view(host_grid_brick_start, _num_grid_points*2, *(this->ucl_device));
|
||||
_cgrid_brick.alloc(_num_grid_points*2, *(this->ucl_device), UCL_READ_ONLY);
|
||||
ucl_copy(_cgrid_brick,dview_cgrid,false);
|
||||
ucl_copy(_cgrid_brick,hview_cgrid,false);
|
||||
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// fphi_uind = induced potential from grid
|
||||
// fphi_uind extracts the induced dipole potential from the particle mesh Ewald grid
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void BaseAmoebaT::compute_fphi_uind(const int inum_full, const int bsorder,
|
||||
double ***host_thetai1, double ***host_thetai2,
|
||||
double ***host_thetai3, int** igrid,
|
||||
double *host_grid_brick_start, void** host_fdip_phi1,
|
||||
void **host_fdip_phi2, void **host_fdip_sum_phi,
|
||||
int nzlo_out, int nzhi_out, int nylo_out, int nyhi_out,
|
||||
int nxlo_out, int nxhi_out, bool& first_iteration)
|
||||
{
|
||||
// allocation/resize and transfers before the first iteration
|
||||
|
||||
if (first_iteration) {
|
||||
precompute_induce(inum_full, bsorder, host_thetai1, host_thetai2, host_thetai3,
|
||||
igrid, host_grid_brick_start, nzlo_out, nzhi_out,
|
||||
nylo_out, nyhi_out, nxlo_out, nxhi_out);
|
||||
if (first_iteration) first_iteration = false;
|
||||
}
|
||||
|
||||
const int red_blocks = fphi_uind();
|
||||
|
||||
_fdip_phi1.update_host(_max_thetai_size*10);
|
||||
@ -711,16 +730,16 @@ int BaseAmoebaT::fphi_uind() {
|
||||
|
||||
// Compute the block size and grid size to keep all cores busy
|
||||
const int BX=block_size();
|
||||
int GX=static_cast<int>(ceil(static_cast<double>(ans->inum())/
|
||||
(BX/_threads_per_atom)));
|
||||
int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/BX));
|
||||
|
||||
time_pair.start();
|
||||
int ngridyz = _ngridy * _ngridz;
|
||||
int ngridxy = _ngridx * _ngridy;
|
||||
k_fphi_uind.set_size(GX,BX);
|
||||
k_fphi_uind.run(&atom->x, &_thetai1, &_thetai2, &_thetai3,
|
||||
&_igrid, &_cgrid_brick, &_fdip_phi1, &_fdip_phi2,
|
||||
&_fdip_sum_phi, &_bsorder, &ainum, &ngridyz, &_ngridy,
|
||||
&_threads_per_atom);
|
||||
&_fdip_sum_phi, &_bsorder, &ainum,
|
||||
&_nzlo_out, &_nzhi_out, &_nylo_out, &_nyhi_out,
|
||||
&_nxlo_out, &_nxhi_out, &ngridxy, &_ngridx);
|
||||
time_pair.stop();
|
||||
|
||||
return GX;
|
||||
|
||||
@ -153,8 +153,9 @@ class BaseAmoeba {
|
||||
virtual void precompute_induce(const int inum_full, const int bsorder,
|
||||
double ***host_thetai1, double ***host_thetai2,
|
||||
double ***host_thetai3, int** igrid,
|
||||
double* grid_brick_start, int nzlo_out,
|
||||
int nzhi_out, int nylo_out, int nyhi_out,
|
||||
double *host_grid_brick_start, double ****host_grid_brick,
|
||||
int nzlo_out, int nzhi_out,
|
||||
int nylo_out, int nyhi_out,
|
||||
int nxlo_out, int nxhi_out);
|
||||
|
||||
/// Compute multipole real-space with device neighboring
|
||||
@ -182,8 +183,8 @@ class BaseAmoeba {
|
||||
virtual void compute_fphi_uind(const int inum_full, const int bsorder,
|
||||
double ***host_thetai1, double ***host_thetai2,
|
||||
double ***host_thetai3, int** igrid,
|
||||
double *host_grid_brick_start, void **host_fdip_phi1,
|
||||
void **host_fdip_phi2, void **host_fdip_sum_phi,
|
||||
double *host_grid_brick_start, double ****host_grid_brick,
|
||||
void **host_fdip_phi1, void **host_fdip_phi2, void **host_fdip_sum_phi,
|
||||
int nzlo_out, int nzhi_out, int nylo_out, int nyhi_out,
|
||||
int nxlo_out, int nxhi_out, bool& first_iteration);
|
||||
|
||||
|
||||
@ -91,7 +91,7 @@ void amoeba_gpu_update_fieldp(void **fieldp_ptr);
|
||||
void amoeba_gpu_fphi_uind(const int inum_full, const int bsorder,
|
||||
double ***host_thetai1, double ***host_thetai2,
|
||||
double ***host_thetai3, int** igrid,
|
||||
double *host_grid_brick_start, void **host_fdip_phi1,
|
||||
double *host_grid_brick_start, double ****host_grid_brick, void **host_fdip_phi1,
|
||||
void **host_fdip_phi2, void **host_fdip_sum_phi,
|
||||
int nzlo_out, int nzhi_out, int nylo_out, int nyhi_out,
|
||||
int nxlo_out, int nxhi_out, bool& first_iteration);
|
||||
@ -121,7 +121,7 @@ PairAmoebaGPU::PairAmoebaGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE)
|
||||
gpu_multipole_real_ready = true; // need to be true for precompute()
|
||||
gpu_udirect2b_ready = true;
|
||||
gpu_umutual1_ready = true;
|
||||
gpu_fphi_uind_ready = false;
|
||||
gpu_fphi_uind_ready = true;
|
||||
gpu_umutual2b_ready = true;
|
||||
gpu_polar_real_ready = true; // need to be true for copying data from device back to host
|
||||
|
||||
@ -1139,7 +1139,7 @@ void PairAmoebaGPU::fphi_uind(double ****grid, double **fdip_phi1,
|
||||
void* fdip_phi2_pinned = nullptr;
|
||||
void* fdip_sum_phi_pinned = nullptr;
|
||||
amoeba_gpu_fphi_uind(atom->nlocal, bsorder, thetai1, thetai2, thetai3,
|
||||
igrid, ic_kspace->grid_brick_start,
|
||||
igrid, ic_kspace->grid_brick_start, grid,
|
||||
&fdip_phi1_pinned, &fdip_phi2_pinned, &fdip_sum_phi_pinned,
|
||||
ic_kspace->nzlo_out, ic_kspace->nzhi_out,
|
||||
ic_kspace->nylo_out, ic_kspace->nyhi_out,
|
||||
@ -1150,8 +1150,10 @@ void PairAmoebaGPU::fphi_uind(double ****grid, double **fdip_phi1,
|
||||
double *_fdip_phi1_ptr = (double *)fdip_phi1_pinned;
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
int idx = 10 * i;
|
||||
for (int m = 0; m < 10; m++)
|
||||
fdip_phi1[i][m] = _fdip_phi1_ptr[idx+m];
|
||||
for (int m = 0; m < 10; m++) {
|
||||
fdip_phi1[i][m] = _fdip_phi1_ptr[idx+m];
|
||||
}
|
||||
if (i == 0) printf("gpu fdip phi1 = %f %f %f\n", fdip_phi1[i][0], fdip_phi1[i][1], fdip_phi1[i][2]);
|
||||
}
|
||||
|
||||
double *_fdip_phi2_ptr = (double *)fdip_phi2_pinned;
|
||||
@ -1159,6 +1161,7 @@ void PairAmoebaGPU::fphi_uind(double ****grid, double **fdip_phi1,
|
||||
int idx = 10 * i;
|
||||
for (int m = 0; m < 10; m++)
|
||||
fdip_phi2[i][m] = _fdip_phi2_ptr[idx+m];
|
||||
if (i == 0) printf("gpu fdip phi2 = %f %f %f\n", fdip_phi2[i][0], fdip_phi2[i][1], fdip_phi2[i][2]);
|
||||
}
|
||||
|
||||
double *_fdip_sum_phi_ptr = (double *)fdip_sum_phi_pinned;
|
||||
@ -1166,6 +1169,7 @@ void PairAmoebaGPU::fphi_uind(double ****grid, double **fdip_phi1,
|
||||
int idx = 20 * i;
|
||||
for (int m = 0; m < 20; m++)
|
||||
fdip_sum_phi[i][m] = _fdip_sum_phi_ptr[idx+m];
|
||||
if (i == 0) printf("gpu fdip sum phi = %f %f %f\n", fdip_sum_phi[i][0], fdip_sum_phi[i][1], fdip_sum_phi[i][2]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user