git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7426 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -53,9 +53,10 @@ grdtyp * PPPMT::init(const int nlocal, const int nall, FILE *_screen,
|
|||||||
const int nzhi_out, grdtyp **rho_coeff,
|
const int nzhi_out, grdtyp **rho_coeff,
|
||||||
grdtyp **vd_brick, const double slab_volfactor,
|
grdtyp **vd_brick, const double slab_volfactor,
|
||||||
const int nx_pppm, const int ny_pppm,
|
const int nx_pppm, const int ny_pppm,
|
||||||
const int nz_pppm, int &flag) {
|
const int nz_pppm, const bool split, int &flag) {
|
||||||
_max_bytes=10;
|
_max_bytes=10;
|
||||||
screen=_screen;
|
screen=_screen;
|
||||||
|
_kspace_split=split;
|
||||||
bool success=true;
|
bool success=true;
|
||||||
|
|
||||||
flag=device->init(*ans,nlocal,nall);
|
flag=device->init(*ans,nlocal,nall);
|
||||||
@ -359,10 +360,10 @@ void PPPMT::interp(const grdtyp qqrd2e_scale) {
|
|||||||
time_interp.stop();
|
time_interp.stop();
|
||||||
|
|
||||||
ans->copy_answers(false,false,false,false);
|
ans->copy_answers(false,false,false,false);
|
||||||
device->add_ans_object(ans);
|
if (_kspace_split==false)
|
||||||
|
device->add_ans_object(ans);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template <class numtyp, class acctyp, class grdtyp, class grdtyp4>
|
template <class numtyp, class acctyp, class grdtyp, class grdtyp4>
|
||||||
double PPPMT::host_memory_usage() const {
|
double PPPMT::host_memory_usage() const {
|
||||||
return device->atom.host_memory_usage()+
|
return device->atom.host_memory_usage()+
|
||||||
|
|||||||
@ -48,7 +48,8 @@ class PPPM {
|
|||||||
const int nxhi_out, const int nyhi_out, const int nzhi_out,
|
const int nxhi_out, const int nyhi_out, const int nzhi_out,
|
||||||
grdtyp **rho_coeff, grdtyp **vd_brick,
|
grdtyp **rho_coeff, grdtyp **vd_brick,
|
||||||
const double slab_volfactor, const int nx_pppm,
|
const double slab_volfactor, const int nx_pppm,
|
||||||
const int ny_pppm, const int nz_pppm, int &success);
|
const int ny_pppm, const int nz_pppm, const bool split,
|
||||||
|
int &success);
|
||||||
|
|
||||||
/// Check if there is enough storage for atom arrays and realloc if not
|
/// Check if there is enough storage for atom arrays and realloc if not
|
||||||
/** \param success set to false if insufficient memory **/
|
/** \param success set to false if insufficient memory **/
|
||||||
@ -174,7 +175,7 @@ class PPPM {
|
|||||||
UCL_Texture q_tex;
|
UCL_Texture q_tex;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
bool _allocated, _compiled, _precompute_done;
|
bool _allocated, _compiled, _precompute_done, _kspace_split;
|
||||||
int _block_size, _block_pencils, _pencil_size, _max_brick_atoms, _max_atoms;
|
int _block_size, _block_pencils, _pencil_size, _max_brick_atoms, _max_atoms;
|
||||||
double _max_bytes, _max_an_bytes;
|
double _max_bytes, _max_an_bytes;
|
||||||
double _cpu_idle_time;
|
double _cpu_idle_time;
|
||||||
|
|||||||
@ -36,7 +36,7 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall,
|
|||||||
const int nzhi_out, grdtyp **rho_coeff,
|
const int nzhi_out, grdtyp **rho_coeff,
|
||||||
grdtyp **vd_brick, const double slab_volfactor,
|
grdtyp **vd_brick, const double slab_volfactor,
|
||||||
const int nx_pppm, const int ny_pppm, const int nz_pppm,
|
const int nx_pppm, const int ny_pppm, const int nz_pppm,
|
||||||
int &success) {
|
const bool split, int &success) {
|
||||||
pppm.clear(0.0);
|
pppm.clear(0.0);
|
||||||
int first_gpu=pppm.device->first_device();
|
int first_gpu=pppm.device->first_device();
|
||||||
int last_gpu=pppm.device->last_device();
|
int last_gpu=pppm.device->last_device();
|
||||||
@ -60,7 +60,7 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall,
|
|||||||
if (world_me==0)
|
if (world_me==0)
|
||||||
host_brick=pppm.init(nlocal,nall,screen,order,nxlo_out,nylo_out,nzlo_out,
|
host_brick=pppm.init(nlocal,nall,screen,order,nxlo_out,nylo_out,nzlo_out,
|
||||||
nxhi_out,nyhi_out,nzhi_out,rho_coeff,vd_brick,
|
nxhi_out,nyhi_out,nzhi_out,rho_coeff,vd_brick,
|
||||||
slab_volfactor,nx_pppm,ny_pppm,nz_pppm,success);
|
slab_volfactor,nx_pppm,ny_pppm,nz_pppm,split,success);
|
||||||
|
|
||||||
pppm.device->world_barrier();
|
pppm.device->world_barrier();
|
||||||
if (message)
|
if (message)
|
||||||
@ -79,7 +79,7 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall,
|
|||||||
host_brick=pppm.init(nlocal,nall,screen,order,nxlo_out,nylo_out,
|
host_brick=pppm.init(nlocal,nall,screen,order,nxlo_out,nylo_out,
|
||||||
nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff,
|
nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff,
|
||||||
vd_brick,slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
|
vd_brick,slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
|
||||||
success);
|
split,success);
|
||||||
|
|
||||||
pppm.device->gpu_barrier();
|
pppm.device->gpu_barrier();
|
||||||
if (message)
|
if (message)
|
||||||
@ -97,11 +97,12 @@ float * pppm_gpu_init_f(const int nlocal, const int nall, FILE *screen,
|
|||||||
const int nzhi_out, float **rho_coeff,
|
const int nzhi_out, float **rho_coeff,
|
||||||
float **vd_brick, const double slab_volfactor,
|
float **vd_brick, const double slab_volfactor,
|
||||||
const int nx_pppm, const int ny_pppm, const int nz_pppm,
|
const int nx_pppm, const int ny_pppm, const int nz_pppm,
|
||||||
int &success) {
|
const bool split, int &success) {
|
||||||
float *b=pppm_gpu_init(PPPMF,nlocal,nall,screen,order,nxlo_out,nylo_out,
|
float *b=pppm_gpu_init(PPPMF,nlocal,nall,screen,order,nxlo_out,nylo_out,
|
||||||
nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff,vd_brick,
|
nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff,vd_brick,
|
||||||
slab_volfactor,nx_pppm,ny_pppm,nz_pppm,success);
|
slab_volfactor,nx_pppm,ny_pppm,nz_pppm,split,success);
|
||||||
PPPMF.device->set_single_precompute(&PPPMF);
|
if (split==false)
|
||||||
|
PPPMF.device->set_single_precompute(&PPPMF);
|
||||||
return b;
|
return b;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -118,13 +119,19 @@ int pppm_gpu_spread_f(const int ago, const int nlocal, const int nall,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void pppm_gpu_interp_f(const float qqrd2e_scale) {
|
void pppm_gpu_interp_f(const float qqrd2e_scale) {
|
||||||
return PPPMF.interp(qqrd2e_scale);
|
PPPMF.interp(qqrd2e_scale);
|
||||||
}
|
}
|
||||||
|
|
||||||
double pppm_gpu_bytes_f() {
|
double pppm_gpu_bytes_f() {
|
||||||
return PPPMF.host_memory_usage();
|
return PPPMF.host_memory_usage();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void pppm_gpu_forces_f(double **f) {
|
||||||
|
double etmp;
|
||||||
|
PPPMF.atom->data_unavail();
|
||||||
|
PPPMF.ans->get_answers(f,NULL,NULL,NULL,NULL,etmp);
|
||||||
|
}
|
||||||
|
|
||||||
double * pppm_gpu_init_d(const int nlocal, const int nall, FILE *screen,
|
double * pppm_gpu_init_d(const int nlocal, const int nall, FILE *screen,
|
||||||
const int order, const int nxlo_out,
|
const int order, const int nxlo_out,
|
||||||
const int nylo_out, const int nzlo_out,
|
const int nylo_out, const int nzlo_out,
|
||||||
@ -132,12 +139,13 @@ double * pppm_gpu_init_d(const int nlocal, const int nall, FILE *screen,
|
|||||||
const int nzhi_out, double **rho_coeff,
|
const int nzhi_out, double **rho_coeff,
|
||||||
double **vd_brick, const double slab_volfactor,
|
double **vd_brick, const double slab_volfactor,
|
||||||
const int nx_pppm, const int ny_pppm,
|
const int nx_pppm, const int ny_pppm,
|
||||||
const int nz_pppm, int &success) {
|
const int nz_pppm, const bool split, int &success) {
|
||||||
double *b=pppm_gpu_init(PPPMD,nlocal,nall,screen,order,nxlo_out,nylo_out,
|
double *b=pppm_gpu_init(PPPMD,nlocal,nall,screen,order,nxlo_out,nylo_out,
|
||||||
nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff,
|
nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff,
|
||||||
vd_brick,slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
|
vd_brick,slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
|
||||||
success);
|
split,success);
|
||||||
PPPMF.device->set_double_precompute(&PPPMD);
|
if (split==false)
|
||||||
|
PPPMD.device->set_double_precompute(&PPPMD);
|
||||||
return b;
|
return b;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -154,10 +162,16 @@ int pppm_gpu_spread_d(const int ago, const int nlocal, const int nall,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void pppm_gpu_interp_d(const double qqrd2e_scale) {
|
void pppm_gpu_interp_d(const double qqrd2e_scale) {
|
||||||
return PPPMD.interp(qqrd2e_scale);
|
PPPMD.interp(qqrd2e_scale);
|
||||||
}
|
}
|
||||||
|
|
||||||
double pppm_gpu_bytes_d() {
|
double pppm_gpu_bytes_d() {
|
||||||
return PPPMD.host_memory_usage();
|
return PPPMD.host_memory_usage();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void pppm_gpu_forces_d(double **f) {
|
||||||
|
double etmp;
|
||||||
|
PPPMD.atom->data_unavail();
|
||||||
|
PPPMD.ans->get_answers(f,NULL,NULL,NULL,NULL,etmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user