Adding additional charge spreading kernels

This commit is contained in:
W. Michael Brown
2011-02-10 17:10:20 -05:00
parent 6c28a2c117
commit d7fdcef6ff
2 changed files with 223 additions and 6 deletions

View File

@ -181,7 +181,7 @@ void PPPMGPUMemoryT::clear() {
device->clear();
}
/*
// ---------------------------------------------------------------------------
// Copy nbor list from host if necessary and then calculate forces, virials,..
// ---------------------------------------------------------------------------
@ -237,9 +237,6 @@ int PPPMGPUMemoryT::compute(const int ago, const int nlocal, const int nall,
double delvolinv = delxinv*delyinv*delzinv;
numtyp f_delvolinv = delvolinv;
std::cout << "BOX: " << boxlo[0] << " " << boxlo[1] << " " << boxlo[2] << "\n";
std::cout << "Brick: " << f_brick_x << " " << f_brick_y << " " << f_brick_z << "\n";
std::cout << "Delx: " << 1.0/delxinv << " " << 1.0/delyinv << " " << 1.0/delzinv << std::endl;
time_map.start();
d_brick_counts.zero();
k_particle_map.set_size(GX,BX);
@ -249,7 +246,6 @@ std::cout << "Delx: " << 1.0/delxinv << " " << 1.0/delyinv << " " << 1.0/delzinv
&_nlocal_y, &_nlocal_z, &_atom_stride, &_max_brick_atoms,
&d_error_flag.begin());
time_map.stop();
//std::cout << "COUNTS: " << d_brick_counts << std::endl;
if (_order % 2)
shift=0.0;
@ -291,6 +287,84 @@ std::cout << "Delx: " << 1.0/delxinv << " " << 1.0/delyinv << " " << 1.0/delzinv
return h_error_flag[0];
}
*/
// ---------------------------------------------------------------------------
// Copy nbor list from host if necessary and then calculate forces, virials,..
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
int PPPMGPUMemoryT::compute(const int ago, const int nlocal, const int nall,
double **host_x, int *host_type, bool &success,
double *host_q, double *boxlo,
const double delxinv, const double delyinv,
const double delzinv) {
acc_timers();
if (nlocal==0) {
zero_timers();
return 0;
}
ans->inum(nlocal);
if (ago==0) {
resize_atom(nlocal,nall,success);
resize_local(nlocal,success);
if (!success)
return 0;
double bytes=ans->gpu_bytes();
if (bytes>_max_an_bytes)
_max_an_bytes=bytes;
}
atom->cast_x_data(host_x,host_type);
atom->cast_q_data(host_q);
atom->add_x_data(host_x,host_type);
atom->add_q_data();
// Compute the block size and grid size to keep all cores busy
int BX=this->block_size();
int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/BX));
int ainum=this->ans->inum();
// Boxlo adjusted to be upper left brick and shift for even stencil order
double shift=0.0;
numtyp shift2=-0.5;
if (_order % 2) {
shift=0.5;
shift2=0.5;
}
numtyp f_brick_x=boxlo[0]+(_nxlo_out-_nlower-shift)/delxinv;
numtyp f_brick_y=boxlo[1]+(_nylo_out-_nlower-shift)/delyinv;
numtyp f_brick_z=boxlo[2]+(_nzlo_out-_nlower-shift)/delzinv;
numtyp f_delxinv=delxinv;
numtyp f_delyinv=delyinv;
numtyp f_delzinv=delzinv;
double delvolinv = delxinv*delyinv*delzinv;
numtyp f_delvolinv = delvolinv;
time_map.start();
time_map.stop();
time_rho.start();
d_brick.zero();
k_make_rho.set_size(GX,BX);
k_make_rho.run(&atom->dev_x.begin(), &atom->dev_q.begin(), &ainum,
&d_brick.begin(), &d_rho_coeff.begin(), &_npts_x,
&_npts_y, &_nlocal_x, &_nlocal_y, &_nlocal_z, &f_brick_x,
&f_brick_y, &f_brick_z, &f_delxinv, &f_delyinv, &f_delzinv,
&shift2, &_order, &f_delvolinv, &d_error_flag.begin());
time_rho.stop();
time_out.start();
ucl_copy(h_brick,d_brick,true);
ucl_copy(h_error_flag,d_error_flag,false);
time_out.stop();
return h_error_flag[0];
}
template <class numtyp, class acctyp>
double PPPMGPUMemoryT::host_memory_usage() const {
@ -308,7 +382,7 @@ void PPPMGPUMemoryT::compile_kernels(UCL_Device &dev) {
pppm_program=new UCL_Program(dev);
pppm_program->load_string(pppm_gpu_kernel,flags.c_str());
k_particle_map.set_function(*pppm_program,"particle_map");
k_make_rho.set_function(*pppm_program,"make_rho");
k_make_rho.set_function(*pppm_program,"make_rho2");
pos_tex.get_texture(*pppm_program,"pos_tex");
q_tex.get_texture(*pppm_program,"q_tex");