git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7180 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2011-10-24 17:49:41 +00:00
parent 5a03365db1
commit f3018d53bb
5 changed files with 50 additions and 69 deletions

View File

@ -130,11 +130,11 @@ Cuda::Cuda(LAMMPS *lmp) : Pointers(lmp)
downloadtime=0;
dotiming=false;
dotestatom = false;
testatom = 0;
dotestatom = false;
testatom = 0;
oncpu = true;
self_comm = 0;
self_comm = 0;
MYDBG( printf("# CUDA: Cuda::Cuda Done...\n");)
//cCudaData<double, float, yx >
}
@ -267,10 +267,10 @@ void Cuda::accelerator(int narg, char** arg)
cu_virial = 0;
cu_eatom = 0;
cu_vatom = 0;
cu_radius = 0;
cu_radius = 0;
cu_density = 0;
cu_omega = 0;
cu_torque = 0;
cu_omega = 0;
cu_torque = 0;
cu_special = 0;
cu_nspecial = 0;
@ -299,8 +299,11 @@ void Cuda::setSharedDataZero()
shared_data.atom.q_flag = 0;
shared_data.atom.need_eatom = 0;
shared_data.atom.need_vatom = 0;
shared_data.atom.update_nmax = 1;
shared_data.atom.update_nlocal = 1;
shared_data.atom.update_neigh = 1;
shared_data.pair.cudable_force = 0;
shared_data.pair.cudable_force = 0;
shared_data.pair.collect_forces_later = 0;
shared_data.pair.use_block_per_atom = 0;
shared_data.pair.override_block_per_atom = -1;
@ -429,14 +432,6 @@ void Cuda::checkResize()
if(cu_atom->q_flag)
{delete cu_q; cu_q = new cCudaData<double, F_FLOAT, x > ((double*)atom->q, & cu_atom->q , atom->nmax );}// cu_q->set_buffer(&(copy_buffer),&(copy_buffersize),true);}
/*
if(force->pair)
if(force->pair->eatom)
{delete cu_eatom; cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > (force->pair->eatom, & cu_atom->eatom , atom->nmax );}// cu_eatom->set_buffer(&(copy_buffer),&(copy_buffersize),true);}
if(force->pair)
if(force->pair->vatom)
{delete cu_vatom; cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)force->pair->vatom, & cu_atom->vatom , atom->nmax,6 );}// cu_vatom->set_buffer(&(copy_buffer),&(copy_buffersize),true);}
*/
if(atom->radius)
{
delete cu_radius; cu_radius = new cCudaData<double, X_FLOAT, x > (atom->radius , & cu_atom->radius , atom->nmax );
@ -444,11 +439,6 @@ void Cuda::checkResize()
delete cu_omega_rmass; cu_omega_rmass = new cCudaData<V_FLOAT, V_FLOAT, x> (omega_rmass , & cu_atom->omega_rmass , atom->nmax*4);
}
/*
if(atom->density)
{delete cu_density; cu_density = new cCudaData<double, F_FLOAT, x > (atom->density , & cu_atom->density , atom->nmax );}
*/
if(atom->omega)
{delete cu_omega; cu_omega = new cCudaData<double, V_FLOAT, yx > (((double*) atom->omega) , & cu_atom->omega , atom->nmax,3 );}
@ -464,12 +454,10 @@ void Cuda::checkResize()
shared_data.atom.special_flag = neighbor->special_flag;
shared_data.atom.molecular = atom->molecular;
cu_atom->update_nmax = 2;
cu_atom->nmax = atom->nmax;
cu_atom->update_nmax = 2;
cu_atom->nmax = atom->nmax;
//delete [] x_type; x_type = new X_FLOAT4[atom->nmax];
delete cu_x_type; cu_x_type = new cCudaData<X_FLOAT, X_FLOAT, x> (x_type , & cu_atom->x_type , atom->nmax*4);
// shared_data.buffer_new = 2;
}
if(((cu_xhold==NULL)||(cu_xhold->get_dim()[0]<neighbor->maxhold))&&neighbor->xhold)
@ -488,6 +476,12 @@ void Cuda::checkResize()
{
cu_map_array = new cCudaData<int, int, x > (atom->get_map_array() , & cu_atom->map_array , atom->get_map_size() );
}
else
if(cu_map_array->dev_size()/sizeof(int)<atom->get_map_size())
{
delete cu_map_array;
cu_map_array = new cCudaData<int, int, x > (atom->get_map_array() , & cu_atom->map_array , atom->get_map_size() );
}
}
@ -512,11 +506,6 @@ void Cuda::checkResize()
if(atom->radius)
if(cu_radius->get_host_data() != atom->radius) cu_radius->set_host_data((double*) (atom->radius));
/*
if(atom->density)
if(cu_density->get_host_data() != atom->density) cu_density->set_host_data((double*) (atom->density));
*/
if(atom->omega)
if(cu_omega->get_host_data() != atom->omega) cu_omega->set_host_data((double*) (atom->omega));
@ -558,7 +547,7 @@ void Cuda::evsetup_eatom_vatom(int eflag_atom,int vflag_atom)
if(not cu_vatom)
cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)force->pair->vatom, & (shared_data.atom.vatom) , atom->nmax ,6 );// cu_vatom->set_buffer(&(copy_buffer),&(copy_buffersize),true);}
cu_vatom->set_host_data((double*)force->pair->vatom);
cu_vatom->memset_device(0);
cu_vatom->memset_device(0);
}
}
@ -579,16 +568,9 @@ void Cuda::uploadAll()
cu_image->upload();
if(shared_data.atom.q_flag) cu_q ->upload();
//printf("A3\n");
//if(shared_data.atom.need_eatom) cu_eatom->upload();
//printf("A4\n");
//if(shared_data.atom.need_vatom) cu_vatom->upload();
//printf("A5\n");
if(atom->rmass) cu_rmass->upload();
if(atom->radius) cu_radius->upload();
// if(atom->density) cu_density->upload();
if(atom->omega) cu_omega->upload();
if(atom->torque) cu_torque->upload();
if(atom->special) cu_special->upload();
@ -631,7 +613,6 @@ void Cuda::downloadAll()
if(atom->rmass) cu_rmass->download();
if(atom->radius) cu_radius->download();
// if(atom->density) cu_density->download();
if(atom->omega) cu_omega->download();
if(atom->torque) cu_torque->download();
if(atom->special) cu_special->download();
@ -747,13 +728,13 @@ void Cuda::setTimingsZero()
shared_data.cuda_timings.neigh_special = 0;
//PPPM
shared_data.cuda_timings.pppm_particle_map;
shared_data.cuda_timings.pppm_make_rho;
shared_data.cuda_timings.pppm_brick2fft;
shared_data.cuda_timings.pppm_poisson;
shared_data.cuda_timings.pppm_fillbrick;
shared_data.cuda_timings.pppm_fieldforce;
shared_data.cuda_timings.pppm_compute;
shared_data.cuda_timings.pppm_particle_map = 0;
shared_data.cuda_timings.pppm_make_rho = 0;
shared_data.cuda_timings.pppm_brick2fft = 0;
shared_data.cuda_timings.pppm_poisson = 0;
shared_data.cuda_timings.pppm_fillbrick = 0;
shared_data.cuda_timings.pppm_fieldforce = 0;
shared_data.cuda_timings.pppm_compute = 0;
CudaWrapper_CheckUploadTime(true);
CudaWrapper_CheckDownloadTime(true);
@ -789,8 +770,8 @@ void Cuda::print_timings()
printf(" Exchange MPI \t %lf \n",shared_data.cuda_timings.comm_exchange_mpi);
printf(" Exchange Kernel Pack \t %lf \n",shared_data.cuda_timings.comm_exchange_kernel_pack);
printf(" Exchange Kernel Unpack \t %lf \n",shared_data.cuda_timings.comm_exchange_kernel_unpack);
printf(" Exchange Kernel Fill \t %lf \n",shared_data.cuda_timings.comm_exchange_kernel_fill);
printf(" Exchange CPU Pack \t %lf \n",shared_data.cuda_timings.comm_exchange_cpu_pack);
printf(" Exchange Kernel Fill \t %lf \n",shared_data.cuda_timings.comm_exchange_kernel_fill);
printf(" Exchange CPU Pack \t %lf \n",shared_data.cuda_timings.comm_exchange_cpu_pack);
printf(" Exchange Upload \t %lf \n",shared_data.cuda_timings.comm_exchange_upload);
printf(" Exchange Download \t %lf \n",shared_data.cuda_timings.comm_exchange_download);
printf("\n");