git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7180 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -286,14 +286,14 @@ int AtomVecAtomicCuda::pack_exchange(int dim, double *buf)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if(max_nsend==0) grow_copylist(200);
|
if(max_nsend==0) grow_copylist(200);
|
||||||
|
|
||||||
int nsend_atoms = Cuda_AtomVecAtomicCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
|
int nsend_atoms = Cuda_AtomVecAtomicCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
|
||||||
|
|
||||||
if(nsend_atoms>max_nsend) {grow_copylist(nsend_atoms+100);}
|
if(nsend_atoms>max_nsend) {grow_copylist(nsend_atoms+100);}
|
||||||
if(nsend_atoms*NCUDAEXCHANGE>*maxsend)
|
if(nsend_atoms*NCUDAEXCHANGE>*maxsend)
|
||||||
{
|
{
|
||||||
grow_send((int) (nsend_atoms+100)*NCUDAEXCHANGE,buf_pointer,0);
|
grow_send((int) (nsend_atoms+100)*NCUDAEXCHANGE,buf_pointer,0);
|
||||||
Cuda_AtomVecAtomicCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
|
Cuda_AtomVecAtomicCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
|
||||||
}
|
}
|
||||||
|
|
||||||
int nlocal=atom->nlocal-nsend_atoms;
|
int nlocal=atom->nlocal-nsend_atoms;
|
||||||
@ -395,6 +395,7 @@ int AtomVecAtomicCuda::unpack_exchange(double *buf)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
cuda->shared_data.atom.nlocal=nlocal;
|
cuda->shared_data.atom.nlocal=nlocal;
|
||||||
|
if(atom->nlocal!=nlocal)
|
||||||
cuda->shared_data.atom.update_nlocal=2;
|
cuda->shared_data.atom.update_nlocal=2;
|
||||||
atom->nlocal=nlocal;
|
atom->nlocal=nlocal;
|
||||||
mfirst+=m;
|
mfirst+=m;
|
||||||
|
|||||||
@ -130,11 +130,11 @@ Cuda::Cuda(LAMMPS *lmp) : Pointers(lmp)
|
|||||||
downloadtime=0;
|
downloadtime=0;
|
||||||
dotiming=false;
|
dotiming=false;
|
||||||
|
|
||||||
dotestatom = false;
|
dotestatom = false;
|
||||||
testatom = 0;
|
testatom = 0;
|
||||||
oncpu = true;
|
oncpu = true;
|
||||||
|
|
||||||
self_comm = 0;
|
self_comm = 0;
|
||||||
MYDBG( printf("# CUDA: Cuda::Cuda Done...\n");)
|
MYDBG( printf("# CUDA: Cuda::Cuda Done...\n");)
|
||||||
//cCudaData<double, float, yx >
|
//cCudaData<double, float, yx >
|
||||||
}
|
}
|
||||||
@ -267,10 +267,10 @@ void Cuda::accelerator(int narg, char** arg)
|
|||||||
cu_virial = 0;
|
cu_virial = 0;
|
||||||
cu_eatom = 0;
|
cu_eatom = 0;
|
||||||
cu_vatom = 0;
|
cu_vatom = 0;
|
||||||
cu_radius = 0;
|
cu_radius = 0;
|
||||||
cu_density = 0;
|
cu_density = 0;
|
||||||
cu_omega = 0;
|
cu_omega = 0;
|
||||||
cu_torque = 0;
|
cu_torque = 0;
|
||||||
|
|
||||||
cu_special = 0;
|
cu_special = 0;
|
||||||
cu_nspecial = 0;
|
cu_nspecial = 0;
|
||||||
@ -299,8 +299,11 @@ void Cuda::setSharedDataZero()
|
|||||||
shared_data.atom.q_flag = 0;
|
shared_data.atom.q_flag = 0;
|
||||||
shared_data.atom.need_eatom = 0;
|
shared_data.atom.need_eatom = 0;
|
||||||
shared_data.atom.need_vatom = 0;
|
shared_data.atom.need_vatom = 0;
|
||||||
|
shared_data.atom.update_nmax = 1;
|
||||||
|
shared_data.atom.update_nlocal = 1;
|
||||||
|
shared_data.atom.update_neigh = 1;
|
||||||
|
|
||||||
shared_data.pair.cudable_force = 0;
|
shared_data.pair.cudable_force = 0;
|
||||||
shared_data.pair.collect_forces_later = 0;
|
shared_data.pair.collect_forces_later = 0;
|
||||||
shared_data.pair.use_block_per_atom = 0;
|
shared_data.pair.use_block_per_atom = 0;
|
||||||
shared_data.pair.override_block_per_atom = -1;
|
shared_data.pair.override_block_per_atom = -1;
|
||||||
@ -429,14 +432,6 @@ void Cuda::checkResize()
|
|||||||
if(cu_atom->q_flag)
|
if(cu_atom->q_flag)
|
||||||
{delete cu_q; cu_q = new cCudaData<double, F_FLOAT, x > ((double*)atom->q, & cu_atom->q , atom->nmax );}// cu_q->set_buffer(&(copy_buffer),&(copy_buffersize),true);}
|
{delete cu_q; cu_q = new cCudaData<double, F_FLOAT, x > ((double*)atom->q, & cu_atom->q , atom->nmax );}// cu_q->set_buffer(&(copy_buffer),&(copy_buffersize),true);}
|
||||||
|
|
||||||
/*
|
|
||||||
if(force->pair)
|
|
||||||
if(force->pair->eatom)
|
|
||||||
{delete cu_eatom; cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > (force->pair->eatom, & cu_atom->eatom , atom->nmax );}// cu_eatom->set_buffer(&(copy_buffer),&(copy_buffersize),true);}
|
|
||||||
if(force->pair)
|
|
||||||
if(force->pair->vatom)
|
|
||||||
{delete cu_vatom; cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)force->pair->vatom, & cu_atom->vatom , atom->nmax,6 );}// cu_vatom->set_buffer(&(copy_buffer),&(copy_buffersize),true);}
|
|
||||||
*/
|
|
||||||
if(atom->radius)
|
if(atom->radius)
|
||||||
{
|
{
|
||||||
delete cu_radius; cu_radius = new cCudaData<double, X_FLOAT, x > (atom->radius , & cu_atom->radius , atom->nmax );
|
delete cu_radius; cu_radius = new cCudaData<double, X_FLOAT, x > (atom->radius , & cu_atom->radius , atom->nmax );
|
||||||
@ -444,11 +439,6 @@ void Cuda::checkResize()
|
|||||||
delete cu_omega_rmass; cu_omega_rmass = new cCudaData<V_FLOAT, V_FLOAT, x> (omega_rmass , & cu_atom->omega_rmass , atom->nmax*4);
|
delete cu_omega_rmass; cu_omega_rmass = new cCudaData<V_FLOAT, V_FLOAT, x> (omega_rmass , & cu_atom->omega_rmass , atom->nmax*4);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
if(atom->density)
|
|
||||||
{delete cu_density; cu_density = new cCudaData<double, F_FLOAT, x > (atom->density , & cu_atom->density , atom->nmax );}
|
|
||||||
*/
|
|
||||||
|
|
||||||
if(atom->omega)
|
if(atom->omega)
|
||||||
{delete cu_omega; cu_omega = new cCudaData<double, V_FLOAT, yx > (((double*) atom->omega) , & cu_atom->omega , atom->nmax,3 );}
|
{delete cu_omega; cu_omega = new cCudaData<double, V_FLOAT, yx > (((double*) atom->omega) , & cu_atom->omega , atom->nmax,3 );}
|
||||||
|
|
||||||
@ -464,12 +454,10 @@ void Cuda::checkResize()
|
|||||||
shared_data.atom.special_flag = neighbor->special_flag;
|
shared_data.atom.special_flag = neighbor->special_flag;
|
||||||
shared_data.atom.molecular = atom->molecular;
|
shared_data.atom.molecular = atom->molecular;
|
||||||
|
|
||||||
cu_atom->update_nmax = 2;
|
cu_atom->update_nmax = 2;
|
||||||
cu_atom->nmax = atom->nmax;
|
cu_atom->nmax = atom->nmax;
|
||||||
|
|
||||||
//delete [] x_type; x_type = new X_FLOAT4[atom->nmax];
|
|
||||||
delete cu_x_type; cu_x_type = new cCudaData<X_FLOAT, X_FLOAT, x> (x_type , & cu_atom->x_type , atom->nmax*4);
|
delete cu_x_type; cu_x_type = new cCudaData<X_FLOAT, X_FLOAT, x> (x_type , & cu_atom->x_type , atom->nmax*4);
|
||||||
// shared_data.buffer_new = 2;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(((cu_xhold==NULL)||(cu_xhold->get_dim()[0]<neighbor->maxhold))&&neighbor->xhold)
|
if(((cu_xhold==NULL)||(cu_xhold->get_dim()[0]<neighbor->maxhold))&&neighbor->xhold)
|
||||||
@ -488,6 +476,12 @@ void Cuda::checkResize()
|
|||||||
{
|
{
|
||||||
cu_map_array = new cCudaData<int, int, x > (atom->get_map_array() , & cu_atom->map_array , atom->get_map_size() );
|
cu_map_array = new cCudaData<int, int, x > (atom->get_map_array() , & cu_atom->map_array , atom->get_map_size() );
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
if(cu_map_array->dev_size()/sizeof(int)<atom->get_map_size())
|
||||||
|
{
|
||||||
|
delete cu_map_array;
|
||||||
|
cu_map_array = new cCudaData<int, int, x > (atom->get_map_array() , & cu_atom->map_array , atom->get_map_size() );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -512,11 +506,6 @@ void Cuda::checkResize()
|
|||||||
if(atom->radius)
|
if(atom->radius)
|
||||||
if(cu_radius->get_host_data() != atom->radius) cu_radius->set_host_data((double*) (atom->radius));
|
if(cu_radius->get_host_data() != atom->radius) cu_radius->set_host_data((double*) (atom->radius));
|
||||||
|
|
||||||
/*
|
|
||||||
if(atom->density)
|
|
||||||
if(cu_density->get_host_data() != atom->density) cu_density->set_host_data((double*) (atom->density));
|
|
||||||
*/
|
|
||||||
|
|
||||||
if(atom->omega)
|
if(atom->omega)
|
||||||
if(cu_omega->get_host_data() != atom->omega) cu_omega->set_host_data((double*) (atom->omega));
|
if(cu_omega->get_host_data() != atom->omega) cu_omega->set_host_data((double*) (atom->omega));
|
||||||
|
|
||||||
@ -558,7 +547,7 @@ void Cuda::evsetup_eatom_vatom(int eflag_atom,int vflag_atom)
|
|||||||
if(not cu_vatom)
|
if(not cu_vatom)
|
||||||
cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)force->pair->vatom, & (shared_data.atom.vatom) , atom->nmax ,6 );// cu_vatom->set_buffer(&(copy_buffer),&(copy_buffersize),true);}
|
cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)force->pair->vatom, & (shared_data.atom.vatom) , atom->nmax ,6 );// cu_vatom->set_buffer(&(copy_buffer),&(copy_buffersize),true);}
|
||||||
cu_vatom->set_host_data((double*)force->pair->vatom);
|
cu_vatom->set_host_data((double*)force->pair->vatom);
|
||||||
cu_vatom->memset_device(0);
|
cu_vatom->memset_device(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -579,16 +568,9 @@ void Cuda::uploadAll()
|
|||||||
cu_image->upload();
|
cu_image->upload();
|
||||||
if(shared_data.atom.q_flag) cu_q ->upload();
|
if(shared_data.atom.q_flag) cu_q ->upload();
|
||||||
|
|
||||||
//printf("A3\n");
|
|
||||||
//if(shared_data.atom.need_eatom) cu_eatom->upload();
|
|
||||||
//printf("A4\n");
|
|
||||||
//if(shared_data.atom.need_vatom) cu_vatom->upload();
|
|
||||||
//printf("A5\n");
|
|
||||||
|
|
||||||
if(atom->rmass) cu_rmass->upload();
|
if(atom->rmass) cu_rmass->upload();
|
||||||
|
|
||||||
if(atom->radius) cu_radius->upload();
|
if(atom->radius) cu_radius->upload();
|
||||||
// if(atom->density) cu_density->upload();
|
|
||||||
if(atom->omega) cu_omega->upload();
|
if(atom->omega) cu_omega->upload();
|
||||||
if(atom->torque) cu_torque->upload();
|
if(atom->torque) cu_torque->upload();
|
||||||
if(atom->special) cu_special->upload();
|
if(atom->special) cu_special->upload();
|
||||||
@ -631,7 +613,6 @@ void Cuda::downloadAll()
|
|||||||
if(atom->rmass) cu_rmass->download();
|
if(atom->rmass) cu_rmass->download();
|
||||||
|
|
||||||
if(atom->radius) cu_radius->download();
|
if(atom->radius) cu_radius->download();
|
||||||
// if(atom->density) cu_density->download();
|
|
||||||
if(atom->omega) cu_omega->download();
|
if(atom->omega) cu_omega->download();
|
||||||
if(atom->torque) cu_torque->download();
|
if(atom->torque) cu_torque->download();
|
||||||
if(atom->special) cu_special->download();
|
if(atom->special) cu_special->download();
|
||||||
@ -747,13 +728,13 @@ void Cuda::setTimingsZero()
|
|||||||
shared_data.cuda_timings.neigh_special = 0;
|
shared_data.cuda_timings.neigh_special = 0;
|
||||||
|
|
||||||
//PPPM
|
//PPPM
|
||||||
shared_data.cuda_timings.pppm_particle_map;
|
shared_data.cuda_timings.pppm_particle_map = 0;
|
||||||
shared_data.cuda_timings.pppm_make_rho;
|
shared_data.cuda_timings.pppm_make_rho = 0;
|
||||||
shared_data.cuda_timings.pppm_brick2fft;
|
shared_data.cuda_timings.pppm_brick2fft = 0;
|
||||||
shared_data.cuda_timings.pppm_poisson;
|
shared_data.cuda_timings.pppm_poisson = 0;
|
||||||
shared_data.cuda_timings.pppm_fillbrick;
|
shared_data.cuda_timings.pppm_fillbrick = 0;
|
||||||
shared_data.cuda_timings.pppm_fieldforce;
|
shared_data.cuda_timings.pppm_fieldforce = 0;
|
||||||
shared_data.cuda_timings.pppm_compute;
|
shared_data.cuda_timings.pppm_compute = 0;
|
||||||
|
|
||||||
CudaWrapper_CheckUploadTime(true);
|
CudaWrapper_CheckUploadTime(true);
|
||||||
CudaWrapper_CheckDownloadTime(true);
|
CudaWrapper_CheckDownloadTime(true);
|
||||||
@ -789,8 +770,8 @@ void Cuda::print_timings()
|
|||||||
printf(" Exchange MPI \t %lf \n",shared_data.cuda_timings.comm_exchange_mpi);
|
printf(" Exchange MPI \t %lf \n",shared_data.cuda_timings.comm_exchange_mpi);
|
||||||
printf(" Exchange Kernel Pack \t %lf \n",shared_data.cuda_timings.comm_exchange_kernel_pack);
|
printf(" Exchange Kernel Pack \t %lf \n",shared_data.cuda_timings.comm_exchange_kernel_pack);
|
||||||
printf(" Exchange Kernel Unpack \t %lf \n",shared_data.cuda_timings.comm_exchange_kernel_unpack);
|
printf(" Exchange Kernel Unpack \t %lf \n",shared_data.cuda_timings.comm_exchange_kernel_unpack);
|
||||||
printf(" Exchange Kernel Fill \t %lf \n",shared_data.cuda_timings.comm_exchange_kernel_fill);
|
printf(" Exchange Kernel Fill \t %lf \n",shared_data.cuda_timings.comm_exchange_kernel_fill);
|
||||||
printf(" Exchange CPU Pack \t %lf \n",shared_data.cuda_timings.comm_exchange_cpu_pack);
|
printf(" Exchange CPU Pack \t %lf \n",shared_data.cuda_timings.comm_exchange_cpu_pack);
|
||||||
printf(" Exchange Upload \t %lf \n",shared_data.cuda_timings.comm_exchange_upload);
|
printf(" Exchange Upload \t %lf \n",shared_data.cuda_timings.comm_exchange_upload);
|
||||||
printf(" Exchange Download \t %lf \n",shared_data.cuda_timings.comm_exchange_download);
|
printf(" Exchange Download \t %lf \n",shared_data.cuda_timings.comm_exchange_download);
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|||||||
@ -111,6 +111,7 @@ void CudaNeighList::dev_alloc()
|
|||||||
neighbors_inner = new int[sneighlist.maxlocal*sneighlist.maxneighbors];
|
neighbors_inner = new int[sneighlist.maxlocal*sneighlist.maxneighbors];
|
||||||
cu_neighbors_inner = new cCudaData<int, int, x> (neighbors_inner , & sneighlist.neighbors_inner , sneighlist.maxlocal*sneighlist.maxneighbors );
|
cu_neighbors_inner = new cCudaData<int, int, x> (neighbors_inner , & sneighlist.neighbors_inner , sneighlist.maxlocal*sneighlist.maxneighbors );
|
||||||
}
|
}
|
||||||
|
cuda->shared_data.atom.update_neigh=2;
|
||||||
MYDBG( printf("# CUDA: CudaNeighList::dev_alloc() ... end\n"); )
|
MYDBG( printf("# CUDA: CudaNeighList::dev_alloc() ... end\n"); )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -250,6 +250,7 @@ void NeighborCuda::full_bin_cuda(NeighList *list)
|
|||||||
}*/
|
}*/
|
||||||
list->cuda_list->cu_numneigh->download();
|
list->cuda_list->cu_numneigh->download();
|
||||||
list->cuda_list->cu_ilist->download();
|
list->cuda_list->cu_ilist->download();
|
||||||
|
cuda->shared_data.atom.update_neigh=2;
|
||||||
//printf("Done\n");
|
//printf("Done\n");
|
||||||
|
|
||||||
MYDBG(printf(" # CUDA::NeighFullBinCuda ... end\n");)
|
MYDBG(printf(" # CUDA::NeighFullBinCuda ... end\n");)
|
||||||
|
|||||||
@ -564,6 +564,7 @@ void VerletCuda::run(int n)
|
|||||||
cuda->shared_data.atom.reneigh_flag=0;
|
cuda->shared_data.atom.reneigh_flag=0;
|
||||||
cuda->shared_data.atom.update_nlocal=1;
|
cuda->shared_data.atom.update_nlocal=1;
|
||||||
cuda->shared_data.atom.update_nmax=1;
|
cuda->shared_data.atom.update_nmax=1;
|
||||||
|
cuda->shared_data.atom.update_neigh=1;
|
||||||
cuda->shared_data.domain.update=1;
|
cuda->shared_data.domain.update=1;
|
||||||
cuda->shared_data.buffer_new=1;
|
cuda->shared_data.buffer_new=1;
|
||||||
cuda->uploadtime=0;
|
cuda->uploadtime=0;
|
||||||
@ -627,14 +628,12 @@ void VerletCuda::run(int n)
|
|||||||
|
|
||||||
//start force calculation asynchronus
|
//start force calculation asynchronus
|
||||||
cuda->shared_data.comm.comm_phase=1;
|
cuda->shared_data.comm.comm_phase=1;
|
||||||
// printf("Pre Force Compute\n");
|
|
||||||
force->pair->compute(eflag, vflag);
|
force->pair->compute(eflag, vflag);
|
||||||
timer->stamp(TIME_PAIR);
|
timer->stamp(TIME_PAIR);
|
||||||
//CudaWrapper_Sync();
|
//CudaWrapper_Sync();
|
||||||
|
|
||||||
//download comm buffers from GPU, perform MPI communication and upload buffers again
|
//download comm buffers from GPU, perform MPI communication and upload buffers again
|
||||||
clock_gettime(CLOCK_REALTIME,&starttime);
|
clock_gettime(CLOCK_REALTIME,&starttime);
|
||||||
// printf("Pre forward_comm(2)\n");
|
|
||||||
comm->forward_comm(2);
|
comm->forward_comm(2);
|
||||||
clock_gettime(CLOCK_REALTIME,&endtime);
|
clock_gettime(CLOCK_REALTIME,&endtime);
|
||||||
cuda->shared_data.cuda_timings.comm_forward_total+=
|
cuda->shared_data.cuda_timings.comm_forward_total+=
|
||||||
@ -642,16 +641,13 @@ void VerletCuda::run(int n)
|
|||||||
timer->stamp(TIME_COMM);
|
timer->stamp(TIME_COMM);
|
||||||
|
|
||||||
//wait for force calculation
|
//wait for force calculation
|
||||||
//printf("Pre Synch\n");
|
|
||||||
CudaWrapper_Sync();
|
CudaWrapper_Sync();
|
||||||
timer->stamp(TIME_PAIR);
|
timer->stamp(TIME_PAIR);
|
||||||
|
|
||||||
//unpack communication buffers
|
//unpack communication buffers
|
||||||
clock_gettime(CLOCK_REALTIME,&starttime);
|
clock_gettime(CLOCK_REALTIME,&starttime);
|
||||||
// printf("Pre forward_comm(3)\n");
|
|
||||||
comm->forward_comm(3);
|
comm->forward_comm(3);
|
||||||
clock_gettime(CLOCK_REALTIME,&endtime);
|
clock_gettime(CLOCK_REALTIME,&endtime);
|
||||||
// printf("Post forward_comm(3)\n");
|
|
||||||
cuda->shared_data.cuda_timings.comm_forward_total+=
|
cuda->shared_data.cuda_timings.comm_forward_total+=
|
||||||
endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000;
|
endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000;
|
||||||
|
|
||||||
@ -663,11 +659,9 @@ void VerletCuda::run(int n)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
//perform standard forward communication
|
//perform standard forward communication
|
||||||
//printf("Forward_comm\n");
|
|
||||||
clock_gettime(CLOCK_REALTIME,&starttime);
|
clock_gettime(CLOCK_REALTIME,&starttime);
|
||||||
comm->forward_comm();
|
comm->forward_comm();
|
||||||
clock_gettime(CLOCK_REALTIME,&endtime);
|
clock_gettime(CLOCK_REALTIME,&endtime);
|
||||||
//printf("Forward_comm_done\n");
|
|
||||||
cuda->shared_data.cuda_timings.comm_forward_total+=
|
cuda->shared_data.cuda_timings.comm_forward_total+=
|
||||||
endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000;
|
endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000;
|
||||||
timer->stamp(TIME_COMM);
|
timer->stamp(TIME_COMM);
|
||||||
@ -677,13 +671,13 @@ void VerletCuda::run(int n)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
int nlocalold=cuda->shared_data.atom.nlocal;
|
int nlocalold=cuda->shared_data.atom.nlocal;
|
||||||
//if(firstreneigh)
|
if(firstreneigh)
|
||||||
{
|
{
|
||||||
cuda->shared_data.atom.update_nlocal=1;
|
cuda->shared_data.atom.update_nlocal=1;
|
||||||
cuda->shared_data.atom.update_nmax=1;
|
cuda->shared_data.atom.update_nmax=1;
|
||||||
firstreneigh=0;
|
firstreneigh=0;
|
||||||
}
|
}
|
||||||
cuda->shared_data.buffer_new=1;
|
cuda->shared_data.buffer_new=1;
|
||||||
MYDBG( printf("# CUDA VerletCuda::iterate: neighbor\n"); )
|
MYDBG( printf("# CUDA VerletCuda::iterate: neighbor\n"); )
|
||||||
cuda->setDomainParams();
|
cuda->setDomainParams();
|
||||||
if(n_pre_exchange) modify->pre_exchange();
|
if(n_pre_exchange) modify->pre_exchange();
|
||||||
@ -759,10 +753,10 @@ void VerletCuda::run(int n)
|
|||||||
cuda->shared_data.cuda_timings.test2+=
|
cuda->shared_data.cuda_timings.test2+=
|
||||||
endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000;
|
endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000;
|
||||||
|
|
||||||
//rebuild neighbor list
|
//rebuild neighbor list
|
||||||
test_atom(testatom,"Pre Neighbor");
|
test_atom(testatom,"Pre Neighbor");
|
||||||
neighbor->build();
|
neighbor->build();
|
||||||
timer->stamp(TIME_NEIGHBOR);
|
timer->stamp(TIME_NEIGHBOR);
|
||||||
MYDBG( printf("# CUDA VerletCuda::iterate: neighbor done\n"); )
|
MYDBG( printf("# CUDA VerletCuda::iterate: neighbor done\n"); )
|
||||||
|
|
||||||
//if bonded interactions are used (in this case collect_forces_later is true), transfer data which only changes upon exchange/border routines from GPU to CPU
|
//if bonded interactions are used (in this case collect_forces_later is true), transfer data which only changes upon exchange/border routines from GPU to CPU
|
||||||
@ -772,7 +766,7 @@ void VerletCuda::run(int n)
|
|||||||
cuda->cu_tag->download();
|
cuda->cu_tag->download();
|
||||||
cuda->cu_type->download();
|
cuda->cu_type->download();
|
||||||
cuda->cu_mask->download();
|
cuda->cu_mask->download();
|
||||||
if(cuda->cu_q) cuda->cu_q->download();
|
if(cuda->cu_q) cuda->cu_q->download();
|
||||||
}
|
}
|
||||||
cuda->shared_data.comm.comm_phase=3;
|
cuda->shared_data.comm.comm_phase=3;
|
||||||
}
|
}
|
||||||
@ -969,14 +963,16 @@ void VerletCuda::run(int n)
|
|||||||
test_atom(testatom,"post output");
|
test_atom(testatom,"post output");
|
||||||
|
|
||||||
if(cuda->shared_data.atom.update_nlocal>0)
|
if(cuda->shared_data.atom.update_nlocal>0)
|
||||||
cuda->shared_data.atom.update_nlocal--;
|
cuda->shared_data.atom.update_nlocal--;
|
||||||
if(cuda->shared_data.atom.update_nmax>0)
|
if(cuda->shared_data.atom.update_nmax>0)
|
||||||
cuda->shared_data.atom.update_nmax--;
|
cuda->shared_data.atom.update_nmax--;
|
||||||
if(cuda->shared_data.domain.update>0)
|
if(cuda->shared_data.atom.update_neigh>0)
|
||||||
|
cuda->shared_data.atom.update_neigh--;
|
||||||
|
if(cuda->shared_data.domain.update>0)
|
||||||
cuda->shared_data.domain.update--;
|
cuda->shared_data.domain.update--;
|
||||||
if(cuda->shared_data.buffer_new>0)
|
if(cuda->shared_data.buffer_new>0)
|
||||||
cuda->shared_data.buffer_new--;
|
cuda->shared_data.buffer_new--;
|
||||||
cuda->shared_data.atom.reneigh_flag=0;
|
cuda->shared_data.atom.reneigh_flag=0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -984,6 +980,7 @@ void VerletCuda::run(int n)
|
|||||||
cuda->downloadAllNeighborLists();
|
cuda->downloadAllNeighborLists();
|
||||||
cuda->shared_data.atom.update_nlocal=1;
|
cuda->shared_data.atom.update_nlocal=1;
|
||||||
cuda->shared_data.atom.update_nmax=1;
|
cuda->shared_data.atom.update_nmax=1;
|
||||||
|
cuda->shared_data.atom.update_neigh=1;
|
||||||
cuda->shared_data.buffer_new=1;
|
cuda->shared_data.buffer_new=1;
|
||||||
cuda->shared_data.domain.update=1;
|
cuda->shared_data.domain.update=1;
|
||||||
cuda->oncpu = true;
|
cuda->oncpu = true;
|
||||||
|
|||||||
Reference in New Issue
Block a user