/* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #include #include #include "modify_cuda.h" #include "style_compute.h" #include "style_fix.h" #include "atom.h" #include "comm.h" #include "fix.h" #include "compute.h" #include "group.h" #include "update.h" #include "domain.h" #include "cuda.h" #include "memory.h" #include "error.h" #include "cuda_modify_flags.h" using namespace LAMMPS_NS; using namespace FixConst; using namespace FixConstCuda; #define DELTA 4 #define BIG 1.0e20 /* ---------------------------------------------------------------------- */ ModifyCuda::ModifyCuda(LAMMPS *lmp) : Modify(lmp) { cuda = lmp->cuda; if(cuda == NULL) error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); n_initial_integrate_cuda = 0; n_post_integrate_cuda = 0; n_pre_exchange = 0; n_pre_neighbor_cuda = 0; n_pre_force_cuda = 0; n_post_force_cuda = 0; n_final_integrate_cuda = 0; n_end_of_step_cuda = 0; n_thermo_energy_cuda = 0; n_initial_integrate_host = 0; n_post_integrate_host = 0; n_pre_exchange = 0; n_pre_neighbor_host = 0; n_pre_force_host = 0; n_post_force_host = 0; n_final_integrate_host = 0; n_end_of_step_host = 0; n_thermo_energy_host = 0; list_initial_integrate_cuda = NULL; list_post_integrate_cuda = NULL; list_pre_exchange_cuda = NULL; list_pre_neighbor_cuda = NULL; list_pre_force_cuda = NULL; list_post_force_cuda = NULL; list_final_integrate_cuda = NULL; list_end_of_step_cuda = NULL; list_thermo_energy_cuda = NULL; end_of_step_every_cuda = NULL; } /* ---------------------------------------------------------------------- */ ModifyCuda::~ModifyCuda() { delete [] list_initial_integrate_cuda; delete [] list_post_integrate_cuda; delete [] list_pre_exchange_cuda; delete [] list_pre_neighbor_cuda; delete [] list_pre_force_cuda; delete [] list_post_force_cuda; delete [] list_final_integrate_cuda; delete [] list_end_of_step_cuda; delete [] list_thermo_energy_cuda; delete [] end_of_step_every_cuda; } /* ---------------------------------------------------------------------- initialize all fixes and computes ------------------------------------------------------------------------- */ void ModifyCuda::init() { int i,j; // delete storage of restart info since it is not valid after 1st run restart_deallocate(); // create lists of fixes to call at each stage of run list_init(INITIAL_INTEGRATE,n_initial_integrate,list_initial_integrate); list_init(POST_INTEGRATE,n_post_integrate,list_post_integrate); list_init(PRE_EXCHANGE,n_pre_exchange,list_pre_exchange); list_init(PRE_NEIGHBOR,n_pre_neighbor,list_pre_neighbor); list_init(PRE_FORCE,n_pre_force,list_pre_force); list_init(POST_FORCE,n_post_force,list_post_force); list_init(FINAL_INTEGRATE,n_final_integrate,list_final_integrate); list_init_end_of_step(END_OF_STEP,n_end_of_step,list_end_of_step); list_init_thermo_energy(THERMO_ENERGY,n_thermo_energy,list_thermo_energy); list_init(INITIAL_INTEGRATE_CUDA, n_initial_integrate_cuda, list_initial_integrate_cuda); list_init(POST_INTEGRATE_CUDA, n_post_integrate_cuda, list_post_integrate_cuda); list_init(PRE_EXCHANGE_CUDA, n_pre_exchange_cuda, list_pre_exchange_cuda); list_init(PRE_NEIGHBOR_CUDA, n_pre_neighbor_cuda, list_pre_neighbor_cuda); list_init(PRE_FORCE_CUDA, n_pre_force_cuda, list_pre_force_cuda); list_init(POST_FORCE_CUDA, n_post_force_cuda, list_post_force_cuda); list_init(FINAL_INTEGRATE_CUDA, n_final_integrate_cuda, list_final_integrate_cuda); list_init_end_of_step_cuda(END_OF_STEP_CUDA, n_end_of_step_cuda, list_end_of_step_cuda); list_init_thermo_energy(THERMO_ENERGY_CUDA, n_thermo_energy_cuda, list_thermo_energy_cuda); n_initial_integrate_host = n_initial_integrate; n_post_integrate_host = n_post_integrate; n_pre_exchange_host = n_pre_exchange; n_pre_neighbor_host = n_pre_neighbor; n_pre_force_host = n_pre_force; n_post_force_host = n_post_force; n_final_integrate_host = n_final_integrate; n_end_of_step_host = n_end_of_step; n_thermo_energy_host = n_thermo_energy; n_initial_integrate = n_initial_integrate_cuda+n_initial_integrate_host; n_post_integrate = n_post_integrate_cuda+n_post_integrate_host; n_pre_exchange = n_pre_exchange_cuda+n_pre_exchange_host; n_pre_neighbor = n_pre_neighbor_cuda+n_pre_neighbor_host; n_pre_force = n_pre_force_cuda+n_pre_force_host; n_post_force = n_post_force_cuda+n_post_force_host; n_final_integrate = n_final_integrate_cuda+n_final_integrate_host; n_end_of_step = n_end_of_step_cuda+n_end_of_step_host; n_thermo_energy = n_thermo_energy_cuda+n_thermo_energy_host; list_init(INITIAL_INTEGRATE_RESPA, n_initial_integrate_respa,list_initial_integrate_respa); list_init(POST_INTEGRATE_RESPA, n_post_integrate_respa,list_post_integrate_respa); list_init(POST_FORCE_RESPA, n_post_force_respa,list_post_force_respa); list_init(PRE_FORCE_RESPA, n_pre_force_respa,list_pre_force_respa); list_init(FINAL_INTEGRATE_RESPA, n_final_integrate_respa,list_final_integrate_respa); list_init(MIN_PRE_EXCHANGE,n_min_pre_exchange,list_min_pre_exchange); list_init(MIN_POST_FORCE,n_min_post_force,list_min_post_force); list_init(MIN_ENERGY,n_min_energy,list_min_energy); // init each fix // needs to come before compute init // this is b/c some computes call fix->dof() // FixRigid::dof() depends on its own init having been called for (i = 0; i < nfix; i++) fix[i]->init(); // set global flag if any fix has its restart_pbc flag set restart_pbc_any = 0; for (i = 0; i < nfix; i++) if (fix[i]->restart_pbc) restart_pbc_any = 1; // create list of computes that store invocation times list_init_compute(); // init each compute // set invoked_scalar,vector,etc to -1 to force new run to re-compute them // add initial timestep to all computes that store invocation times // since any of them may be invoked by initial thermo // do not clear out invocation times stored within a compute, // b/c some may be holdovers from previous run, like for ave fixes for (i = 0; i < ncompute; i++) { compute[i]->init(); compute[i]->invoked_scalar = -1; compute[i]->invoked_vector = -1; compute[i]->invoked_array = -1; compute[i]->invoked_peratom = -1; compute[i]->invoked_local = -1; } addstep_compute_all(update->ntimestep); // warn if any particle is time integrated more than once int nlocal = atom->nlocal; int *mask = atom->mask; int *flag = new int[nlocal]; for (i = 0; i < nlocal; i++) flag[i] = 0; int groupbit; for (i = 0; i < nfix; i++) { if (fix[i]->time_integrate == 0) continue; groupbit = fix[i]->groupbit; for (j = 0; j < nlocal; j++) if (mask[j] & groupbit) flag[j]++; } int check = 0; for (i = 0; i < nlocal; i++) if (flag[i] > 1) check = 1; delete [] flag; int checkall; MPI_Allreduce(&check,&checkall,1,MPI_INT,MPI_SUM,world); if (comm->me == 0 && checkall) error->warning(FLERR,"One or more atoms are time integrated more than once"); } /* ---------------------------------------------------------------------- 1st half of integrate call, only for relevant fixes ------------------------------------------------------------------------- */ void ModifyCuda::initial_integrate(int vflag) { for(int i = 0; i < n_initial_integrate_cuda; i++) fix[list_initial_integrate_cuda[i]]->initial_integrate(vflag); if(n_initial_integrate_host != 0) { cuda->downloadAll(); cuda->oncpu = true; for (int i = 0; i < n_initial_integrate_host; i++) fix[list_initial_integrate[i]]->initial_integrate(vflag); cuda->uploadAll(); cuda->oncpu = false; } } /* ---------------------------------------------------------------------- post_integrate call, only for relevant fixes ------------------------------------------------------------------------- */ void ModifyCuda::post_integrate() { for(int i = 0; i < n_post_integrate_cuda; i++) fix[list_post_integrate_cuda[i]]->post_integrate(); if(n_post_integrate_host != 0) { cuda->downloadAll(); cuda->oncpu = true; for (int i = 0; i < n_post_integrate_host; i++) fix[list_post_integrate[i]]->post_integrate(); cuda->uploadAll(); cuda->oncpu = false; } } /* ---------------------------------------------------------------------- pre_exchange call, only for relevant fixes ------------------------------------------------------------------------- */ void ModifyCuda::pre_exchange() { for(int i = 0; i < n_pre_exchange_cuda; i++) fix[list_pre_exchange_cuda[i]]->pre_exchange(); if(n_pre_exchange_host != 0) { cuda->downloadAll(); cuda->oncpu = true; for (int i = 0; i < n_pre_exchange_host; i++) fix[list_pre_exchange[i]]->pre_exchange(); cuda->uploadAll(); cuda->oncpu = false; } } /* ---------------------------------------------------------------------- pre_neighbor call, only for relevant fixes ------------------------------------------------------------------------- */ void ModifyCuda::pre_neighbor() { for(int i = 0; i < n_pre_neighbor_cuda; i++) fix[list_pre_neighbor_cuda[i]]->pre_neighbor(); if(n_pre_neighbor_host != 0) { cuda->downloadAll(); cuda->oncpu = true; for (int i = 0; i < n_pre_neighbor_host; i++) fix[list_pre_neighbor[i]]->pre_neighbor(); cuda->uploadAll(); cuda->oncpu = false; } } /* ---------------------------------------------------------------------- pre_force call, only for relevant fixes ------------------------------------------------------------------------- */ void ModifyCuda::setup_pre_force(int vflag) { for(int i = 0; i < n_pre_force_cuda; i++) fix[list_pre_force_cuda[i]]->pre_force(vflag); if(n_pre_force_host != 0) { cuda->downloadAll(); cuda->oncpu = true; for (int i = 0; i < n_pre_force_host; i++) fix[list_pre_force[i]]->pre_force(vflag); cuda->uploadAll(); cuda->oncpu = false; } } void ModifyCuda::pre_force(int vflag) { for(int i = 0; i < n_pre_force_cuda; i++) fix[list_pre_force_cuda[i]]->pre_force(vflag); if(n_pre_force_host != 0) { cuda->downloadAll(); cuda->oncpu = true; for (int i = 0; i < n_pre_force_host; i++) fix[list_pre_force[i]]->pre_force(vflag); cuda->uploadAll(); cuda->oncpu = false; } } /* ---------------------------------------------------------------------- post_force call, only for relevant fixes ------------------------------------------------------------------------- */ void ModifyCuda::post_force(int vflag) { for(int i = 0; i < n_post_force_cuda; i++) fix[list_post_force_cuda[i]]->post_force(vflag); if(n_post_force_host != 0) { cuda->downloadAll(); cuda->oncpu = true; for (int i = 0; i < n_post_force_host; i++) fix[list_post_force[i]]->post_force(vflag); cuda->uploadAll(); cuda->oncpu = false; } } /* ---------------------------------------------------------------------- 2nd half of integrate call, only for relevant fixes ------------------------------------------------------------------------- */ void ModifyCuda::final_integrate() { for (int i = 0; i < n_final_integrate_cuda; i++) fix[list_final_integrate_cuda[i]]->final_integrate(); if(n_final_integrate_host != 0) { cuda->downloadAll(); cuda->oncpu = true; for (int i = 0; i < n_final_integrate_host; i++) fix[list_final_integrate[i]]->final_integrate(); cuda->uploadAll(); cuda->oncpu = false; } } /* ---------------------------------------------------------------------- end-of-timestep call, only for relevant fixes only call fix->end_of_step() on timesteps that are multiples of nevery ------------------------------------------------------------------------- */ void ModifyCuda::end_of_step() { for (int i = 0; i < n_end_of_step_cuda; i++) if (update->ntimestep % end_of_step_every_cuda[i] == 0) fix[list_end_of_step_cuda[i]]->end_of_step(); if(n_end_of_step_host != 0) { int do_thisstep=0; for (int i = 0; i < n_end_of_step_host; i++) if (update->ntimestep % end_of_step_every[i] == 0) do_thisstep=1; if(do_thisstep) { cuda->downloadAll(); cuda->oncpu = true; for (int i = 0; i < n_end_of_step_host; i++) if (update->ntimestep % end_of_step_every[i] == 0) fix[list_end_of_step[i]]->end_of_step(); cuda->uploadAll(); cuda->oncpu = false; } } } /* ---------------------------------------------------------------------- thermo energy call, only for relevant fixes called by Thermo class compute_scalar() is fix call to return energy ------------------------------------------------------------------------- */ double ModifyCuda::thermo_energy() { double energy = 0.0; for (int i = 0; i < n_thermo_energy_cuda; i++) energy += fix[list_thermo_energy_cuda[i]]->compute_scalar(); if(n_thermo_energy_host != 0) { cuda->downloadAll(); cuda->oncpu = true; for (int i = 0; i < n_thermo_energy_host; i++) energy += fix[list_thermo_energy[i]]->compute_scalar(); cuda->uploadAll(); cuda->oncpu = false; } return energy; } void ModifyCuda::list_init_end_of_step_cuda(int mask, int &n, int *&list) { delete [] list; delete [] end_of_step_every_cuda; n = 0; for (int i = 0; i < nfix; i++) if (fmask[i] & mask) n++; list = new int[n]; end_of_step_every_cuda = new int[n]; n = 0; for (int i = 0; i < nfix; i++) if (fmask[i] & mask) { list[n] = i; end_of_step_every_cuda[n++] = fix[i]->nevery; } }