git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14065 f3b2605a-c512-4ea7-a41b-209d697bcdaa

2015-09-24 20:35:13 +00:00
parent 546186739c
commit d2ee16c936
14 changed files with 2068 additions and 0 deletions
--- a/src/KOKKOS/compute_temp_kokkos.cpp
+++ b/src/KOKKOS/compute_temp_kokkos.cpp
@ -0,0 +1,153 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "mpi.h"
+#include "string.h"
+#include "compute_temp_kokkos.h"
+#include "atom_kokkos.h"
+#include "update.h"
+#include "force.h"
+#include "domain.h"
+#include "comm.h"
+#include "group.h"
+#include "error.h"
+#include "atom_masks.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+ComputeTempKokkos<DeviceType>::ComputeTempKokkos(LAMMPS *lmp, int narg, char **arg) :
+  ComputeTemp(lmp, narg, arg)
+{
+  atomKK = (AtomKokkos *) atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+
+  datamask_read = V_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK;
+  datamask_modify = EMPTY_MASK;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+double ComputeTempKokkos<DeviceType>::compute_scalar()
+{
+  atomKK->sync(execution_space,datamask_read);
+
+  invoked_scalar = update->ntimestep;
+
+  v = atomKK->k_v.view<DeviceType>();
+  rmass = atomKK->rmass;
+  mass = atomKK->k_mass.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  mask = atomKK->k_mask.view<DeviceType>();
+  int nlocal = atom->nlocal;
+
+  double t = 0.0;
+  CTEMP t_kk;
+
+  copymode = 1;
+  if (rmass)
+    Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempScalar<1> >(0,nlocal),*this,t_kk);
+  else
+    Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempScalar<0> >(0,nlocal),*this,t_kk);
+  DeviceType::fence();
+  copymode = 0;
+
+  t = t_kk.t0; // could make this more efficient
+
+  MPI_Allreduce(&t,&scalar,1,MPI_DOUBLE,MPI_SUM,world);
+  if (dynamic) dof_compute();
+  if (dof < 0.0 && natoms_temp > 0.0) 
+    error->all(FLERR,"Temperature compute degrees of freedom < 0");
+  scalar *= tfactor;
+  return scalar;
+}
+
+template<class DeviceType>
+template<int RMASS>
+KOKKOS_INLINE_FUNCTION
+void ComputeTempKokkos<DeviceType>::operator()(TagComputeTempScalar<RMASS>, const int &i, CTEMP& t_kk) const {
+  if (RMASS) {
+    if (mask[i] & groupbit)
+      t_kk.t0 += (v(i,0)*v(i,0) + v(i,1)*v(i,1) + v(i,2)*v(i,2)) * rmass[i];
+  } else {
+    if (mask[i] & groupbit)
+      t_kk.t0 += (v(i,0)*v(i,0) + v(i,1)*v(i,1) + v(i,2)*v(i,2)) *
+        mass[type[i]];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void ComputeTempKokkos<DeviceType>::compute_vector()
+{
+  atomKK->sync(execution_space,datamask_read);
+
+  int i;
+
+  invoked_vector = update->ntimestep;
+
+  v = atomKK->k_v.view<DeviceType>();
+  rmass = atomKK->rmass;
+  mass = atomKK->k_mass.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  mask = atomKK->k_mask.view<DeviceType>();
+  int nlocal = atom->nlocal;
+
+  double t[6];
+  for (i = 0; i < 6; i++) t[i] = 0.0;
+  CTEMP t_kk;
+
+  copymode = 1;
+  if (rmass)
+    Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempVector<1> >(0,nlocal),*this,t_kk);
+  else
+    Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempVector<0> >(0,nlocal),*this,t_kk);
+  DeviceType::fence();
+  copymode = 0;
+
+  t[0] = t_kk.t0;
+  t[1] = t_kk.t1;
+  t[2] = t_kk.t2;
+  t[3] = t_kk.t3;
+  t[4] = t_kk.t4;
+  t[5] = t_kk.t5;
+
+  MPI_Allreduce(t,vector,6,MPI_DOUBLE,MPI_SUM,world);
+  for (i = 0; i < 6; i++) vector[i] *= force->mvv2e;
+}
+
+template<class DeviceType>
+template<int RMASS>
+KOKKOS_INLINE_FUNCTION
+void ComputeTempKokkos<DeviceType>::operator()(TagComputeTempVector<RMASS>, const int &i, CTEMP& t_kk) const {
+  if (mask[i] & groupbit) {
+    F_FLOAT massone = 0.0;
+    if (RMASS) massone = rmass[i];
+    else massone = mass[type[i]];
+    t_kk.t0 += massone * v(i,0)*v(i,0);
+    t_kk.t1 += massone * v(i,1)*v(i,1);
+    t_kk.t2 += massone * v(i,2)*v(i,2);
+    t_kk.t3 += massone * v(i,0)*v(i,1);
+    t_kk.t4 += massone * v(i,0)*v(i,2);
+    t_kk.t5 += massone * v(i,1)*v(i,2);
+  }
+}
+
+template class ComputeTempKokkos<LMPDeviceType>;
+#ifdef KOKKOS_HAVE_CUDA
+template class ComputeTempKokkos<LMPHostType>;
+#endif