Merge pull request #3623 from ndtrung81/kokkos-dipole-lj-expand

Adding the kokkos variants for lj/cut/dipole/cut and lj/expand/coul/long
2023-02-03 13:52:47 -05:00
parent 7fbb6095c6 e28ff8a8aa
commit a0a7e76cc3
22 changed files with 2278 additions and 74 deletions
--- a/doc/src/Commands_pair.rst
+++ b/doc/src/Commands_pair.rst
@ -166,7 +166,7 @@ OPT.
   * :doc:`lj/cut/coul/msm (go) <pair_lj_cut_coul>`
   * :doc:`lj/cut/coul/msm/dielectric <pair_dielectric>`
   * :doc:`lj/cut/coul/wolf (o) <pair_lj_cut_coul>`
-   * :doc:`lj/cut/dipole/cut (go) <pair_dipole>`
+   * :doc:`lj/cut/dipole/cut (gko) <pair_dipole>`
   * :doc:`lj/cut/dipole/long (g) <pair_dipole>`
   * :doc:`lj/cut/dipole/sf (go) <pair_dipole>`
   * :doc:`lj/cut/soft (o) <pair_fep_soft>`
@ -175,7 +175,7 @@ OPT.
   * :doc:`lj/cut/tip4p/long (got) <pair_lj_cut_tip4p>`
   * :doc:`lj/cut/tip4p/long/soft (o) <pair_fep_soft>`
   * :doc:`lj/expand (gko) <pair_lj_expand>`
-   * :doc:`lj/expand/coul/long (g) <pair_lj_expand>`
+   * :doc:`lj/expand/coul/long (gk) <pair_lj_expand>`
   * :doc:`lj/gromacs (gko) <pair_gromacs>`
   * :doc:`lj/gromacs/coul/gromacs (ko) <pair_gromacs>`
   * :doc:`lj/long/coul/long (iot) <pair_lj_long>`
--- a/doc/src/pair_dipole.rst
+++ b/doc/src/pair_dipole.rst
@ -1,5 +1,6 @@
 .. index:: pair_style lj/cut/dipole/cut
 .. index:: pair_style lj/cut/dipole/cut/gpu
+.. index:: pair_style lj/cut/dipole/cut/kk
 .. index:: pair_style lj/cut/dipole/cut/omp
 .. index:: pair_style lj/sf/dipole/sf
 .. index:: pair_style lj/sf/dipole/sf/gpu
@ -11,7 +12,7 @@
 pair_style lj/cut/dipole/cut command
 ====================================

-Accelerator Variants: *lj/cut/dipole/cut/gpu*, *lj/cut/dipole/cut/omp*
+Accelerator Variants: *lj/cut/dipole/cut/gpu*, *lj/cut/dipole/cut/kk*, *lj/cut/dipole/cut/omp*

 pair_style lj/sf/dipole/sf command
 ==================================
--- a/doc/src/pair_lj_expand.rst
+++ b/doc/src/pair_lj_expand.rst
@ -3,6 +3,7 @@
 .. index:: pair_style lj/expand/kk
 .. index:: pair_style lj/expand/omp
 .. index:: pair_style lj/expand/coul/long
+.. index:: pair_style lj/expand/coul/long/kk
 .. index:: pair_style lj/expand/coul/long/gpu

 pair_style lj/expand command
@ -13,7 +14,7 @@ Accelerator Variants: *lj/expand/gpu*, *lj/expand/kk*, *lj/expand/omp*
 pair_style lj/expand/coul/long command
 ======================================

-Accelerator Variants: *lj/expand/coul/long/gpu*
+Accelerator Variants: *lj/expand/coul/long/gpu*, *lj/expand/coul/long/kk*

 Syntax
 """"""
--- a/src/.gitignore
+++ b/src/.gitignore
@ -102,6 +102,8 @@
 /meam*.cpp
 /pair_meam.cpp
 /pair_meam.h
+/pair_meam_ms.cpp
+/pair_meam_ms.h

 /compute_mliap.cpp
 /compute_mliap.h
--- a/src/DIPOLE/atom_vec_dipole.h
+++ b/src/DIPOLE/atom_vec_dipole.h
@ -31,7 +31,7 @@ class AtomVecDipole : virtual public AtomVec {
  void grow_pointers() override;
  void data_atom_post(int) override;

- private:
+ protected:
  double **mu;
 };

--- a/src/DIPOLE/pair_lj_cut_dipole_cut.cpp
+++ b/src/DIPOLE/pair_lj_cut_dipole_cut.cpp
@ -39,6 +39,8 @@ PairLJCutDipoleCut::PairLJCutDipoleCut(LAMMPS *lmp) : Pair(lmp)

 PairLJCutDipoleCut::~PairLJCutDipoleCut()
 {
+  if (copymode) return;
+
  if (allocated) {
    memory->destroy(setflag);
    memory->destroy(cutsq);
--- a/src/DIPOLE/pair_lj_cut_dipole_cut.h
+++ b/src/DIPOLE/pair_lj_cut_dipole_cut.h
@ -46,7 +46,7 @@ class PairLJCutDipoleCut : public Pair {
  double **epsilon, **sigma;
  double **lj1, **lj2, **lj3, **lj4, **offset;

-  void allocate();
+  virtual void allocate();
 };

 }    // namespace LAMMPS_NS
--- a/src/EXTRA-PAIR/pair_lj_expand_coul_long.cpp
+++ b/src/EXTRA-PAIR/pair_lj_expand_coul_long.cpp
@ -60,6 +60,8 @@ PairLJExpandCoulLong::PairLJExpandCoulLong(LAMMPS *lmp) : Pair(lmp)

 PairLJExpandCoulLong::~PairLJExpandCoulLong()
 {
+  if (copymode) return;
+
  if (allocated) {
    memory->destroy(setflag);
    memory->destroy(cutsq);
--- a/src/KOKKOS/Install.sh
+++ b/src/KOKKOS/Install.sh
@ -64,8 +64,8 @@ action atom_vec_bond_kokkos.cpp atom_vec_bond.cpp
 action atom_vec_bond_kokkos.h atom_vec_bond.h
 action atom_vec_charge_kokkos.cpp
 action atom_vec_charge_kokkos.h
-action atom_vec_spin_kokkos.cpp atom_vec_spin.cpp
-action atom_vec_spin_kokkos.h atom_vec_spin.h
+action atom_vec_dipole_kokkos.cpp atom_vec_dipole.cpp
+action atom_vec_dipole_kokkos.h atom_vec_dipole.h
 action atom_vec_dpd_kokkos.cpp atom_vec_dpd.cpp
 action atom_vec_dpd_kokkos.h atom_vec_dpd.h
 action atom_vec_full_kokkos.cpp atom_vec_full.cpp
@ -78,6 +78,8 @@ action atom_vec_molecular_kokkos.cpp atom_vec_molecular.cpp
 action atom_vec_molecular_kokkos.h atom_vec_molecular.h
 action atom_vec_sphere_kokkos.cpp atom_vec_sphere.cpp
 action atom_vec_sphere_kokkos.h atom_vec_sphere.h
+action atom_vec_spin_kokkos.cpp atom_vec_spin.cpp
+action atom_vec_spin_kokkos.h atom_vec_spin.h
 action bond_class2_kokkos.cpp bond_class2.cpp
 action bond_class2_kokkos.h bond_class2.h
 action bond_fene_kokkos.cpp bond_fene.cpp
@ -94,10 +96,10 @@ action compute_coord_atom_kokkos.cpp
 action compute_coord_atom_kokkos.h
 action compute_orientorder_atom_kokkos.cpp
 action compute_orientorder_atom_kokkos.h
-action compute_temp_kokkos.cpp
-action compute_temp_kokkos.h
 action compute_temp_deform_kokkos.cpp
 action compute_temp_deform_kokkos.h
+action compute_temp_kokkos.cpp
+action compute_temp_kokkos.h
 action dihedral_charmm_kokkos.cpp dihedral_charmm.cpp
 action dihedral_charmm_kokkos.h dihedral_charmm.h
 action dihedral_class2_kokkos.cpp dihedral_class2.cpp
@ -110,13 +112,15 @@ action domain_kokkos.cpp
 action domain_kokkos.h
 action dynamical_matrix_kokkos.cpp dynamical_matrix.cpp
 action dynamical_matrix_kokkos.h dynamical_matrix.h
-action fftdata_kokkos.h fft3d.h
 action fft3d_kokkos.cpp fft3d.cpp
 action fft3d_kokkos.h fft3d.h
+action fftdata_kokkos.h fft3d.h
 action fix_acks2_reaxff_kokkos.cpp fix_acks2_reaxff.cpp
 action fix_acks2_reaxff_kokkos.h fix_acks2_reaxff.h
 action fix_deform_kokkos.cpp
 action fix_deform_kokkos.h
+action fix_dpd_energy_kokkos.cpp fix_dpd_energy.cpp
+action fix_dpd_energy_kokkos.h fix_dpd_energy.h
 action fix_dt_reset_kokkos.cpp
 action fix_dt_reset_kokkos.h
 action fix_enforce2d_kokkos.cpp
@ -131,6 +135,8 @@ action fix_langevin_kokkos.cpp
 action fix_langevin_kokkos.h
 action fix_minimize_kokkos.cpp
 action fix_minimize_kokkos.h
+action fix_momentum_kokkos.cpp
+action fix_momentum_kokkos.h
 action fix_neigh_history_kokkos.cpp
 action fix_neigh_history_kokkos.h
 action fix_nh_kokkos.cpp
@ -155,24 +161,20 @@ action fix_reaxff_bonds_kokkos.cpp fix_reaxff_bonds.cpp
 action fix_reaxff_bonds_kokkos.h fix_reaxff_bonds.h
 action fix_reaxff_species_kokkos.cpp fix_reaxff_species.cpp
 action fix_reaxff_species_kokkos.h fix_reaxff_species.h
+action fix_rx_kokkos.cpp fix_rx.cpp
+action fix_rx_kokkos.h fix_rx.h
 action fix_setforce_kokkos.cpp
 action fix_setforce_kokkos.h
 action fix_shake_kokkos.cpp fix_shake.cpp
 action fix_shake_kokkos.h fix_shake.h
 action fix_shardlow_kokkos.cpp fix_shardlow.cpp
 action fix_shardlow_kokkos.h fix_shardlow.h
-action fix_momentum_kokkos.cpp
-action fix_momentum_kokkos.h
+action fix_viscous_kokkos.cpp
+action fix_viscous_kokkos.h
 action fix_wall_lj93_kokkos.cpp
 action fix_wall_lj93_kokkos.h
 action fix_wall_reflect_kokkos.cpp
 action fix_wall_reflect_kokkos.h
-action fix_viscous_kokkos.cpp
-action fix_viscous_kokkos.h
-action fix_dpd_energy_kokkos.cpp fix_dpd_energy.cpp
-action fix_dpd_energy_kokkos.h fix_dpd_energy.h
-action fix_rx_kokkos.cpp fix_rx.cpp
-action fix_rx_kokkos.h fix_rx.h
 action grid3d_kokkos.cpp fft3d.h
 action grid3d_kokkos.h fft3d.h
 action improper_class2_kokkos.cpp improper_class2.cpp
@ -180,36 +182,48 @@ action improper_class2_kokkos.h improper_class2.h
 action improper_harmonic_kokkos.cpp improper_harmonic.cpp
 action improper_harmonic_kokkos.h improper_harmonic.h
 action kissfft_kokkos.h kissfft.h
-action kokkos.cpp
-action kokkos.h
-action kokkos_base.h
 action kokkos_base_fft.h fft3d.h
+action kokkos_base.h
 action kokkos_few.h
 action kokkos_type.h
-action meam_kokkos.h meam.h
+action kokkos.cpp
+action kokkos.h
+action math_special_kokkos.cpp
+action math_special_kokkos.h
 action meam_dens_final_kokkos.h meam_dens_final.cpp
 action meam_dens_init_kokkos.h meam_dens_init.cpp
 action meam_force_kokkos.h meam_force.cpp
 action meam_funcs_kokkos.h meam_funcs.cpp
 action meam_impl_kokkos.h meam_impl.cpp
+action meam_kokkos.h meam.h
 action meam_setup_done_kokkos.h meam_setup_done.cpp
 action memory_kokkos.h
+action min_cg_kokkos.cpp
+action min_cg_kokkos.h
+action min_kokkos.cpp
+action min_kokkos.h
+action min_linesearch_kokkos.cpp
+action min_linesearch_kokkos.h
 action mliap_data_kokkos.cpp mliap_data.cpp
 action mliap_data_kokkos.h mliap_data.h
 action mliap_descriptor_kokkos.h mliap_descriptor.h
 action mliap_descriptor_so3_kokkos.cpp mliap_descriptor_so3.cpp
 action mliap_descriptor_so3_kokkos.h mliap_descriptor_so3.h
+action mliap_model_kokkos.h mliap_model.h
 action mliap_model_linear_kokkos.cpp mliap_model_linear.cpp
 action mliap_model_linear_kokkos.h mliap_model_linear.h
 action mliap_model_python_kokkos.cpp mliap_model_linear.cpp
 action mliap_model_python_kokkos.h mliap_model_linear.h
-action mliap_model_kokkos.h mliap_model.h
-action mliap_unified_kokkos.cpp mliap_unified.cpp
-action mliap_unified_kokkos.h mliap_unified.h
 action mliap_so3_kokkos.cpp mliap_so3.cpp
 action mliap_so3_kokkos.h mliap_so3.h
+action mliap_unified_kokkos.cpp mliap_unified.cpp
+action mliap_unified_kokkos.h mliap_unified.h
 action modify_kokkos.cpp
 action modify_kokkos.h
+action nbin_kokkos.cpp
+action nbin_kokkos.h
+action nbin_ssa_kokkos.cpp nbin_ssa.cpp
+action nbin_ssa_kokkos.h nbin_ssa.h
 action neigh_bond_kokkos.cpp
 action neigh_bond_kokkos.h
 action neigh_list_kokkos.cpp
@ -220,26 +234,14 @@ action npair_copy_kokkos.cpp
 action npair_copy_kokkos.h
 action npair_halffull_kokkos.cpp
 action npair_halffull_kokkos.h
-action npair_skip_kokkos.cpp
-action npair_skip_kokkos.h
-action npair_trim_kokkos.cpp
-action npair_trim_kokkos.h
 action npair_kokkos.cpp
 action npair_kokkos.h
+action npair_skip_kokkos.cpp
+action npair_skip_kokkos.h
 action npair_ssa_kokkos.cpp npair_half_bin_newton_ssa.cpp
 action npair_ssa_kokkos.h npair_half_bin_newton_ssa.h
-action nbin_kokkos.cpp
-action nbin_kokkos.h
-action nbin_ssa_kokkos.cpp nbin_ssa.cpp
-action nbin_ssa_kokkos.h nbin_ssa.h
-action math_special_kokkos.cpp
-action math_special_kokkos.h
-action min_cg_kokkos.cpp
-action min_cg_kokkos.h
-action min_kokkos.cpp
-action min_kokkos.h
-action min_linesearch_kokkos.cpp
-action min_linesearch_kokkos.h
+action npair_trim_kokkos.cpp
+action npair_trim_kokkos.h
 action pack_kokkos.h pack.h
 action pair_adp_kokkos.cpp pair_adp.cpp
 action pair_adp_kokkos.h pair_adp.h
@ -259,26 +261,26 @@ action pair_coul_long_kokkos.cpp pair_coul_long.cpp
 action pair_coul_long_kokkos.h pair_coul_long.h
 action pair_coul_wolf_kokkos.cpp
 action pair_coul_wolf_kokkos.h
-action pair_dpd_kokkos.h pair_dpd.h
-action pair_dpd_kokkos.cpp pair_dpd.cpp
 action pair_dpd_ext_kokkos.cpp pair_dpd_ext.cpp
 action pair_dpd_ext_kokkos.h pair_dpd_ext.h
-action pair_dpd_ext_tstat_kokkos.h pair_dpd_ext_tstat.h
 action pair_dpd_ext_tstat_kokkos.cpp pair_dpd_ext_tstat.cpp
-action pair_dpd_tstat_kokkos.h pair_dpd_tstat.h
-action pair_dpd_tstat_kokkos.cpp pair_dpd_tstat.cpp
+action pair_dpd_ext_tstat_kokkos.h pair_dpd_ext_tstat.h
 action pair_dpd_fdt_energy_kokkos.cpp pair_dpd_fdt_energy.cpp
 action pair_dpd_fdt_energy_kokkos.h pair_dpd_fdt_energy.h
-action pair_eam_kokkos.cpp pair_eam.cpp
-action pair_eam_kokkos.h pair_eam.h
+action pair_dpd_kokkos.cpp pair_dpd.cpp
+action pair_dpd_kokkos.h pair_dpd.h
+action pair_dpd_tstat_kokkos.cpp pair_dpd_tstat.cpp
+action pair_dpd_tstat_kokkos.h pair_dpd_tstat.h
 action pair_eam_alloy_kokkos.cpp pair_eam_alloy.cpp
 action pair_eam_alloy_kokkos.h pair_eam_alloy.h
 action pair_eam_fs_kokkos.cpp pair_eam_fs.cpp
 action pair_eam_fs_kokkos.h pair_eam_fs.h
+action pair_eam_kokkos.cpp pair_eam.cpp
+action pair_eam_kokkos.h pair_eam.h
 action pair_exp6_rx_kokkos.cpp pair_exp6_rx.cpp
 action pair_exp6_rx_kokkos.h pair_exp6_rx.h
-action pair_gran_hooke_history_kokkos.h pair_gran_hooke_history.h
 action pair_gran_hooke_history_kokkos.cpp pair_gran_hooke_history.cpp
+action pair_gran_hooke_history_kokkos.h pair_gran_hooke_history.h
 action pair_hybrid_kokkos.cpp
 action pair_hybrid_kokkos.h
 action pair_hybrid_overlay_kokkos.cpp
@ -304,8 +306,12 @@ action pair_lj_cut_coul_dsf_kokkos.cpp pair_lj_cut_coul_dsf.cpp
 action pair_lj_cut_coul_dsf_kokkos.h pair_lj_cut_coul_dsf.h
 action pair_lj_cut_coul_long_kokkos.cpp pair_lj_cut_coul_long.cpp
 action pair_lj_cut_coul_long_kokkos.h pair_lj_cut_coul_long.h
+action pair_lj_cut_dipole_cut_kokkos.cpp pair_lj_cut_dipole_cut.cpp
+action pair_lj_cut_dipole_cut_kokkos.h pair_lj_cut_dipole_cut.h
 action pair_lj_cut_kokkos.cpp
 action pair_lj_cut_kokkos.h
+action pair_lj_expand_coul_long_kokkos.cpp pair_lj_expand_coul_long.cpp
+action pair_lj_expand_coul_long_kokkos.h pair_lj_expand_coul_long.h
 action pair_lj_expand_kokkos.cpp
 action pair_lj_expand_kokkos.h
 action pair_lj_gromacs_coul_gromacs_kokkos.cpp pair_lj_gromacs_coul_gromacs.cpp
@ -324,19 +330,17 @@ action pair_morse_kokkos.cpp
 action pair_morse_kokkos.h
 action pair_multi_lucy_rx_kokkos.cpp pair_multi_lucy_rx.cpp
 action pair_multi_lucy_rx_kokkos.h pair_multi_lucy_rx.h
-action pair_pace_kokkos.cpp pair_pace.cpp
-action pair_pace_kokkos.h pair_pace.h
 action pair_pace_extrapolation_kokkos.cpp pair_pace_extrapolation.cpp
 action pair_pace_extrapolation_kokkos.h pair_pace_extrapolation.h
+action pair_pace_kokkos.cpp pair_pace.cpp
+action pair_pace_kokkos.h pair_pace.h
 action pair_reaxff_kokkos.cpp pair_reaxff.cpp
 action pair_reaxff_kokkos.h pair_reaxff.h
+action pair_snap_kokkos_impl.h pair_snap.cpp
 action pair_snap_kokkos.cpp pair_snap.cpp
 action pair_snap_kokkos.h pair_snap.h
-action pair_snap_kokkos_impl.h pair_snap.cpp
 action pair_sw_kokkos.cpp pair_sw.cpp
 action pair_sw_kokkos.h pair_sw.h
-action pair_vashishta_kokkos.cpp pair_vashishta.cpp
-action pair_vashishta_kokkos.h pair_vashishta.h
 action pair_table_kokkos.cpp
 action pair_table_kokkos.h
 action pair_table_rx_kokkos.cpp pair_table_rx.cpp
@ -347,6 +351,8 @@ action pair_tersoff_mod_kokkos.cpp pair_tersoff_mod.cpp
 action pair_tersoff_mod_kokkos.h pair_tersoff_mod.h
 action pair_tersoff_zbl_kokkos.cpp pair_tersoff_zbl.cpp
 action pair_tersoff_zbl_kokkos.h pair_tersoff_zbl.h
+action pair_vashishta_kokkos.cpp pair_vashishta.cpp
+action pair_vashishta_kokkos.h pair_vashishta.h
 action pair_yukawa_kokkos.cpp
 action pair_yukawa_kokkos.h
 action pair_zbl_kokkos.cpp
@ -359,8 +365,8 @@ action region_block_kokkos.cpp
 action region_block_kokkos.h
 action remap_kokkos.cpp remap.cpp
 action remap_kokkos.h remap.h
-action sna_kokkos.h sna.h
 action sna_kokkos_impl.h sna.cpp
+action sna_kokkos.h sna.h
 action third_order_kokkos.cpp dynamical_matrix.cpp
 action third_order_kokkos.h dynamical_matrix.h
 action transpose_helper_kokkos.h
--- a/src/KOKKOS/atom_kokkos.h
+++ b/src/KOKKOS/atom_kokkos.h
@ -34,6 +34,7 @@ class AtomKokkos : public Atom {
  DAT::tdual_float_1d k_q;
  DAT::tdual_float_1d k_radius;
  DAT::tdual_float_1d k_rmass;
+  DAT::tdual_float_1d_4 k_mu;
  DAT::tdual_v_array k_omega;
  DAT::tdual_v_array k_angmom;
  DAT::tdual_f_array k_torque;
--- a/src/KOKKOS/atom_vec_atomic_kokkos.h
+++ b/src/KOKKOS/atom_vec_atomic_kokkos.h
@ -71,4 +71,3 @@ class AtomVecAtomicKokkos : public AtomVecKokkos, public AtomVecAtomic {

 #endif
 #endif
-
--- a/src/KOKKOS/atom_vec_dipole_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_dipole_kokkos.cpp
@ -0,0 +1,666 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "atom_vec_dipole_kokkos.h"
+
+#include "atom_kokkos.h"
+#include "atom_masks.h"
+#include "comm_kokkos.h"
+#include "domain.h"
+#include "error.h"
+#include "fix.h"
+#include "memory_kokkos.h"
+#include "modify.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+AtomVecDipoleKokkos::AtomVecDipoleKokkos(LAMMPS *lmp) : AtomVec(lmp),
+AtomVecKokkos(lmp), AtomVecDipole(lmp)
+{
+
+}
+
+/* ----------------------------------------------------------------------
+   grow atom arrays
+   n = 0 grows arrays by DELTA
+   n > 0 allocates arrays to size n
+------------------------------------------------------------------------- */
+
+void AtomVecDipoleKokkos::grow(int n)
+{
+  auto DELTA = LMP_KOKKOS_AV_DELTA;
+  int step = MAX(DELTA,nmax*0.01);
+  if (n == 0) nmax += step;
+  else nmax = n;
+  atomKK->nmax = nmax;
+  if (nmax < 0 || nmax > MAXSMALLINT)
+    error->one(FLERR,"Per-processor system is too big");
+
+  atomKK->sync(Device,ALL_MASK);
+  atomKK->modified(Device,ALL_MASK);
+
+  memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
+  memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
+  memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask");
+  memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image");
+
+  memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,"atom:x");
+  memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,"atom:v");
+  memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,"atom:f");
+
+  memoryKK->grow_kokkos(atomKK->k_q,atomKK->q,nmax,"atom:q");
+  memoryKK->grow_kokkos(atomKK->k_mu,atomKK->mu,nmax,"atom:mu");
+
+  grow_pointers();
+  atomKK->sync(Host,ALL_MASK);
+
+  if (atom->nextra_grow)
+    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+      modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax);
+}
+
+/* ----------------------------------------------------------------------
+   reset local array ptrs
+------------------------------------------------------------------------- */
+
+void AtomVecDipoleKokkos::grow_pointers()
+{
+  tag = atomKK->tag;
+  d_tag = atomKK->k_tag.d_view;
+  h_tag = atomKK->k_tag.h_view;
+
+  type = atomKK->type;
+  d_type = atomKK->k_type.d_view;
+  h_type = atomKK->k_type.h_view;
+  mask = atomKK->mask;
+  d_mask = atomKK->k_mask.d_view;
+  h_mask = atomKK->k_mask.h_view;
+  image = atomKK->image;
+  d_image = atomKK->k_image.d_view;
+  h_image = atomKK->k_image.h_view;
+
+  x = atomKK->x;
+  d_x = atomKK->k_x.d_view;
+  h_x = atomKK->k_x.h_view;
+  v = atomKK->v;
+  d_v = atomKK->k_v.d_view;
+  h_v = atomKK->k_v.h_view;
+  f = atomKK->f;
+  d_f = atomKK->k_f.d_view;
+  h_f = atomKK->k_f.h_view;
+
+  q = atomKK->q;
+  d_q = atomKK->k_q.d_view;
+  h_q = atomKK->k_q.h_view;
+  mu = atomKK->mu;
+  d_mu = atomKK->k_mu.d_view;
+  h_mu = atomKK->k_mu.h_view;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecDipoleKokkos_PackComm {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_mu_array_randomread _mu;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];
+
+  AtomVecDipoleKokkos_PackComm(
+      const typename DAT::tdual_x_array &x,
+      const typename DAT::tdual_float_1d_4 &mu,
+      const typename DAT::tdual_xfloat_2d &buf,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap,
+      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
+      _x(x.view<DeviceType>()),
+      _mu(mu.view<DeviceType>()),
+      _list(list.view<DeviceType>()),_iswap(iswap),
+      _xprd(xprd),_yprd(yprd),_zprd(zprd),
+      _xy(xy),_xz(xz),_yz(yz) {
+        const size_t elements = 7; // size_forward
+        const size_t maxsend = (buf.view<DeviceType>().extent(0)*buf.view<DeviceType>().extent(1))/elements;
+        buffer_view<DeviceType>(_buf,buf,maxsend,elements);
+        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
+        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int j = _list(_iswap,i);
+    if (PBC_FLAG == 0) {
+      _buf(i,0) = _x(j,0);
+      _buf(i,1) = _x(j,1);
+      _buf(i,2) = _x(j,2);
+      _buf(i,3) = _mu(j,0);
+      _buf(i,4) = _mu(j,1);
+      _buf(i,5) = _mu(j,2);
+      _buf(i,6) = _mu(j,3);
+    } else {
+      if (TRICLINIC == 0) {
+        _buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
+        _buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
+        _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+        _buf(i,3) = _mu(j,0);
+        _buf(i,4) = _mu(j,1);
+        _buf(i,5) = _mu(j,2);
+        _buf(i,6) = _mu(j,3);
+      } else {
+        _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+        _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+        _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+        _buf(i,3) = _mu(j,0);
+        _buf(i,4) = _mu(j,1);
+        _buf(i,5) = _mu(j,2);
+        _buf(i,6) = _mu(j,3);
+      }
+    }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG>
+struct AtomVecDipoleKokkos_PackBorder {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_xfloat_2d _buf;
+  const typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  const typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  const typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
+  const typename ArrayTypes<DeviceType>::t_int_1d _type;
+  const typename ArrayTypes<DeviceType>::t_int_1d _mask;
+  const typename ArrayTypes<DeviceType>::t_float_1d _q;
+  const typename ArrayTypes<DeviceType>::t_mu_array_randomread _mu;
+  X_FLOAT _dx,_dy,_dz;
+
+  AtomVecDipoleKokkos_PackBorder(
+      const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf,
+      const typename ArrayTypes<DeviceType>::t_int_2d_const &list,
+      const int & iswap,
+      const typename ArrayTypes<DeviceType>::t_x_array &x,
+      const typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
+      const typename ArrayTypes<DeviceType>::t_int_1d &type,
+      const typename ArrayTypes<DeviceType>::t_int_1d &mask,
+      const typename ArrayTypes<DeviceType>::t_float_1d &q,
+      const typename ArrayTypes<DeviceType>::t_mu_array_randomread &mu,
+      const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
+      _buf(buf),_list(list),_iswap(iswap),
+      _x(x),_tag(tag),_type(type),_mask(mask),_q(q),_mu(mu),
+      _dx(dx),_dy(dy),_dz(dz) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+      const int j = _list(_iswap,i);
+      if (PBC_FLAG == 0) {
+          _buf(i,0) = _x(j,0);
+          _buf(i,1) = _x(j,1);
+          _buf(i,2) = _x(j,2);
+          _buf(i,3) = d_ubuf(_tag(j)).d;
+          _buf(i,4) = d_ubuf(_type(j)).d;
+          _buf(i,5) = d_ubuf(_mask(j)).d;
+          _buf(i,6) = _q(j);
+          _buf(i,7) = _mu(j,0);
+          _buf(i,8) = _mu(j,1);
+          _buf(i,9) = _mu(j,2);
+          _buf(i,10) = _mu(j,3);
+      } else {
+          _buf(i,0) = _x(j,0) + _dx;
+          _buf(i,1) = _x(j,1) + _dy;
+          _buf(i,2) = _x(j,2) + _dz;
+          _buf(i,3) = d_ubuf(_tag(j)).d;
+          _buf(i,4) = d_ubuf(_type(j)).d;
+          _buf(i,5) = d_ubuf(_mask(j)).d;
+          _buf(i,6) = _q(j);
+          _buf(i,7) = _mu(j,0);
+          _buf(i,8) = _mu(j,1);
+          _buf(i,9) = _mu(j,2);
+          _buf(i,10) = _mu(j,3);
+      }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecDipoleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap,
+                               int pbc_flag, int *pbc, ExecutionSpace space)
+{
+  X_FLOAT dx,dy,dz;
+
+  if (pbc_flag != 0) {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    }
+    if (space==Host) {
+      AtomVecDipoleKokkos_PackBorder<LMPHostType,1> f(
+        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+        iswap,h_x,h_tag,h_type,h_mask,h_q,h_mu,dx,dy,dz);
+      Kokkos::parallel_for(n,f);
+    } else {
+      AtomVecDipoleKokkos_PackBorder<LMPDeviceType,1> f(
+        buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+        iswap,d_x,d_tag,d_type,d_mask,d_q,d_mu,dx,dy,dz);
+      Kokkos::parallel_for(n,f);
+    }
+
+  } else {
+    dx = dy = dz = 0;
+    if (space==Host) {
+      AtomVecDipoleKokkos_PackBorder<LMPHostType,0> f(
+        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+        iswap,h_x,h_tag,h_type,h_mask,h_q,h_mu,dx,dy,dz);
+      Kokkos::parallel_for(n,f);
+    } else {
+      AtomVecDipoleKokkos_PackBorder<LMPDeviceType,0> f(
+        buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+        iswap,d_x,d_tag,d_type,d_mask,d_q,d_mu,dx,dy,dz);
+      Kokkos::parallel_for(n,f);
+    }
+  }
+  return n*size_border;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecDipoleKokkos_UnpackBorder {
+  typedef DeviceType device_type;
+
+  const typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
+  typename ArrayTypes<DeviceType>::t_x_array _x;
+  typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
+  typename ArrayTypes<DeviceType>::t_int_1d _type;
+  typename ArrayTypes<DeviceType>::t_int_1d _mask;
+  typename ArrayTypes<DeviceType>::t_float_1d _q;
+  typename ArrayTypes<DeviceType>::t_mu_array _mu;
+  int _first;
+
+
+  AtomVecDipoleKokkos_UnpackBorder(
+      const typename ArrayTypes<DeviceType>::t_xfloat_2d_const &buf,
+      typename ArrayTypes<DeviceType>::t_x_array &x,
+      typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
+      typename ArrayTypes<DeviceType>::t_int_1d &type,
+      typename ArrayTypes<DeviceType>::t_int_1d &mask,
+      typename ArrayTypes<DeviceType>::t_float_1d &q,
+      typename ArrayTypes<DeviceType>::t_mu_array &mu,
+      const int& first):
+    _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_mu(mu),_first(first) {
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+      _x(i+_first,0) = _buf(i,0);
+      _x(i+_first,1) = _buf(i,1);
+      _x(i+_first,2) = _buf(i,2);
+      _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i;
+      _type(i+_first) = (int) d_ubuf(_buf(i,4)).i;
+      _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i;
+      _q(i+_first) = _buf(i,6);
+      _mu(i+_first,0) = _buf(i,7);
+      _mu(i+_first,1) = _buf(i,8);
+      _mu(i+_first,2) = _buf(i,9);
+      _mu(i+_first,3) = _buf(i,10);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecDipoleKokkos::unpack_border_kokkos(const int &n, const int &first,
+                     const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) {
+  atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MU_MASK);
+  while (first+n >= nmax) grow(0);
+  atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MU_MASK);
+  if (space==Host) {
+    struct AtomVecDipoleKokkos_UnpackBorder<LMPHostType>
+      f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_q,h_mu,first);
+    Kokkos::parallel_for(n,f);
+  } else {
+    struct AtomVecDipoleKokkos_UnpackBorder<LMPDeviceType>
+      f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_q,d_mu,first);
+    Kokkos::parallel_for(n,f);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecDipoleKokkos_PackExchangeFunctor {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  typename AT::t_x_array_randomread _x;
+  typename AT::t_v_array_randomread _v;
+  typename AT::t_tagint_1d_randomread _tag;
+  typename AT::t_int_1d_randomread _type;
+  typename AT::t_int_1d_randomread _mask;
+  typename AT::t_imageint_1d_randomread _image;
+  typename AT::t_float_1d_randomread _q;
+  typename AT::t_mu_array_randomread _mu;
+  typename AT::t_x_array _xw;
+  typename AT::t_v_array _vw;
+  typename AT::t_tagint_1d _tagw;
+  typename AT::t_int_1d _typew;
+  typename AT::t_int_1d _maskw;
+  typename AT::t_imageint_1d _imagew;
+  typename AT::t_float_1d _qw;
+  typename AT::t_sp_array _muw;
+
+  typename AT::t_xfloat_2d_um _buf;
+  typename AT::t_int_1d_const _sendlist;
+  typename AT::t_int_1d_const _copylist;
+  int _nlocal,_dim;
+  X_FLOAT _lo,_hi;
+
+  AtomVecDipoleKokkos_PackExchangeFunctor(
+      const AtomKokkos* atom,
+      const typename AT::tdual_xfloat_2d buf,
+      typename AT::tdual_int_1d sendlist,
+      typename AT::tdual_int_1d copylist,int nlocal, int dim,
+                X_FLOAT lo, X_FLOAT hi):
+    _x(atom->k_x.view<DeviceType>()),
+    _v(atom->k_v.view<DeviceType>()),
+    _tag(atom->k_tag.view<DeviceType>()),
+    _type(atom->k_type.view<DeviceType>()),
+    _mask(atom->k_mask.view<DeviceType>()),
+    _image(atom->k_image.view<DeviceType>()),
+    _q(atom->k_q.view<DeviceType>()),
+    _mu(atom->k_mu.view<DeviceType>()),
+    _xw(atom->k_x.view<DeviceType>()),
+    _vw(atom->k_v.view<DeviceType>()),
+    _tagw(atom->k_tag.view<DeviceType>()),
+    _typew(atom->k_type.view<DeviceType>()),
+    _maskw(atom->k_mask.view<DeviceType>()),
+    _imagew(atom->k_image.view<DeviceType>()),
+    _qw(atom->k_q.view<DeviceType>()),
+    _muw(atom->k_mu.view<DeviceType>()),
+    _sendlist(sendlist.template view<DeviceType>()),
+    _copylist(copylist.template view<DeviceType>()),
+    _nlocal(nlocal),_dim(dim),
+    _lo(lo),_hi(hi) {
+    const size_t elements = 16; // 1st = # of values, followed by 15 values (see below)
+    const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
+                             buf.template view<DeviceType>().extent(1))/elements;
+
+    buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &mysend) const {
+    const int i = _sendlist(mysend);
+    _buf(mysend,0) = 16; // elements
+    _buf(mysend,1) = _x(i,0);
+    _buf(mysend,2) = _x(i,1);
+    _buf(mysend,3) = _x(i,2);
+    _buf(mysend,4) = _v(i,0);
+    _buf(mysend,5) = _v(i,1);
+    _buf(mysend,6) = _v(i,2);
+    _buf(mysend,7) = d_ubuf(_tag[i]).d;
+    _buf(mysend,8) = d_ubuf(_type[i]).d;
+    _buf(mysend,9) = d_ubuf(_mask[i]).d;
+    _buf(mysend,10) = d_ubuf(_image[i]).d;
+    _buf(mysend,11) = _q[i];
+    _buf(mysend,12) = _mu(i,0);
+    _buf(mysend,13) = _mu(i,1);
+    _buf(mysend,14) = _mu(i,2);
+    _buf(mysend,15) = _mu(i,3);
+    const int j = _copylist(mysend);
+
+    if (j>-1) {
+      _xw(i,0) = _x(j,0);
+      _xw(i,1) = _x(j,1);
+      _xw(i,2) = _x(j,2);
+      _vw(i,0) = _v(j,0);
+      _vw(i,1) = _v(j,1);
+      _vw(i,2) = _v(j,2);
+      _tagw(i) = _tag(j);
+      _typew(i) = _type(j);
+      _maskw(i) = _mask(j);
+      _imagew(i) = _image(j);
+      _qw(i) = _q(j);
+      _muw(i,0) = _mu(j,0);
+      _muw(i,1) = _mu(j,1);
+      _muw(i,2) = _mu(j,2);
+      _muw(i,3) = _mu(j,3);
+    }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecDipoleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
+                                              DAT::tdual_int_1d k_sendlist,
+                                              DAT::tdual_int_1d k_copylist,
+                                              ExecutionSpace space,int dim,
+                                              X_FLOAT lo,X_FLOAT hi )
+{
+  const size_t nelements = 16; // # of elements packed
+  if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/12) {
+    int newsize = nsend*nelements/k_buf.view<LMPHostType>().extent(1)+1;
+    k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
+  }
+  if (space == Host) {
+    AtomVecDipoleKokkos_PackExchangeFunctor<LMPHostType>
+      f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
+    Kokkos::parallel_for(nsend,f);
+    return nsend*nelements;
+  } else {
+    AtomVecDipoleKokkos_PackExchangeFunctor<LMPDeviceType>
+      f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
+    Kokkos::parallel_for(nsend,f);
+    return nsend*nelements;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecDipoleKokkos_UnpackExchangeFunctor {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  typename AT::t_x_array _x;
+  typename AT::t_v_array _v;
+  typename AT::t_tagint_1d _tag;
+  typename AT::t_int_1d _type;
+  typename AT::t_int_1d _mask;
+  typename AT::t_imageint_1d _image;
+  typename AT::t_float_1d _q;
+  typename AT::t_mu_array _mu;
+  typename AT::t_xfloat_2d_um _buf;
+  typename AT::t_int_1d _nlocal;
+  int _dim;
+  X_FLOAT _lo,_hi;
+
+  AtomVecDipoleKokkos_UnpackExchangeFunctor(
+      const AtomKokkos* atom,
+      const typename AT::tdual_xfloat_2d buf,
+      typename AT::tdual_int_1d nlocal,
+      int dim, X_FLOAT lo, X_FLOAT hi):
+    _x(atom->k_x.view<DeviceType>()),
+    _v(atom->k_v.view<DeviceType>()),
+    _tag(atom->k_tag.view<DeviceType>()),
+    _type(atom->k_type.view<DeviceType>()),
+    _mask(atom->k_mask.view<DeviceType>()),
+    _image(atom->k_image.view<DeviceType>()),
+    _q(atom->k_q.view<DeviceType>()),
+    _mu(atom->k_mu.view<DeviceType>()),
+    _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
+    _lo(lo),_hi(hi) {
+    const size_t elements = 16;
+    const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
+
+    buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &myrecv) const {
+    X_FLOAT x = _buf(myrecv,_dim+1);
+    if (x >= _lo && x < _hi) {
+      int i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
+      _x(i,0) = _buf(myrecv,1);
+      _x(i,1) = _buf(myrecv,2);
+      _x(i,2) = _buf(myrecv,3);
+      _v(i,0) = _buf(myrecv,4);
+      _v(i,1) = _buf(myrecv,5);
+      _v(i,2) = _buf(myrecv,6);
+      _tag[i] = (tagint) d_ubuf(_buf(myrecv,7)).i;
+      _type[i] = (int) d_ubuf(_buf(myrecv,8)).i;
+      _mask[i] = (int) d_ubuf(_buf(myrecv,9)).i;
+      _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i;
+      _q[i] = _buf(myrecv,11);
+      _mu(i,0) = _buf(myrecv,12);
+      _mu(i,1) = _buf(myrecv,13);
+      _mu(i,2) = _buf(myrecv,14);
+      _mu(i,3) = _buf(myrecv,15);
+    }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecDipoleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
+                                                int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
+                                                ExecutionSpace space) {
+  const size_t nelements = 16; // # of elements packed
+  if (space == Host) {
+    k_count.h_view(0) = nlocal;
+    AtomVecDipoleKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecv/nelements,f);
+    return k_count.h_view(0);
+  } else {
+    k_count.h_view(0) = nlocal;
+    k_count.modify<LMPHostType>();
+    k_count.sync<LMPDeviceType>();
+    AtomVecDipoleKokkos_UnpackExchangeFunctor<LMPDeviceType>
+      f(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecv/nelements,f);
+    k_count.modify<LMPDeviceType>();
+    k_count.sync<LMPHostType>();
+
+    return k_count.h_view(0);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecDipoleKokkos::sync(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {
+    if (mask & X_MASK) atomKK->k_x.sync<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPDeviceType>();
+    if (mask & Q_MASK) atomKK->k_q.sync<LMPDeviceType>();
+    if (mask & MU_MASK) atomKK->k_mu.sync<LMPDeviceType>();
+  } else {
+    if (mask & X_MASK) atomKK->k_x.sync<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPHostType>();
+    if (mask & Q_MASK) atomKK->k_q.sync<LMPHostType>();
+    if (mask & MU_MASK) atomKK->k_mu.sync<LMPHostType>();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecDipoleKokkos::modified(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {
+    if (mask & X_MASK) atomKK->k_x.modify<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPDeviceType>();
+    if (mask & Q_MASK) atomKK->k_q.modify<LMPDeviceType>();
+    if (mask & MU_MASK) atomKK->k_mu.modify<LMPDeviceType>();
+  } else {
+    if (mask & X_MASK) atomKK->k_x.modify<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPHostType>();
+    if (mask & Q_MASK) atomKK->k_q.modify<LMPHostType>();
+    if (mask & MU_MASK) atomKK->k_mu.modify<LMPHostType>();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecDipoleKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {
+    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
+    if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space);
+    if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space);
+    if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space);
+    if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space);
+    if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
+    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
+    if ((mask & Q_MASK) && atomKK->k_q.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space);
+    if ((mask & MU_MASK) && atomKK->k_mu.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_float_1d_4>(atomKK->k_mu,space);
+  } else {
+    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
+    if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space);
+    if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space);
+    if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space);
+    if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space);
+    if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
+    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
+    if ((mask & Q_MASK) && atomKK->k_q.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space);
+    if ((mask & MU_MASK) && atomKK->k_mu.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_float_1d_4>(atomKK->k_mu,space);
+  }
+}
--- a/src/KOKKOS/atom_vec_dipole_kokkos.h
+++ b/src/KOKKOS/atom_vec_dipole_kokkos.h
@ -0,0 +1,82 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef ATOM_CLASS
+// clang-format off
+AtomStyle(dipole/kk,AtomVecDipoleKokkos);
+AtomStyle(dipole/kk/device,AtomVecDipoleKokkos);
+AtomStyle(dipole/kk/host,AtomVecDipoleKokkos);
+// clang-format on
+#else
+
+// clang-format off
+#ifndef LMP_ATOM_VEC_DIPOLE_KOKKOS_H
+#define LMP_ATOM_VEC_DIPOLE_KOKKOS_H
+
+#include "atom_vec_kokkos.h"
+#include "atom_vec_dipole.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+class AtomVecDipoleKokkos : public AtomVecKokkos, public AtomVecDipole {
+ public:
+  AtomVecDipoleKokkos(class LAMMPS *);
+
+  void grow(int) override;
+  void grow_pointers() override;
+  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,int iswap,
+                         int pbc_flag, int *pbc, ExecutionSpace space) override;
+  void unpack_border_kokkos(const int &n, const int &nfirst,
+                            const DAT::tdual_xfloat_2d &buf,
+                            ExecutionSpace space) override;
+  int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
+                           DAT::tdual_int_1d k_sendlist,
+                           DAT::tdual_int_1d k_copylist,
+                           ExecutionSpace space, int dim,
+                           X_FLOAT lo, X_FLOAT hi) override;
+  int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
+                             int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
+                             ExecutionSpace space) override;
+
+  void sync(ExecutionSpace space, unsigned int mask) override;
+  void modified(ExecutionSpace space, unsigned int mask) override;
+  void sync_overlapping_device(ExecutionSpace space, unsigned int mask) override;
+
+ protected:
+  double *q;
+
+  DAT::t_tagint_1d d_tag;
+  HAT::t_tagint_1d h_tag;
+
+  DAT::t_int_1d d_type, d_mask;
+  HAT::t_int_1d h_type, h_mask;
+
+  DAT::t_imageint_1d d_image;
+  HAT::t_imageint_1d h_image;
+
+  DAT::t_x_array d_x;
+  DAT::t_v_array d_v;
+  DAT::t_f_array d_f;
+
+  DAT::t_float_1d d_q;
+  HAT::t_float_1d h_q;
+  DAT::t_mu_array d_mu;
+  HAT::t_mu_array h_mu;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
--- a/src/KOKKOS/fix_nve_sphere_kokkos.cpp
+++ b/src/KOKKOS/fix_nve_sphere_kokkos.cpp
@ -50,10 +50,6 @@ template<class DeviceType>
 void FixNVESphereKokkos<DeviceType>::init()
 {
  FixNVESphere::init();
-
-  if (extra == DIPOLE) {
-    error->all(FLERR,"Fix nve/sphere/kk doesn't yet support dipole");
-  }
 }

 /* ---------------------------------------------------------------------- */
@ -61,7 +57,10 @@ void FixNVESphereKokkos<DeviceType>::init()
 template<class DeviceType>
 void FixNVESphereKokkos<DeviceType>::initial_integrate(int /*vflag*/)
 {
-  atomKK->sync(execution_space, X_MASK | V_MASK | OMEGA_MASK| F_MASK | TORQUE_MASK | RMASS_MASK | RADIUS_MASK | MASK_MASK);
+  if (extra == DIPOLE)
+    atomKK->sync(execution_space, X_MASK | V_MASK | OMEGA_MASK| F_MASK | TORQUE_MASK | RMASS_MASK | RADIUS_MASK | MASK_MASK | MU_MASK);
+  else
+    atomKK->sync(execution_space, X_MASK | V_MASK | OMEGA_MASK| F_MASK | TORQUE_MASK | RMASS_MASK | RADIUS_MASK | MASK_MASK);

  x = atomKK->k_x.view<DeviceType>();
  v = atomKK->k_v.view<DeviceType>();
@ -71,6 +70,7 @@ void FixNVESphereKokkos<DeviceType>::initial_integrate(int /*vflag*/)
  mask = atomKK->k_mask.view<DeviceType>();
  rmass = atomKK->k_rmass.view<DeviceType>();
  radius = atomKK->k_radius.view<DeviceType>();
+  mu = atomKK->k_mu.view<DeviceType>();

  int nlocal = atom->nlocal;
  if (igroup == atom->firstgroup) nlocal = atom->nfirst;
@ -78,7 +78,10 @@ void FixNVESphereKokkos<DeviceType>::initial_integrate(int /*vflag*/)
  FixNVESphereKokkosInitialIntegrateFunctor<DeviceType> f(this);
  Kokkos::parallel_for(nlocal,f);

-  atomKK->modified(execution_space,  X_MASK | V_MASK | OMEGA_MASK);
+  if (extra == DIPOLE)
+    atomKK->modified(execution_space,  X_MASK | V_MASK | OMEGA_MASK | MU_MASK);
+  else
+    atomKK->modified(execution_space,  X_MASK | V_MASK | OMEGA_MASK);
 }

 /* ---------------------------------------------------------------------- */
@ -102,6 +105,17 @@ void FixNVESphereKokkos<DeviceType>::initial_integrate_item(const int i) const
    omega(i,0) += dtirotate * torque(i,0);
    omega(i,1) += dtirotate * torque(i,1);
    omega(i,2) += dtirotate * torque(i,2);
+
+    if (extra == DIPOLE) {
+      const double g0 = mu(i,0) + dtv * (omega(i,1) * mu(i,2) - omega(i,2) * mu(i,1));
+      const double g1 = mu(i,1) + dtv * (omega(i,2) * mu(i,0) - omega(i,0) * mu(i,2));
+      const double g2 = mu(i,2) + dtv * (omega(i,0) * mu(i,1) - omega(i,1) * mu(i,0));
+      const double msq = g0*g0 + g1*g1 + g2*g2;
+      const double scale = mu(i,3)/sqrt(msq);
+      mu(i,0) = g0*scale;
+      mu(i,1) = g1*scale;
+      mu(i,2) = g2*scale;
+    }
  }
 }

--- a/src/KOKKOS/fix_nve_sphere_kokkos.h
+++ b/src/KOKKOS/fix_nve_sphere_kokkos.h
@ -47,6 +47,7 @@ class FixNVESphereKokkos : public FixNVESphere {
    typename ArrayTypes<DeviceType>::t_x_array x;
    typename ArrayTypes<DeviceType>::t_v_array v;
    typename ArrayTypes<DeviceType>::t_v_array omega;
+    typename ArrayTypes<DeviceType>::t_mu_array mu;
    typename ArrayTypes<DeviceType>::t_f_array f;
    typename ArrayTypes<DeviceType>::t_f_array torque;
    typename ArrayTypes<DeviceType>::t_float_1d rmass;
--- a/src/KOKKOS/kokkos_type.h
+++ b/src/KOKKOS/kokkos_type.h
@ -717,6 +717,11 @@ typedef tdual_float_3d::t_dev_um t_float_3d_um;
 typedef tdual_float_3d::t_dev_const_um t_float_3d_const_um;
 typedef tdual_float_3d::t_dev_const_randomread t_float_3d_randomread;

+#ifdef LMP_KOKKOS_NO_LEGACY
+typedef Kokkos::DualView<X_FLOAT*[4], Kokkos::LayoutLeft, LMPDeviceType> tdual_float_1d_4;
+#else
+typedef Kokkos::DualView<X_FLOAT*[4], Kokkos::LayoutRight, LMPDeviceType> tdual_float_1d_4;
+#endif

 //Position Types
 //1d X_FLOAT array n
@ -819,6 +824,14 @@ typedef tdual_f_array::t_dev_um t_f_array_um;
 typedef tdual_f_array::t_dev_const_um t_f_array_const_um;
 typedef tdual_f_array::t_dev_const_randomread t_f_array_randomread;

+//2d F_FLOAT array n*4 (for dipoles and quaterions)
+
+typedef tdual_float_1d_4::t_dev t_mu_array;
+typedef tdual_float_1d_4::t_dev_const t_mu_array_const;
+typedef tdual_float_1d_4::t_dev_um t_mu_array_um;
+typedef tdual_float_1d_4::t_dev_const_um t_mu_array_const_um;
+typedef tdual_float_1d_4::t_dev_const_randomread t_mu_array_randomread;
+
 //2d F_FLOAT array n*6 (for virial)

 typedef Kokkos::DualView<F_FLOAT*[6], Kokkos::LayoutRight, LMPDeviceType> tdual_virial_array;
@ -831,11 +844,7 @@ typedef tdual_virial_array::t_dev_const_randomread t_virial_array_randomread;
 // Spin Types

 //3d SP_FLOAT array n*4
-#ifdef LMP_KOKKOS_NO_LEGACY
-typedef Kokkos::DualView<X_FLOAT*[4], Kokkos::LayoutLeft, LMPDeviceType> tdual_float_1d_4;
-#else
-typedef Kokkos::DualView<X_FLOAT*[4], Kokkos::LayoutRight, LMPDeviceType> tdual_float_1d_4;
-#endif
+
 typedef tdual_float_1d_4::t_dev t_sp_array;
 typedef tdual_float_1d_4::t_dev_const t_sp_array_const;
 typedef tdual_float_1d_4::t_dev_um t_sp_array_um;
@ -1008,6 +1017,12 @@ typedef tdual_float_2d::t_host_um t_float_2d_um;
 typedef tdual_float_2d::t_host_const_um t_float_2d_const_um;
 typedef tdual_float_2d::t_host_const_randomread t_float_2d_randomread;

+#ifdef LMP_KOKKOS_NO_LEGACY
+typedef Kokkos::DualView<X_FLOAT*[4], Kokkos::LayoutLeft, LMPDeviceType> tdual_float_1d_4;
+#else
+typedef Kokkos::DualView<X_FLOAT*[4], Kokkos::LayoutRight, LMPDeviceType> tdual_float_1d_4;
+#endif
+
 //Position Types
 //1d X_FLOAT array n
 typedef Kokkos::DualView<X_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_xfloat_1d;
@ -1101,6 +1116,14 @@ typedef tdual_f_array::t_host_um t_f_array_um;
 typedef tdual_f_array::t_host_const_um t_f_array_const_um;
 typedef tdual_f_array::t_host_const_randomread t_f_array_randomread;

+//2d F_FLOAT array n*4 (for dipoles and quaterions)
+
+typedef tdual_float_1d_4::t_host t_mu_array;
+typedef tdual_float_1d_4::t_host_const t_mu_array_const;
+typedef tdual_float_1d_4::t_host_um t_mu_array_um;
+typedef tdual_float_1d_4::t_host_const_um t_mu_array_const_um;
+typedef tdual_float_1d_4::t_host_const_randomread t_mu_array_randomread;
+
 //2d F_FLOAT array n*6 (for virial)
 typedef Kokkos::DualView<F_FLOAT*[6], Kokkos::LayoutRight, LMPDeviceType> tdual_virial_array;
 typedef tdual_virial_array::t_host t_virial_array;
@ -1112,11 +1135,6 @@ typedef tdual_virial_array::t_host_const_randomread t_virial_array_randomread;
 // Spin types

 //2d X_FLOAT array n*4
-#ifdef LMP_KOKKOS_NO_LEGACY
-typedef Kokkos::DualView<X_FLOAT*[4], Kokkos::LayoutLeft, LMPDeviceType> tdual_float_1d_4;
-#else
-typedef Kokkos::DualView<X_FLOAT*[4], Kokkos::LayoutRight, LMPDeviceType> tdual_float_1d_4;
-#endif
 typedef tdual_float_1d_4::t_host t_sp_array;
 typedef tdual_float_1d_4::t_host_const t_sp_array_const;
 typedef tdual_float_1d_4::t_host_um t_sp_array_um;
--- a/src/KOKKOS/pair_lj_cut_dipole_cut_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_cut_dipole_cut_kokkos.cpp
@ -0,0 +1,644 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Trung Nguyen (U Chicago)
+------------------------------------------------------------------------- */
+
+#include "pair_lj_cut_dipole_cut_kokkos.h"
+
+#include "atom_kokkos.h"
+#include "atom_masks.h"
+#include "error.h"
+#include "force.h"
+#include "kokkos.h"
+#include "math_const.h"
+#include "memory_kokkos.h"
+#include "neigh_list_kokkos.h"
+#include "neigh_request.h"
+#include "neighbor.h"
+#include "respa.h"
+#include "update.h"
+
+#include <cmath>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairLJCutDipoleCutKokkos<DeviceType>::PairLJCutDipoleCutKokkos(LAMMPS *lmp) : PairLJCutDipoleCut(lmp)
+{
+  respa_enable = 0;
+
+  kokkosable = 1;
+  atomKK = (AtomKokkos *) atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  datamask_read = X_MASK | F_MASK | TORQUE_MASK | TYPE_MASK | Q_MASK | MU_MASK | ENERGY_MASK | VIRIAL_MASK;
+  datamask_modify = F_MASK | TORQUE_MASK | ENERGY_MASK | VIRIAL_MASK;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairLJCutDipoleCutKokkos<DeviceType>::~PairLJCutDipoleCutKokkos()
+{
+  if (copymode) return;
+
+  if (allocated) {
+    memoryKK->destroy_kokkos(k_eatom,eatom);
+    memoryKK->destroy_kokkos(k_vatom,vatom);
+    memoryKK->destroy_kokkos(k_cutsq,cutsq);
+    memoryKK->destroy_kokkos(k_cut_ljsq,cut_ljsq);
+    memoryKK->destroy_kokkos(k_cut_coulsq,cut_coulsq);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCutDipoleCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
+{
+  eflag = eflag_in;
+  vflag = vflag_in;
+
+  if (neighflag == FULL) no_virial_fdotr_compute = 1;
+
+  ev_init(eflag,vflag,0);
+
+  // reallocate per-atom arrays if necessary
+
+  if (eflag_atom) {
+    memoryKK->destroy_kokkos(k_eatom,eatom);
+    memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
+    d_eatom = k_eatom.view<DeviceType>();
+  }
+  if (vflag_atom) {
+    memoryKK->destroy_kokkos(k_vatom,vatom);
+    memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom");
+    d_vatom = k_vatom.view<DeviceType>();
+  }
+
+  atomKK->sync(execution_space,datamask_read);
+  k_cutsq.template sync<DeviceType>();
+  k_cut_ljsq.template sync<DeviceType>();
+  k_cut_coulsq.template sync<DeviceType>();
+  k_params.template sync<DeviceType>();
+  if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
+  else atomKK->modified(execution_space,F_MASK | TORQUE_MASK);
+
+  x = atomKK->k_x.view<DeviceType>();
+  c_x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  torque = atomKK->k_torque.view<DeviceType>();
+  q = atomKK->k_q.view<DeviceType>();
+  mu = atomKK->k_mu.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  nlocal = atom->nlocal;
+  nall = atom->nlocal + atom->nghost;
+  special_lj[0] = force->special_lj[0];
+  special_lj[1] = force->special_lj[1];
+  special_lj[2] = force->special_lj[2];
+  special_lj[3] = force->special_lj[3];
+  special_coul[0] = force->special_coul[0];
+  special_coul[1] = force->special_coul[1];
+  special_coul[2] = force->special_coul[2];
+  special_coul[3] = force->special_coul[3];
+  qqrd2e = force->qqrd2e;
+  newton_pair = force->newton_pair;
+
+  // get the neighbor list and neighbors used in operator()
+
+  NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
+  d_numneigh = k_list->d_numneigh;
+  d_neighbors = k_list->d_neighbors;
+  d_ilist = k_list->d_ilist;
+  int inum = list->inum;
+
+  // loop over neighbors of my atoms
+
+  copymode = 1;
+
+  EV_FLOAT ev;
+
+  // compute kernel NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS
+  if (atom->ntypes > MAX_TYPES_STACKPARAMS) { // STACKPARAMS==false
+    if (evflag) { // EVFLAG==1
+      if (neighflag == HALF) {
+        if (newton_pair) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<HALF,1,1,false> >(0,inum),*this,ev);
+        else Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<HALF,0,1,false> >(0,inum),*this,ev);
+      } else if (neighflag == HALFTHREAD) {
+        if (newton_pair) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<HALFTHREAD,1,1,false> >(0,inum),*this,ev);
+        else Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<HALFTHREAD,0,1,false> >(0,inum),*this,ev);
+      } else if (neighflag == FULL) {
+        if (newton_pair) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<FULL,1,1,false> >(0,inum),*this,ev);
+        else Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<FULL,0,1,false> >(0,inum),*this,ev);
+      }
+    } else {  // EVFLAG==0
+      if (neighflag == HALF) {
+        if (newton_pair) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<HALF,1,0,false> >(0,inum),*this);
+        else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<HALF,0,0,false> >(0,inum),*this);
+      } else if (neighflag == HALFTHREAD) {
+        if (newton_pair) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<HALFTHREAD,1,0,false> >(0,inum),*this);
+        else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<HALFTHREAD,0,0,false> >(0,inum),*this);
+      } else if (neighflag == FULL) {
+        if (newton_pair) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<FULL,1,0,false> >(0,inum),*this);
+        else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<FULL,0,0,false> >(0,inum),*this);
+      }
+    }
+  } else { // STACKPARAMS==true
+    if (evflag) { // EVFLAG==1
+      if (neighflag == HALF) {
+        if (newton_pair) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<HALF,1,1,true> >(0,inum),*this,ev);
+        else Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<HALF,0,1,true> >(0,inum),*this,ev);
+      } else if (neighflag == HALFTHREAD) {
+        if (newton_pair) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<HALFTHREAD,1,1,true> >(0,inum),*this,ev);
+        else Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<HALFTHREAD,0,1,true> >(0,inum),*this,ev);
+      } else if (neighflag == FULL) {
+        if (newton_pair) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<FULL,1,1,true> >(0,inum),*this,ev);
+        else Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<FULL,0,1,true> >(0,inum),*this,ev);
+      }
+    } else {
+      if (neighflag == HALF) {
+        if (newton_pair) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<HALF,1,0,true> >(0,inum),*this);
+        else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<HALF,0,0,true> >(0,inum),*this);
+      } else if (neighflag == HALFTHREAD) {
+        if (newton_pair) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<HALFTHREAD,1,0,true> >(0,inum),*this);
+        else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<HALFTHREAD,0,0,true> >(0,inum),*this);
+      } else if (neighflag == FULL) {
+        if (newton_pair) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<FULL,1,0,true> >(0,inum),*this);
+        else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairLJCutDipoleCutKernel<FULL,0,0,true> >(0,inum),*this);
+      }
+    }
+  }
+
+  if (eflag_global) {
+    eng_vdwl += ev.evdwl;
+    eng_coul += ev.ecoul;
+  }
+
+  if (vflag_global) {
+    virial[0] += ev.v[0];
+    virial[1] += ev.v[1];
+    virial[2] += ev.v[2];
+    virial[3] += ev.v[3];
+    virial[4] += ev.v[4];
+    virial[5] += ev.v[5];
+  }
+
+  if (eflag_atom) {
+    k_eatom.template modify<DeviceType>();
+    k_eatom.template sync<LMPHostType>();
+  }
+
+  if (vflag_atom) {
+    k_vatom.template modify<DeviceType>();
+    k_vatom.template sync<LMPHostType>();
+  }
+
+  if (vflag_fdotr) pair_virial_fdotr_compute(this);
+
+  copymode = 0;
+}
+
+/* ----------------------------------------------------------------------
+  needs torque as with AtomVecSphereKokkos
+  needs energy calculation as well
+  ---------------------------------------------------------------------- */
+template<class DeviceType>
+template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
+KOKKOS_INLINE_FUNCTION
+void PairLJCutDipoleCutKokkos<DeviceType>::operator()(TagPairLJCutDipoleCutKernel<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>, const int ii, EV_FLOAT &ev) const {
+
+  // The f and torque arrays are atomic for Half/Thread neighbor style
+  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,typename KKDevice<DeviceType>::value,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
+  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,typename KKDevice<DeviceType>::value,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_torque = torque;
+
+  const int i = d_ilist[ii];
+  const X_FLOAT xtmp = x(i,0);
+  const X_FLOAT ytmp = x(i,1);
+  const X_FLOAT ztmp = x(i,2);
+  const X_FLOAT mui = mu(i,3);
+  const int itype = type(i);
+  const F_FLOAT qtmp = q[i];
+
+  const int jnum = d_numneigh[i];
+
+  F_FLOAT fx_i = 0.0;
+  F_FLOAT fy_i = 0.0;
+  F_FLOAT fz_i = 0.0;
+  F_FLOAT torquex_i = 0.0;
+  F_FLOAT torquey_i = 0.0;
+  F_FLOAT torquez_i = 0.0;
+
+  for (int jj = 0; jj < jnum; jj++) {
+    int j = d_neighbors(i,jj);
+    const F_FLOAT factor_lj = special_lj[sbmask(j)];
+    const F_FLOAT factor_coul = special_coul[sbmask(j)];
+    j &= NEIGHMASK;
+
+    const X_FLOAT delx = xtmp - x(j,0);
+    const X_FLOAT dely = ytmp - x(j,1);
+    const X_FLOAT delz = ztmp - x(j,2);
+    const int jtype = type(j);
+    const X_FLOAT muj = mu(j,3);
+    const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+
+    X_FLOAT cutsq_ij = STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype);
+
+    if (rsq < cutsq_ij) {
+      const F_FLOAT r2inv = 1.0/rsq;
+      const F_FLOAT r6inv = r2inv*r2inv*r2inv;
+      F_FLOAT forcelj = 0;
+      F_FLOAT evdwl = 0;
+      F_FLOAT ecoul = 0;
+      F_FLOAT forcecoulx = 0;
+      F_FLOAT forcecouly = 0;
+      F_FLOAT forcecoulz = 0;
+      F_FLOAT tixcoul = 0;
+      F_FLOAT tiycoul = 0;
+      F_FLOAT tizcoul = 0;
+      F_FLOAT tjxcoul = 0;
+      F_FLOAT tjycoul = 0;
+      F_FLOAT tjzcoul = 0;
+      F_FLOAT fx = 0;
+      F_FLOAT fy = 0;
+      F_FLOAT fz = 0;
+
+      // lj term
+
+      X_FLOAT cut_ljsq_ij = STACKPARAMS?m_cut_ljsq[itype][jtype]:d_cut_ljsq(itype,jtype);
+      if (rsq < cut_ljsq_ij) {
+        forcelj = r6inv * ((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv -
+                           (STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2));
+        forcelj *= factor_lj*r2inv;
+        if (eflag_global) {
+          evdwl = r6inv * ((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv -
+                          (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)) -
+                          (STACKPARAMS?m_params[itype][jtype].offset:params(itype,jtype).offset);
+          evdwl *= factor_lj;
+          ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<nlocal)))?1.0:0.5)*evdwl;
+        }
+      } // cutsq_ljsq_ij
+
+      // coul term
+
+      X_FLOAT cut_coulsq_ij = STACKPARAMS?m_cut_coulsq[itype][jtype]:d_cut_coulsq(itype,jtype);
+
+      if (rsq < cut_coulsq_ij) {
+
+        const F_FLOAT r2inv = 1.0/rsq;
+        const F_FLOAT rinv = sqrt(r2inv);
+        const F_FLOAT qj = q[j];
+
+        X_FLOAT r3inv = r2inv*rinv;
+
+        // charge-charge
+        if (qtmp != 0.0 && qj != 0.0) {
+          X_FLOAT pre1 = qtmp*qj*r3inv;
+          forcecoulx += pre1*delx;
+          forcecouly += pre1*dely;
+          forcecoulz += pre1*delz;
+        }
+
+        // dipole-dipole
+
+        F_FLOAT pdotp, pidotr, pjdotr;
+        F_FLOAT r5inv = r3inv*r2inv;
+
+       if (mui > 0.0 && muj > 0.0) {
+
+          F_FLOAT r7inv = r5inv*r2inv;
+
+          pdotp = mu(i,0)*mu(j,0) + mu(i,1)*mu(j,1) + mu(i,2)*mu(j,2);
+          pidotr = mu(i,0)*delx + mu(i,1)*dely + mu(i,2)*delz;
+          pjdotr = mu(j,0)*delx + mu(j,1)*dely + mu(j,2)*delz;
+
+          X_FLOAT pre1 = 3.0*r5inv*pdotp - 15.0*r7inv*pidotr*pjdotr;
+          F_FLOAT pre2 = 3.0*r5inv*pjdotr;
+          F_FLOAT pre3 = 3.0*r5inv*pidotr;
+          F_FLOAT pre4 = -1.0*r3inv;
+
+          forcecoulx += pre1*delx + pre2*mu(i,0) + pre3*mu(j,0);
+          forcecouly += pre1*dely + pre2*mu(i,1) + pre3*mu(j,1);
+          forcecoulz += pre1*delz + pre2*mu(i,2) + pre3*mu(j,2);
+
+          F_FLOAT crossx = pre4 * (mu(i,1)*mu(j,2) - mu(i,2)*mu(j,1));
+          F_FLOAT crossy = pre4 * (mu(i,2)*mu(j,0) - mu(i,0)*mu(j,2));
+          F_FLOAT crossz = pre4 * (mu(i,0)*mu(j,1) - mu(i,1)*mu(j,0));
+
+          tixcoul += crossx + pre2 * (mu(i,1)*delz - mu(i,2)*dely);
+          tiycoul += crossy + pre2 * (mu(i,2)*delx - mu(i,0)*delz);
+          tizcoul += crossz + pre2 * (mu(i,0)*dely - mu(i,1)*delx);
+          tjxcoul += -crossx + pre3 * (mu(j,1)*delz - mu(j,2)*dely);
+          tjycoul += -crossy + pre3 * (mu(j,2)*delx - mu(j,0)*delz);
+          tjzcoul += -crossz + pre3 * (mu(j,0)*dely - mu(j,1)*delx);
+        }
+
+        // dipole-charge
+
+        if (mui > 0 && qj != 0) {
+          pidotr = mu(i,0)*delx + mu(i,1)*dely + mu(i,2)*delz;
+          F_FLOAT pre1 = 3.0*qj*r5inv * pidotr;
+          F_FLOAT pre2 = qj*r3inv;
+
+          forcecoulx += pre2*mu(i,0) - pre1*delx;
+          forcecouly += pre2*mu(i,1) - pre1*dely;
+          forcecoulz += pre2*mu(i,2) - pre1*delz;
+          tixcoul += pre2 * (mu(i,1)*delz - mu(i,2)*dely);
+          tiycoul += pre2 * (mu(i,2)*delx - mu(i,0)*delz);
+          tizcoul += pre2 * (mu(i,0)*dely - mu(i,1)*delx);
+        }
+
+        // charge-dipole
+
+        if (qtmp != 0 && muj > 0) {
+          pjdotr = mu(j,0)*delx + mu(j,1)*dely + mu(j,2)*delz;
+          X_FLOAT pre1 = 3.0*qtmp*r5inv * pjdotr;
+          X_FLOAT pre2 = qtmp*r3inv;
+
+          forcecoulx += pre1*delx - pre2*mu(j,0);
+          forcecouly += pre1*dely - pre2*mu(j,1);
+          forcecoulz += pre1*delz - pre2*mu(j,2);
+          tjxcoul += -pre2 * (mu(j,1)*delz - mu(j,2)*dely);
+          tjycoul += -pre2 * (mu(j,2)*delx - mu(j,0)*delz);
+          tjzcoul += -pre2 * (mu(j,0)*dely - mu(j,1)*delx);
+        }
+
+        F_FLOAT fq = factor_coul*qqrd2e;
+        fx = fq*forcecoulx + delx*forcelj;
+        fy = fq*forcecouly + dely*forcelj;
+        fz = fq*forcecoulz + delz*forcelj;
+
+        // force & torque accumulation
+
+        fx_i += fx;
+        fy_i += fy;
+        fz_i += fz;
+        torquex_i += fq*tixcoul;
+        torquey_i += fq*tiycoul;
+        torquez_i += fq*tizcoul;
+
+        if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) {
+          a_f(j,0) -= fx;
+          a_f(j,1) -= fy;
+          a_f(j,2) -= fz;
+          a_torque(j,0) += fq*tjxcoul;
+          a_torque(j,1) += fq*tjycoul;
+          a_torque(j,2) += fq*tjzcoul;
+        }
+
+        if (EVFLAG && eflag_global) {
+          ecoul = qtmp*qj*rinv;
+          if (mu(i,3) > 0.0 && mu(j,3) > 0.0)
+            ecoul += r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr;
+          if (mu(i,3) > 0.0 && qj != 0.0)
+            ecoul += -qj*r3inv*pidotr;
+          if (mu(j,3) > 0.0 && qtmp != 0.0)
+            ecoul += qtmp*r3inv*pjdotr;
+          ecoul *= factor_coul*qqrd2e;
+          ev.ecoul += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<nlocal)))?1.0:0.5)*ecoul;
+        }
+      } // cutsq_coulsq_ij
+
+      if (EVFLAG && (eflag_atom || vflag_either))
+        ev_tally_xyz<NEIGHFLAG, NEWTON_PAIR>(ev, i, j, ecoul+evdwl, fx, fy, fz, delx, dely, delz);
+    } // cutsq_ij
+  }
+
+  a_f(i,0) += fx_i;
+  a_f(i,1) += fy_i;
+  a_f(i,2) += fz_i;
+  a_torque(i,0) += torquex_i;
+  a_torque(i,1) += torquey_i;
+  a_torque(i,2) += torquez_i;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
+KOKKOS_INLINE_FUNCTION
+void PairLJCutDipoleCutKokkos<DeviceType>::operator()(TagPairLJCutDipoleCutKernel<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>, const int ii) const {
+  EV_FLOAT ev;
+  this->template operator()<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(TagPairLJCutDipoleCutKernel<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>(), ii, ev);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+template<int NEIGHFLAG, int NEWTON_PAIR>
+KOKKOS_INLINE_FUNCTION
+void PairLJCutDipoleCutKokkos<DeviceType>::ev_tally_xyz(EV_FLOAT & ev, int i, int j, const F_FLOAT &epair,
+                                                        F_FLOAT fx, F_FLOAT fy, F_FLOAT fz,
+                                                        X_FLOAT delx, X_FLOAT dely, X_FLOAT delz) const
+{
+  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,typename KKDevice<DeviceType>::value,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
+  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,typename KKDevice<DeviceType>::value,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
+
+  if (eflag_atom) {
+    const E_FLOAT epairhalf = 0.5 * epair;
+    if (NEIGHFLAG == FULL || newton_pair || i < nlocal)
+      v_eatom[i] += epairhalf;
+    if (NEIGHFLAG != FULL && (newton_pair || j < nlocal))
+      v_eatom[j] += epairhalf;
+  }
+
+  if (vflag_either) {
+    const F_FLOAT v0 = delx*fx;
+    const F_FLOAT v1 = dely*fy;
+    const F_FLOAT v2 = delz*fz;
+    const F_FLOAT v3 = delx*fy;
+    const F_FLOAT v4 = delx*fz;
+    const F_FLOAT v5 = dely*fz;
+
+    if (vflag_global) {
+      if (NEIGHFLAG != FULL) {
+        if (NEWTON_PAIR) { // neigh half, newton on
+          ev.v[0] += v0;
+          ev.v[1] += v1;
+          ev.v[2] += v2;
+          ev.v[3] += v3;
+          ev.v[4] += v4;
+          ev.v[5] += v5;
+        } else { // neigh half, newton off
+          if (i < nlocal) {
+            ev.v[0] += 0.5*v0;
+            ev.v[1] += 0.5*v1;
+            ev.v[2] += 0.5*v2;
+            ev.v[3] += 0.5*v3;
+            ev.v[4] += 0.5*v4;
+            ev.v[5] += 0.5*v5;
+          }
+          if (j < nlocal) {
+            ev.v[0] += 0.5*v0;
+            ev.v[1] += 0.5*v1;
+            ev.v[2] += 0.5*v2;
+            ev.v[3] += 0.5*v3;
+            ev.v[4] += 0.5*v4;
+            ev.v[5] += 0.5*v5;
+          }
+        }
+      } else { //neigh full
+        ev.v[0] += 0.5*v0;
+        ev.v[1] += 0.5*v1;
+        ev.v[2] += 0.5*v2;
+        ev.v[3] += 0.5*v3;
+        ev.v[4] += 0.5*v4;
+        ev.v[5] += 0.5*v5;
+      }
+    }
+
+    if (vflag_atom) {
+
+      if (NEIGHFLAG == FULL || NEWTON_PAIR || i < nlocal) {
+        v_vatom(i,0) += 0.5*v0;
+        v_vatom(i,1) += 0.5*v1;
+        v_vatom(i,2) += 0.5*v2;
+        v_vatom(i,3) += 0.5*v3;
+        v_vatom(i,4) += 0.5*v4;
+        v_vatom(i,5) += 0.5*v5;
+      }
+      if (NEIGHFLAG != FULL && (NEWTON_PAIR || j < nlocal)) {
+        v_vatom(j,0) += 0.5*v0;
+        v_vatom(j,1) += 0.5*v1;
+        v_vatom(j,2) += 0.5*v2;
+        v_vatom(j,3) += 0.5*v3;
+        v_vatom(j,4) += 0.5*v4;
+        v_vatom(j,5) += 0.5*v5;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCutDipoleCutKokkos<DeviceType>::allocate()
+{
+  PairLJCutDipoleCut::allocate();
+
+  int n = atom->ntypes;
+  memory->destroy(cutsq);
+  memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq");
+  d_cutsq = k_cutsq.template view<DeviceType>();
+  memory->destroy(cut_ljsq);
+  memoryKK->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq");
+  d_cut_ljsq = k_cut_ljsq.template view<DeviceType>();
+  memory->destroy(cut_coulsq);
+  memoryKK->create_kokkos(k_cut_coulsq,cut_coulsq,n+1,n+1,"pair:cut_coulsq");
+  d_cut_coulsq = k_cut_coulsq.template view<DeviceType>();
+  k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJCutDipoleCut::params",n+1,n+1);
+  params = k_params.template view<DeviceType>();
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCutDipoleCutKokkos<DeviceType>::settings(int narg, char **arg)
+{
+  if (narg > 2) error->all(FLERR,"Illegal pair_style command");
+
+  PairLJCutDipoleCut::settings(1,arg);
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCutDipoleCutKokkos<DeviceType>::init_style()
+{
+  PairLJCutDipoleCut::init_style();
+
+  // error if rRESPA with inner levels
+
+  if (update->whichflag == 1 && utils::strmatch(update->integrate_style,"^respa")) {
+    int respa = 0;
+    if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
+    if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
+    if (respa)
+      error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle");
+  }
+
+  // adjust neighbor list request for KOKKOS
+
+  neighflag = lmp->kokkos->neighflag;
+  auto request = neighbor->find_request(this);
+  request->set_kokkos_host(std::is_same<DeviceType,LMPHostType>::value &&
+                           !std::is_same<DeviceType,LMPDeviceType>::value);
+  request->set_kokkos_device(std::is_same<DeviceType,LMPDeviceType>::value);
+  if (neighflag == FULL) request->enable_full();
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+double PairLJCutDipoleCutKokkos<DeviceType>::init_one(int i, int j)
+{
+  double cutone = PairLJCutDipoleCut::init_one(i,j);
+  double cut_ljsqm = cut_ljsq[i][j];
+  double cut_coulsqm = cut_coulsq[i][j];
+
+  k_params.h_view(i,j).lj1 = lj1[i][j];
+  k_params.h_view(i,j).lj2 = lj2[i][j];
+  k_params.h_view(i,j).lj3 = lj3[i][j];
+  k_params.h_view(i,j).lj4 = lj4[i][j];
+  k_params.h_view(i,j).offset = offset[i][j];
+  k_params.h_view(i,j).cut_ljsq = cut_ljsqm;
+  k_params.h_view(i,j).cut_coulsq = cut_coulsqm;
+
+  k_params.h_view(j,i) = k_params.h_view(i,j);
+  if (i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
+    m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
+    m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone;
+    m_cut_ljsq[j][i] = m_cut_ljsq[i][j] = cut_ljsqm;
+    m_cut_coulsq[j][i] = m_cut_coulsq[i][j] = cut_coulsqm;
+  }
+
+  k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone;
+  k_cutsq.template modify<LMPHostType>();
+  k_cut_ljsq.h_view(i,j) = k_cut_ljsq.h_view(j,i) = cut_ljsqm;
+  k_cut_ljsq.template modify<LMPHostType>();
+  k_cut_coulsq.h_view(i,j) = k_cut_coulsq.h_view(j,i) = cut_coulsqm;
+  k_cut_coulsq.template modify<LMPHostType>();
+  k_params.template modify<LMPHostType>();
+
+  return cutone;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+int PairLJCutDipoleCutKokkos<DeviceType>::sbmask(const int& j) const {
+  return j >> SBBITS & 3;
+}
+
+
+namespace LAMMPS_NS {
+template class PairLJCutDipoleCutKokkos<LMPDeviceType>;
+#ifdef LMP_KOKKOS_GPU
+template class PairLJCutDipoleCutKokkos<LMPHostType>;
+#endif
+}
+
--- a/src/KOKKOS/pair_lj_cut_dipole_cut_kokkos.h
+++ b/src/KOKKOS/pair_lj_cut_dipole_cut_kokkos.h
@ -0,0 +1,119 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(lj/cut/dipole/cut/kk,PairLJCutDipoleCutKokkos<LMPDeviceType>);
+PairStyle(lj/cut/dipole/cut/kk/device,PairLJCutDipoleCutKokkos<LMPDeviceType>);
+PairStyle(lj/cut/dipole/cut/kk/host,PairLJCutDipoleCutKokkos<LMPHostType>);
+// clang-format on
+#else
+
+// clang-format off
+#ifndef LMP_PAIR_LJ_CUT_DIPOLE_CUT_KOKKOS_H
+#define LMP_PAIR_LJ_CUT_DIPOLE_CUT_KOKKOS_H
+
+#include "pair_kokkos.h"
+#include "pair_lj_cut_dipole_cut.h"
+#include "neigh_list_kokkos.h"
+
+namespace LAMMPS_NS {
+
+template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
+struct TagPairLJCutDipoleCutKernel{};
+
+template<class DeviceType>
+class PairLJCutDipoleCutKokkos : public PairLJCutDipoleCut {
+ public:
+  enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF};
+  enum {COUL_FLAG=1};
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  typedef EV_FLOAT value_type;
+  PairLJCutDipoleCutKokkos(class LAMMPS *);
+  ~PairLJCutDipoleCutKokkos() override;
+
+  void compute(int, int) override;
+
+  void settings(int, char **) override;
+  void init_style() override;
+  double init_one(int, int) override;
+
+  template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairLJCutDipoleCutKernel<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>, const int, EV_FLOAT &ev) const;
+  template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairLJCutDipoleCutKernel<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>, const int) const;
+
+  template<int NEIGHFLAG, int NEWTON_PAIR>
+  KOKKOS_INLINE_FUNCTION
+  void ev_tally_xyz(EV_FLOAT &ev, int i, int j, const F_FLOAT &epair,
+                    F_FLOAT fx, F_FLOAT fy, F_FLOAT fz,
+                    X_FLOAT delx, X_FLOAT dely, X_FLOAT delz) const;
+
+  KOKKOS_INLINE_FUNCTION
+  int sbmask(const int& j) const;
+
+ protected:
+  Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType> k_params;
+  typename Kokkos::DualView<params_lj_coul**,
+    Kokkos::LayoutRight,DeviceType>::t_dev_const_um params;
+  // hardwired to space for 12 atom types
+  params_lj_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+
+  F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  typename AT::t_x_array_randomread x;
+  typename AT::t_x_array c_x;
+  typename AT::t_f_array f;
+  typename AT::t_f_array torque;
+  typename AT::t_int_1d_randomread type;
+  typename AT::t_float_1d_randomread q;
+  typename AT::t_mu_array_randomread mu;
+  typename AT::t_mu_array c_mu;
+
+  DAT::tdual_efloat_1d k_eatom;
+  DAT::tdual_virial_array k_vatom;
+  typename AT::t_efloat_1d d_eatom;
+  typename AT::t_virial_array d_vatom;
+
+  typename AT::tdual_ffloat_2d k_cutsq;
+  typename AT::t_ffloat_2d d_cutsq;
+  typename AT::tdual_ffloat_2d k_cut_ljsq;
+  typename AT::t_ffloat_2d d_cut_ljsq;
+  typename AT::tdual_ffloat_2d k_cut_coulsq;
+  typename AT::t_ffloat_2d d_cut_coulsq;
+
+
+  int neighflag,newton_pair;
+  int nlocal,nall,eflag,vflag;
+
+  double special_coul[4];
+  double special_lj[4];
+  double qqrd2e;
+
+  typename AT::t_neighbors_2d d_neighbors;
+  typename AT::t_int_1d_randomread d_ilist;
+  typename AT::t_int_1d_randomread d_numneigh;
+
+  void allocate() override;
+  friend void pair_virial_fdotr_compute<PairLJCutDipoleCutKokkos>(PairLJCutDipoleCutKokkos*);
+};
+
+}
+
+#endif
+#endif
+
--- a/src/KOKKOS/pair_lj_cut_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_cut_kokkos.cpp
@ -66,7 +66,6 @@ void PairLJCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  eflag = eflag_in;
  vflag = vflag_in;

-
  if (neighflag == FULL) no_virial_fdotr_compute = 1;

  ev_init(eflag,vflag,0);
--- a/src/KOKKOS/pair_lj_expand_coul_long_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_expand_coul_long_kokkos.cpp
@ -0,0 +1,498 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Trung Nguyen (U Chicago)
+------------------------------------------------------------------------- */
+
+#include "pair_lj_expand_coul_long_kokkos.h"
+
+#include "atom_kokkos.h"
+#include "atom_masks.h"
+#include "error.h"
+#include "force.h"
+#include "kokkos.h"
+#include "math_const.h"
+#include "memory_kokkos.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "neighbor.h"
+#include "respa.h"
+#include "update.h"
+
+#include <cmath>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairLJExpandCoulLongKokkos<DeviceType>::PairLJExpandCoulLongKokkos(LAMMPS *lmp) : PairLJExpandCoulLong(lmp)
+{
+  respa_enable = 0;
+
+  kokkosable = 1;
+  atomKK = (AtomKokkos *) atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK;
+  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairLJExpandCoulLongKokkos<DeviceType>::~PairLJExpandCoulLongKokkos()
+{
+  if (copymode) return;
+
+  if (allocated) {
+    memoryKK->destroy_kokkos(k_eatom,eatom);
+    memoryKK->destroy_kokkos(k_vatom,vatom);
+    memoryKK->destroy_kokkos(k_cutsq,cutsq);
+    memoryKK->destroy_kokkos(k_cut_ljsq,cut_ljsq);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJExpandCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
+{
+  eflag = eflag_in;
+  vflag = vflag_in;
+
+  if (neighflag == FULL) no_virial_fdotr_compute = 1;
+
+  ev_init(eflag,vflag,0);
+
+  // reallocate per-atom arrays if necessary
+
+  if (eflag_atom) {
+    memoryKK->destroy_kokkos(k_eatom,eatom);
+    memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
+    d_eatom = k_eatom.view<DeviceType>();
+  }
+  if (vflag_atom) {
+    memoryKK->destroy_kokkos(k_vatom,vatom);
+    memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom");
+    d_vatom = k_vatom.view<DeviceType>();
+  }
+
+  atomKK->sync(execution_space,datamask_read);
+  k_cutsq.template sync<DeviceType>();
+  k_cut_ljsq.template sync<DeviceType>();
+  k_params.template sync<DeviceType>();
+  if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
+  else atomKK->modified(execution_space,F_MASK);
+
+  x = atomKK->k_x.view<DeviceType>();
+  c_x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  q = atomKK->k_q.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  nlocal = atom->nlocal;
+  nall = atom->nlocal + atom->nghost;
+  special_lj[0] = force->special_lj[0];
+  special_lj[1] = force->special_lj[1];
+  special_lj[2] = force->special_lj[2];
+  special_lj[3] = force->special_lj[3];
+  special_coul[0] = force->special_coul[0];
+  special_coul[1] = force->special_coul[1];
+  special_coul[2] = force->special_coul[2];
+  special_coul[3] = force->special_coul[3];
+  qqrd2e = force->qqrd2e;
+  newton_pair = force->newton_pair;
+
+  // loop over neighbors of my atoms
+
+  copymode = 1;
+
+  EV_FLOAT ev;
+  if (ncoultablebits)
+    ev = pair_compute<PairLJExpandCoulLongKokkos<DeviceType>,CoulLongTable<1> >
+      (this,(NeighListKokkos<DeviceType>*)list);
+  else
+    ev = pair_compute<PairLJExpandCoulLongKokkos<DeviceType>,CoulLongTable<0> >
+      (this,(NeighListKokkos<DeviceType>*)list);
+
+
+  if (eflag_global) {
+    eng_vdwl += ev.evdwl;
+    eng_coul += ev.ecoul;
+  }
+  if (vflag_global) {
+    virial[0] += ev.v[0];
+    virial[1] += ev.v[1];
+    virial[2] += ev.v[2];
+    virial[3] += ev.v[3];
+    virial[4] += ev.v[4];
+    virial[5] += ev.v[5];
+  }
+
+  if (eflag_atom) {
+    k_eatom.template modify<DeviceType>();
+    k_eatom.template sync<LMPHostType>();
+  }
+
+  if (vflag_atom) {
+    k_vatom.template modify<DeviceType>();
+    k_vatom.template sync<LMPHostType>();
+  }
+
+  if (vflag_fdotr) pair_virial_fdotr_compute(this);
+
+  copymode = 0;
+}
+
+/* ----------------------------------------------------------------------
+   compute LJ 12-6 pair force between atoms i and j
+   ---------------------------------------------------------------------- */
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJExpandCoulLongKokkos<DeviceType>::
+compute_fpair(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/,
+              const int& itype, const int& jtype) const {
+  const F_FLOAT r = sqrt(rsq);
+  const F_FLOAT rshift = r - (STACKPARAMS?m_params[itype][jtype].shift:params(itype,jtype).shift);
+  const F_FLOAT rshiftsq = rshift*rshift;
+  const F_FLOAT r2inv = 1.0/rshiftsq;
+  const F_FLOAT r6inv = r2inv*r2inv*r2inv;
+
+  const F_FLOAT forcelj = r6inv *
+    ((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv -
+     (STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2));
+
+  return forcelj/rshift/r;
+}
+
+/* ----------------------------------------------------------------------
+   compute coulomb pair force between atoms i and j
+   ---------------------------------------------------------------------- */
+template<class DeviceType>
+template<bool STACKPARAMS,  class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJExpandCoulLongKokkos<DeviceType>::
+compute_fcoul(const F_FLOAT& rsq, const int& /*i*/, const int&j,
+              const int& /*itype*/, const int& /*jtype*/, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
+  if (Specialisation::DoTable && rsq > tabinnersq) {
+    union_int_float_t rsq_lookup;
+    rsq_lookup.f = rsq;
+    const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits;
+    const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable];
+    const F_FLOAT table = d_ftable[itable] + fraction*d_dftable[itable];
+    F_FLOAT forcecoul = qtmp*q[j] * table;
+    if (factor_coul < 1.0) {
+      const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable];
+      const F_FLOAT prefactor = qtmp*q[j] * table;
+      forcecoul -= (1.0-factor_coul)*prefactor;
+    }
+    return forcecoul/rsq;
+  } else {
+    const F_FLOAT r = sqrt(rsq);
+    const F_FLOAT grij = g_ewald * r;
+    const F_FLOAT expm2 = exp(-grij*grij);
+    const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij);
+    const F_FLOAT rinv = 1.0/r;
+    const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+    const F_FLOAT prefactor = qqrd2e * qtmp*q[j]*rinv;
+    F_FLOAT forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+    if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+
+    return forcecoul*rinv*rinv;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   compute LJ 12-6 pair potential energy between atoms i and j
+   ---------------------------------------------------------------------- */
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJExpandCoulLongKokkos<DeviceType>::
+compute_evdwl(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/,
+              const int& itype, const int& jtype) const {
+
+  const F_FLOAT r = sqrt(rsq);
+  const F_FLOAT rshift = r - (STACKPARAMS?m_params[itype][jtype].shift:params(itype,jtype).shift);
+  const F_FLOAT rshiftsq = rshift*rshift;
+  const F_FLOAT r2inv = 1.0/rshiftsq;
+  const F_FLOAT r6inv = r2inv*r2inv*r2inv;
+
+  return r6inv*((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv
+        - (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4))
+        - (STACKPARAMS?m_params[itype][jtype].offset:params(itype,jtype).offset);
+}
+
+/* ----------------------------------------------------------------------
+   compute coulomb pair potential energy between atoms i and j
+   ---------------------------------------------------------------------- */
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJExpandCoulLongKokkos<DeviceType>::
+compute_ecoul(const F_FLOAT& rsq, const int& /*i*/, const int&j,
+              const int& /*itype*/, const int& /*jtype*/,
+              const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
+  if (Specialisation::DoTable && rsq > tabinnersq) {
+    union_int_float_t rsq_lookup;
+    rsq_lookup.f = rsq;
+    const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits;
+    const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable];
+    const F_FLOAT table = d_etable[itable] + fraction*d_detable[itable];
+    F_FLOAT ecoul = qtmp*q[j] * table;
+    if (factor_coul < 1.0) {
+      const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable];
+      const F_FLOAT prefactor = qtmp*q[j] * table;
+      ecoul -= (1.0-factor_coul)*prefactor;
+    }
+    return ecoul;
+  } else {
+    const F_FLOAT r = sqrt(rsq);
+    const F_FLOAT grij = g_ewald * r;
+    const F_FLOAT expm2 = exp(-grij*grij);
+    const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij);
+    const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+    const F_FLOAT prefactor = qqrd2e * qtmp*q[j]/r;
+    F_FLOAT ecoul = prefactor * erfc;
+    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+    return ecoul;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJExpandCoulLongKokkos<DeviceType>::allocate()
+{
+  PairLJExpandCoulLong::allocate();
+
+  int n = atom->ntypes;
+  memory->destroy(cutsq);
+  memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq");
+  d_cutsq = k_cutsq.template view<DeviceType>();
+
+  memory->destroy(cut_ljsq);
+  memoryKK->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq");
+  d_cut_ljsq = k_cut_ljsq.template view<DeviceType>();
+
+  d_cut_coulsq = typename AT::t_ffloat_2d("pair:cut_coulsq",n+1,n+1);
+
+  k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJExpandCoulLong::params",n+1,n+1);
+  params = k_params.template view<DeviceType>();
+}
+
+template<class DeviceType>
+void PairLJExpandCoulLongKokkos<DeviceType>::init_tables(double cut_coul, double *cut_respa)
+{
+  Pair::init_tables(cut_coul,cut_respa);
+
+  typedef typename ArrayTypes<DeviceType>::t_ffloat_1d table_type;
+  typedef typename ArrayTypes<LMPHostType>::t_ffloat_1d host_table_type;
+
+  int ntable = 1;
+  for (int i = 0; i < ncoultablebits; i++) ntable *= 2;
+
+
+  // Copy rtable and drtable
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+  for (int i = 0; i < ntable; i++) {
+    h_table(i) = rtable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_rtable = d_table;
+  }
+
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+  for (int i = 0; i < ntable; i++) {
+    h_table(i) = drtable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_drtable = d_table;
+  }
+
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+
+  // Copy ftable and dftable
+  for (int i = 0; i < ntable; i++) {
+    h_table(i) = ftable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_ftable = d_table;
+  }
+
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+
+  for (int i = 0; i < ntable; i++) {
+    h_table(i) = dftable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_dftable = d_table;
+  }
+
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+
+  // Copy ctable and dctable
+  for (int i = 0; i < ntable; i++) {
+    h_table(i) = ctable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_ctable = d_table;
+  }
+
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+
+  for (int i = 0; i < ntable; i++) {
+    h_table(i) = dctable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_dctable = d_table;
+  }
+
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+
+  // Copy etable and detable
+  for (int i = 0; i < ntable; i++) {
+    h_table(i) = etable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_etable = d_table;
+  }
+
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+
+  for (int i = 0; i < ntable; i++) {
+    h_table(i) = detable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_detable = d_table;
+  }
+}
+
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJExpandCoulLongKokkos<DeviceType>::settings(int narg, char **arg)
+{
+  if (narg > 2) error->all(FLERR,"Illegal pair_style command");
+
+  PairLJExpandCoulLong::settings(narg,arg);
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJExpandCoulLongKokkos<DeviceType>::init_style()
+{
+  PairLJExpandCoulLong::init_style();
+
+  Kokkos::deep_copy(d_cut_coulsq,cut_coulsq);
+
+  // error if rRESPA with inner levels
+
+  if (update->whichflag == 1 && utils::strmatch(update->integrate_style,"^respa")) {
+    int respa = 0;
+    if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
+    if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
+    if (respa)
+      error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle");
+  }
+
+  // adjust neighbor list request for KOKKOS
+
+  neighflag = lmp->kokkos->neighflag;
+  auto request = neighbor->find_request(this);
+  request->set_kokkos_host(std::is_same<DeviceType,LMPHostType>::value &&
+                           !std::is_same<DeviceType,LMPDeviceType>::value);
+  request->set_kokkos_device(std::is_same<DeviceType,LMPDeviceType>::value);
+  if (neighflag == FULL) request->enable_full();
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+double PairLJExpandCoulLongKokkos<DeviceType>::init_one(int i, int j)
+{
+  double cutone = PairLJExpandCoulLong::init_one(i,j);
+  double cut_ljsqm = cut_ljsq[i][j];
+
+  k_params.h_view(i,j).lj1 = lj1[i][j];
+  k_params.h_view(i,j).lj2 = lj2[i][j];
+  k_params.h_view(i,j).lj3 = lj3[i][j];
+  k_params.h_view(i,j).lj4 = lj4[i][j];
+  k_params.h_view(i,j).offset = offset[i][j];
+  k_params.h_view(i,j).shift = shift[i][j];
+  k_params.h_view(i,j).cut_ljsq = cut_ljsqm;
+  k_params.h_view(i,j).cut_coulsq = cut_coulsq;
+
+  k_params.h_view(j,i) = k_params.h_view(i,j);
+  if (i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
+    m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
+    m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone;
+    m_cut_ljsq[j][i] = m_cut_ljsq[i][j] = cut_ljsqm;
+    m_cut_coulsq[j][i] = m_cut_coulsq[i][j] = cut_coulsq;
+  }
+
+  k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone;
+  k_cutsq.template modify<LMPHostType>();
+  k_cut_ljsq.h_view(i,j) = k_cut_ljsq.h_view(j,i) = cut_ljsqm;
+  k_cut_ljsq.template modify<LMPHostType>();
+  k_params.template modify<LMPHostType>();
+
+  return cutone;
+}
+
+namespace LAMMPS_NS {
+template class PairLJExpandCoulLongKokkos<LMPDeviceType>;
+#ifdef LMP_KOKKOS_GPU
+template class PairLJExpandCoulLongKokkos<LMPHostType>;
+#endif
+}
+
--- a/src/KOKKOS/pair_lj_expand_coul_long_kokkos.h
+++ b/src/KOKKOS/pair_lj_expand_coul_long_kokkos.h
@ -0,0 +1,148 @@
+// clang-format off
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(lj/expand/coul/long/kk,PairLJExpandCoulLongKokkos<LMPDeviceType>);
+PairStyle(lj/expand/coul/long/kk/device,PairLJExpandCoulLongKokkos<LMPDeviceType>);
+PairStyle(lj/expand/coul/long/kk/host,PairLJExpandCoulLongKokkos<LMPHostType>);
+// clang-format on
+#else
+
+// clang-format off
+#ifndef LMP_PAIR_LJ_EXPAND_COUL_LONG_KOKKOS_H
+#define LMP_PAIR_LJ_EXPAND_COUL_LONG_KOKKOS_H
+
+#include "pair_kokkos.h"
+#include "pair_lj_expand_coul_long.h"
+#include "neigh_list_kokkos.h"
+
+namespace LAMMPS_NS {
+
+template<class DeviceType>
+class PairLJExpandCoulLongKokkos : public PairLJExpandCoulLong {
+ public:
+  enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF};
+  enum {COUL_FLAG=1};
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  PairLJExpandCoulLongKokkos(class LAMMPS *);
+  ~PairLJExpandCoulLongKokkos() override;
+
+  void compute(int, int) override;
+
+  void settings(int, char **) override;
+  void init_tables(double cut_coul, double *cut_respa) override;
+  void init_style() override;
+  double init_one(int, int) override;
+
+  struct params_lj_coul{
+    KOKKOS_INLINE_FUNCTION
+    params_lj_coul() {cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;shift=0;};
+    KOKKOS_INLINE_FUNCTION
+    params_lj_coul(int /*i*/) {cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;shift=0;};
+    F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset,shift;
+  };
+
+ protected:
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
+                        const int& itype, const int& jtype) const;
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype,
+                        const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
+                        const int& itype, const int& jtype) const;
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j,
+                        const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
+
+  Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType> k_params;
+  typename Kokkos::DualView<params_lj_coul**,
+    Kokkos::LayoutRight,DeviceType>::t_dev_const_um params;
+  // hardwired to space for 12 atom types
+  params_lj_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+
+  F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  typename AT::t_x_array_randomread x;
+  typename AT::t_x_array c_x;
+  typename AT::t_f_array f;
+  typename AT::t_int_1d_randomread type;
+  typename AT::t_float_1d_randomread q;
+
+  DAT::tdual_efloat_1d k_eatom;
+  DAT::tdual_virial_array k_vatom;
+  typename AT::t_efloat_1d d_eatom;
+  typename AT::t_virial_array d_vatom;
+
+  int newton_pair;
+
+  typename AT::tdual_ffloat_2d k_cutsq;
+  typename AT::t_ffloat_2d d_cutsq;
+  typename AT::tdual_ffloat_2d k_cut_ljsq;
+  typename AT::t_ffloat_2d d_cut_ljsq;
+  typename AT::t_ffloat_2d d_cut_coulsq;
+
+  typename AT::t_ffloat_1d_randomread
+    d_rtable, d_drtable, d_ftable, d_dftable,
+    d_ctable, d_dctable, d_etable, d_detable;
+
+  int neighflag;
+  int nlocal,nall,eflag,vflag;
+
+  double special_coul[4];
+  double special_lj[4];
+  double qqrd2e;
+
+  void allocate() override;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALF,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALFTHREAD,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALF,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALFTHREAD,false,CoulLongTable<1> >;
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,FULL,CoulLongTable<1> >(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,HALF,CoulLongTable<1> >(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJExpandCoulLongKokkos,CoulLongTable<1> >(PairLJExpandCoulLongKokkos*,
+                                                            NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALF,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALFTHREAD,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALF,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALFTHREAD,false,CoulLongTable<0> >;
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,FULL,CoulLongTable<0> >(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,HALF,CoulLongTable<0> >(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJExpandCoulLongKokkos,CoulLongTable<0> >(PairLJExpandCoulLongKokkos*,
+                                                            NeighListKokkos<DeviceType>*);
+  friend void pair_virial_fdotr_compute<PairLJExpandCoulLongKokkos>(PairLJExpandCoulLongKokkos*);
+};
+
+}
+
+#endif
+#endif
+
--- a/src/atom_masks.h
+++ b/src/atom_masks.h
@ -42,6 +42,7 @@
 #define MAP_MASK       0x00008000
 #define ENERGY_MASK    0x00010000
 #define VIRIAL_MASK    0x00020000
+#define MU_MASK        0x00040000

 // SPIN