diff --git a/doc/src/Commands_pair.rst b/doc/src/Commands_pair.rst index 7f7b2d4b7d..0912ddcc41 100644 --- a/doc/src/Commands_pair.rst +++ b/doc/src/Commands_pair.rst @@ -166,7 +166,7 @@ OPT. * :doc:`lj/cut/coul/msm (go) ` * :doc:`lj/cut/coul/msm/dielectric ` * :doc:`lj/cut/coul/wolf (o) ` - * :doc:`lj/cut/dipole/cut (go) ` + * :doc:`lj/cut/dipole/cut (gko) ` * :doc:`lj/cut/dipole/long (g) ` * :doc:`lj/cut/dipole/sf (go) ` * :doc:`lj/cut/soft (o) ` @@ -175,7 +175,7 @@ OPT. * :doc:`lj/cut/tip4p/long (got) ` * :doc:`lj/cut/tip4p/long/soft (o) ` * :doc:`lj/expand (gko) ` - * :doc:`lj/expand/coul/long (g) ` + * :doc:`lj/expand/coul/long (gk) ` * :doc:`lj/gromacs (gko) ` * :doc:`lj/gromacs/coul/gromacs (ko) ` * :doc:`lj/long/coul/long (iot) ` diff --git a/doc/src/pair_dipole.rst b/doc/src/pair_dipole.rst index 6253b2916d..10d0061948 100644 --- a/doc/src/pair_dipole.rst +++ b/doc/src/pair_dipole.rst @@ -1,5 +1,6 @@ .. index:: pair_style lj/cut/dipole/cut .. index:: pair_style lj/cut/dipole/cut/gpu +.. index:: pair_style lj/cut/dipole/cut/kk .. index:: pair_style lj/cut/dipole/cut/omp .. index:: pair_style lj/sf/dipole/sf .. index:: pair_style lj/sf/dipole/sf/gpu @@ -11,7 +12,7 @@ pair_style lj/cut/dipole/cut command ==================================== -Accelerator Variants: *lj/cut/dipole/cut/gpu*, *lj/cut/dipole/cut/omp* +Accelerator Variants: *lj/cut/dipole/cut/gpu*, *lj/cut/dipole/cut/kk*, *lj/cut/dipole/cut/omp* pair_style lj/sf/dipole/sf command ================================== diff --git a/doc/src/pair_lj_expand.rst b/doc/src/pair_lj_expand.rst index dca7a2dd2f..ffdcbf66e3 100644 --- a/doc/src/pair_lj_expand.rst +++ b/doc/src/pair_lj_expand.rst @@ -3,6 +3,7 @@ .. index:: pair_style lj/expand/kk .. index:: pair_style lj/expand/omp .. index:: pair_style lj/expand/coul/long +.. index:: pair_style lj/expand/coul/long/kk .. index:: pair_style lj/expand/coul/long/gpu pair_style lj/expand command @@ -13,7 +14,7 @@ Accelerator Variants: *lj/expand/gpu*, *lj/expand/kk*, *lj/expand/omp* pair_style lj/expand/coul/long command ====================================== -Accelerator Variants: *lj/expand/coul/long/gpu* +Accelerator Variants: *lj/expand/coul/long/gpu*, *lj/expand/coul/long/kk* Syntax """""" diff --git a/src/.gitignore b/src/.gitignore index f8efbc8f50..22cb5f1ee0 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -102,6 +102,8 @@ /meam*.cpp /pair_meam.cpp /pair_meam.h +/pair_meam_ms.cpp +/pair_meam_ms.h /compute_mliap.cpp /compute_mliap.h diff --git a/src/DIPOLE/atom_vec_dipole.h b/src/DIPOLE/atom_vec_dipole.h index 923454a39f..d2f5746462 100644 --- a/src/DIPOLE/atom_vec_dipole.h +++ b/src/DIPOLE/atom_vec_dipole.h @@ -31,7 +31,7 @@ class AtomVecDipole : virtual public AtomVec { void grow_pointers() override; void data_atom_post(int) override; - private: + protected: double **mu; }; diff --git a/src/DIPOLE/pair_lj_cut_dipole_cut.cpp b/src/DIPOLE/pair_lj_cut_dipole_cut.cpp index a7e5674a88..237eaa183e 100644 --- a/src/DIPOLE/pair_lj_cut_dipole_cut.cpp +++ b/src/DIPOLE/pair_lj_cut_dipole_cut.cpp @@ -39,6 +39,8 @@ PairLJCutDipoleCut::PairLJCutDipoleCut(LAMMPS *lmp) : Pair(lmp) PairLJCutDipoleCut::~PairLJCutDipoleCut() { + if (copymode) return; + if (allocated) { memory->destroy(setflag); memory->destroy(cutsq); diff --git a/src/DIPOLE/pair_lj_cut_dipole_cut.h b/src/DIPOLE/pair_lj_cut_dipole_cut.h index e563077835..03270f0489 100644 --- a/src/DIPOLE/pair_lj_cut_dipole_cut.h +++ b/src/DIPOLE/pair_lj_cut_dipole_cut.h @@ -46,7 +46,7 @@ class PairLJCutDipoleCut : public Pair { double **epsilon, **sigma; double **lj1, **lj2, **lj3, **lj4, **offset; - void allocate(); + virtual void allocate(); }; } // namespace LAMMPS_NS diff --git a/src/EXTRA-PAIR/pair_lj_expand_coul_long.cpp b/src/EXTRA-PAIR/pair_lj_expand_coul_long.cpp index a63c41644f..119ad6edbf 100644 --- a/src/EXTRA-PAIR/pair_lj_expand_coul_long.cpp +++ b/src/EXTRA-PAIR/pair_lj_expand_coul_long.cpp @@ -60,6 +60,8 @@ PairLJExpandCoulLong::PairLJExpandCoulLong(LAMMPS *lmp) : Pair(lmp) PairLJExpandCoulLong::~PairLJExpandCoulLong() { + if (copymode) return; + if (allocated) { memory->destroy(setflag); memory->destroy(cutsq); diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index f493b5438a..65fec6999e 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -64,8 +64,8 @@ action atom_vec_bond_kokkos.cpp atom_vec_bond.cpp action atom_vec_bond_kokkos.h atom_vec_bond.h action atom_vec_charge_kokkos.cpp action atom_vec_charge_kokkos.h -action atom_vec_spin_kokkos.cpp atom_vec_spin.cpp -action atom_vec_spin_kokkos.h atom_vec_spin.h +action atom_vec_dipole_kokkos.cpp atom_vec_dipole.cpp +action atom_vec_dipole_kokkos.h atom_vec_dipole.h action atom_vec_dpd_kokkos.cpp atom_vec_dpd.cpp action atom_vec_dpd_kokkos.h atom_vec_dpd.h action atom_vec_full_kokkos.cpp atom_vec_full.cpp @@ -78,6 +78,8 @@ action atom_vec_molecular_kokkos.cpp atom_vec_molecular.cpp action atom_vec_molecular_kokkos.h atom_vec_molecular.h action atom_vec_sphere_kokkos.cpp atom_vec_sphere.cpp action atom_vec_sphere_kokkos.h atom_vec_sphere.h +action atom_vec_spin_kokkos.cpp atom_vec_spin.cpp +action atom_vec_spin_kokkos.h atom_vec_spin.h action bond_class2_kokkos.cpp bond_class2.cpp action bond_class2_kokkos.h bond_class2.h action bond_fene_kokkos.cpp bond_fene.cpp @@ -94,10 +96,10 @@ action compute_coord_atom_kokkos.cpp action compute_coord_atom_kokkos.h action compute_orientorder_atom_kokkos.cpp action compute_orientorder_atom_kokkos.h -action compute_temp_kokkos.cpp -action compute_temp_kokkos.h action compute_temp_deform_kokkos.cpp action compute_temp_deform_kokkos.h +action compute_temp_kokkos.cpp +action compute_temp_kokkos.h action dihedral_charmm_kokkos.cpp dihedral_charmm.cpp action dihedral_charmm_kokkos.h dihedral_charmm.h action dihedral_class2_kokkos.cpp dihedral_class2.cpp @@ -110,13 +112,15 @@ action domain_kokkos.cpp action domain_kokkos.h action dynamical_matrix_kokkos.cpp dynamical_matrix.cpp action dynamical_matrix_kokkos.h dynamical_matrix.h -action fftdata_kokkos.h fft3d.h action fft3d_kokkos.cpp fft3d.cpp action fft3d_kokkos.h fft3d.h +action fftdata_kokkos.h fft3d.h action fix_acks2_reaxff_kokkos.cpp fix_acks2_reaxff.cpp action fix_acks2_reaxff_kokkos.h fix_acks2_reaxff.h action fix_deform_kokkos.cpp action fix_deform_kokkos.h +action fix_dpd_energy_kokkos.cpp fix_dpd_energy.cpp +action fix_dpd_energy_kokkos.h fix_dpd_energy.h action fix_dt_reset_kokkos.cpp action fix_dt_reset_kokkos.h action fix_enforce2d_kokkos.cpp @@ -131,6 +135,8 @@ action fix_langevin_kokkos.cpp action fix_langevin_kokkos.h action fix_minimize_kokkos.cpp action fix_minimize_kokkos.h +action fix_momentum_kokkos.cpp +action fix_momentum_kokkos.h action fix_neigh_history_kokkos.cpp action fix_neigh_history_kokkos.h action fix_nh_kokkos.cpp @@ -155,24 +161,20 @@ action fix_reaxff_bonds_kokkos.cpp fix_reaxff_bonds.cpp action fix_reaxff_bonds_kokkos.h fix_reaxff_bonds.h action fix_reaxff_species_kokkos.cpp fix_reaxff_species.cpp action fix_reaxff_species_kokkos.h fix_reaxff_species.h +action fix_rx_kokkos.cpp fix_rx.cpp +action fix_rx_kokkos.h fix_rx.h action fix_setforce_kokkos.cpp action fix_setforce_kokkos.h action fix_shake_kokkos.cpp fix_shake.cpp action fix_shake_kokkos.h fix_shake.h action fix_shardlow_kokkos.cpp fix_shardlow.cpp action fix_shardlow_kokkos.h fix_shardlow.h -action fix_momentum_kokkos.cpp -action fix_momentum_kokkos.h +action fix_viscous_kokkos.cpp +action fix_viscous_kokkos.h action fix_wall_lj93_kokkos.cpp action fix_wall_lj93_kokkos.h action fix_wall_reflect_kokkos.cpp action fix_wall_reflect_kokkos.h -action fix_viscous_kokkos.cpp -action fix_viscous_kokkos.h -action fix_dpd_energy_kokkos.cpp fix_dpd_energy.cpp -action fix_dpd_energy_kokkos.h fix_dpd_energy.h -action fix_rx_kokkos.cpp fix_rx.cpp -action fix_rx_kokkos.h fix_rx.h action grid3d_kokkos.cpp fft3d.h action grid3d_kokkos.h fft3d.h action improper_class2_kokkos.cpp improper_class2.cpp @@ -180,36 +182,48 @@ action improper_class2_kokkos.h improper_class2.h action improper_harmonic_kokkos.cpp improper_harmonic.cpp action improper_harmonic_kokkos.h improper_harmonic.h action kissfft_kokkos.h kissfft.h -action kokkos.cpp -action kokkos.h -action kokkos_base.h action kokkos_base_fft.h fft3d.h +action kokkos_base.h action kokkos_few.h action kokkos_type.h -action meam_kokkos.h meam.h +action kokkos.cpp +action kokkos.h +action math_special_kokkos.cpp +action math_special_kokkos.h action meam_dens_final_kokkos.h meam_dens_final.cpp action meam_dens_init_kokkos.h meam_dens_init.cpp action meam_force_kokkos.h meam_force.cpp action meam_funcs_kokkos.h meam_funcs.cpp action meam_impl_kokkos.h meam_impl.cpp +action meam_kokkos.h meam.h action meam_setup_done_kokkos.h meam_setup_done.cpp action memory_kokkos.h +action min_cg_kokkos.cpp +action min_cg_kokkos.h +action min_kokkos.cpp +action min_kokkos.h +action min_linesearch_kokkos.cpp +action min_linesearch_kokkos.h action mliap_data_kokkos.cpp mliap_data.cpp action mliap_data_kokkos.h mliap_data.h action mliap_descriptor_kokkos.h mliap_descriptor.h action mliap_descriptor_so3_kokkos.cpp mliap_descriptor_so3.cpp action mliap_descriptor_so3_kokkos.h mliap_descriptor_so3.h +action mliap_model_kokkos.h mliap_model.h action mliap_model_linear_kokkos.cpp mliap_model_linear.cpp action mliap_model_linear_kokkos.h mliap_model_linear.h action mliap_model_python_kokkos.cpp mliap_model_linear.cpp action mliap_model_python_kokkos.h mliap_model_linear.h -action mliap_model_kokkos.h mliap_model.h -action mliap_unified_kokkos.cpp mliap_unified.cpp -action mliap_unified_kokkos.h mliap_unified.h action mliap_so3_kokkos.cpp mliap_so3.cpp action mliap_so3_kokkos.h mliap_so3.h +action mliap_unified_kokkos.cpp mliap_unified.cpp +action mliap_unified_kokkos.h mliap_unified.h action modify_kokkos.cpp action modify_kokkos.h +action nbin_kokkos.cpp +action nbin_kokkos.h +action nbin_ssa_kokkos.cpp nbin_ssa.cpp +action nbin_ssa_kokkos.h nbin_ssa.h action neigh_bond_kokkos.cpp action neigh_bond_kokkos.h action neigh_list_kokkos.cpp @@ -220,26 +234,14 @@ action npair_copy_kokkos.cpp action npair_copy_kokkos.h action npair_halffull_kokkos.cpp action npair_halffull_kokkos.h -action npair_skip_kokkos.cpp -action npair_skip_kokkos.h -action npair_trim_kokkos.cpp -action npair_trim_kokkos.h action npair_kokkos.cpp action npair_kokkos.h +action npair_skip_kokkos.cpp +action npair_skip_kokkos.h action npair_ssa_kokkos.cpp npair_half_bin_newton_ssa.cpp action npair_ssa_kokkos.h npair_half_bin_newton_ssa.h -action nbin_kokkos.cpp -action nbin_kokkos.h -action nbin_ssa_kokkos.cpp nbin_ssa.cpp -action nbin_ssa_kokkos.h nbin_ssa.h -action math_special_kokkos.cpp -action math_special_kokkos.h -action min_cg_kokkos.cpp -action min_cg_kokkos.h -action min_kokkos.cpp -action min_kokkos.h -action min_linesearch_kokkos.cpp -action min_linesearch_kokkos.h +action npair_trim_kokkos.cpp +action npair_trim_kokkos.h action pack_kokkos.h pack.h action pair_adp_kokkos.cpp pair_adp.cpp action pair_adp_kokkos.h pair_adp.h @@ -259,26 +261,26 @@ action pair_coul_long_kokkos.cpp pair_coul_long.cpp action pair_coul_long_kokkos.h pair_coul_long.h action pair_coul_wolf_kokkos.cpp action pair_coul_wolf_kokkos.h -action pair_dpd_kokkos.h pair_dpd.h -action pair_dpd_kokkos.cpp pair_dpd.cpp action pair_dpd_ext_kokkos.cpp pair_dpd_ext.cpp action pair_dpd_ext_kokkos.h pair_dpd_ext.h -action pair_dpd_ext_tstat_kokkos.h pair_dpd_ext_tstat.h action pair_dpd_ext_tstat_kokkos.cpp pair_dpd_ext_tstat.cpp -action pair_dpd_tstat_kokkos.h pair_dpd_tstat.h -action pair_dpd_tstat_kokkos.cpp pair_dpd_tstat.cpp +action pair_dpd_ext_tstat_kokkos.h pair_dpd_ext_tstat.h action pair_dpd_fdt_energy_kokkos.cpp pair_dpd_fdt_energy.cpp action pair_dpd_fdt_energy_kokkos.h pair_dpd_fdt_energy.h -action pair_eam_kokkos.cpp pair_eam.cpp -action pair_eam_kokkos.h pair_eam.h +action pair_dpd_kokkos.cpp pair_dpd.cpp +action pair_dpd_kokkos.h pair_dpd.h +action pair_dpd_tstat_kokkos.cpp pair_dpd_tstat.cpp +action pair_dpd_tstat_kokkos.h pair_dpd_tstat.h action pair_eam_alloy_kokkos.cpp pair_eam_alloy.cpp action pair_eam_alloy_kokkos.h pair_eam_alloy.h action pair_eam_fs_kokkos.cpp pair_eam_fs.cpp action pair_eam_fs_kokkos.h pair_eam_fs.h +action pair_eam_kokkos.cpp pair_eam.cpp +action pair_eam_kokkos.h pair_eam.h action pair_exp6_rx_kokkos.cpp pair_exp6_rx.cpp action pair_exp6_rx_kokkos.h pair_exp6_rx.h -action pair_gran_hooke_history_kokkos.h pair_gran_hooke_history.h action pair_gran_hooke_history_kokkos.cpp pair_gran_hooke_history.cpp +action pair_gran_hooke_history_kokkos.h pair_gran_hooke_history.h action pair_hybrid_kokkos.cpp action pair_hybrid_kokkos.h action pair_hybrid_overlay_kokkos.cpp @@ -304,8 +306,12 @@ action pair_lj_cut_coul_dsf_kokkos.cpp pair_lj_cut_coul_dsf.cpp action pair_lj_cut_coul_dsf_kokkos.h pair_lj_cut_coul_dsf.h action pair_lj_cut_coul_long_kokkos.cpp pair_lj_cut_coul_long.cpp action pair_lj_cut_coul_long_kokkos.h pair_lj_cut_coul_long.h +action pair_lj_cut_dipole_cut_kokkos.cpp pair_lj_cut_dipole_cut.cpp +action pair_lj_cut_dipole_cut_kokkos.h pair_lj_cut_dipole_cut.h action pair_lj_cut_kokkos.cpp action pair_lj_cut_kokkos.h +action pair_lj_expand_coul_long_kokkos.cpp pair_lj_expand_coul_long.cpp +action pair_lj_expand_coul_long_kokkos.h pair_lj_expand_coul_long.h action pair_lj_expand_kokkos.cpp action pair_lj_expand_kokkos.h action pair_lj_gromacs_coul_gromacs_kokkos.cpp pair_lj_gromacs_coul_gromacs.cpp @@ -324,19 +330,17 @@ action pair_morse_kokkos.cpp action pair_morse_kokkos.h action pair_multi_lucy_rx_kokkos.cpp pair_multi_lucy_rx.cpp action pair_multi_lucy_rx_kokkos.h pair_multi_lucy_rx.h -action pair_pace_kokkos.cpp pair_pace.cpp -action pair_pace_kokkos.h pair_pace.h action pair_pace_extrapolation_kokkos.cpp pair_pace_extrapolation.cpp action pair_pace_extrapolation_kokkos.h pair_pace_extrapolation.h +action pair_pace_kokkos.cpp pair_pace.cpp +action pair_pace_kokkos.h pair_pace.h action pair_reaxff_kokkos.cpp pair_reaxff.cpp action pair_reaxff_kokkos.h pair_reaxff.h +action pair_snap_kokkos_impl.h pair_snap.cpp action pair_snap_kokkos.cpp pair_snap.cpp action pair_snap_kokkos.h pair_snap.h -action pair_snap_kokkos_impl.h pair_snap.cpp action pair_sw_kokkos.cpp pair_sw.cpp action pair_sw_kokkos.h pair_sw.h -action pair_vashishta_kokkos.cpp pair_vashishta.cpp -action pair_vashishta_kokkos.h pair_vashishta.h action pair_table_kokkos.cpp action pair_table_kokkos.h action pair_table_rx_kokkos.cpp pair_table_rx.cpp @@ -347,6 +351,8 @@ action pair_tersoff_mod_kokkos.cpp pair_tersoff_mod.cpp action pair_tersoff_mod_kokkos.h pair_tersoff_mod.h action pair_tersoff_zbl_kokkos.cpp pair_tersoff_zbl.cpp action pair_tersoff_zbl_kokkos.h pair_tersoff_zbl.h +action pair_vashishta_kokkos.cpp pair_vashishta.cpp +action pair_vashishta_kokkos.h pair_vashishta.h action pair_yukawa_kokkos.cpp action pair_yukawa_kokkos.h action pair_zbl_kokkos.cpp @@ -359,8 +365,8 @@ action region_block_kokkos.cpp action region_block_kokkos.h action remap_kokkos.cpp remap.cpp action remap_kokkos.h remap.h -action sna_kokkos.h sna.h action sna_kokkos_impl.h sna.cpp +action sna_kokkos.h sna.h action third_order_kokkos.cpp dynamical_matrix.cpp action third_order_kokkos.h dynamical_matrix.h action transpose_helper_kokkos.h diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h index 08bfaf20cd..8d2ae47f0e 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -34,6 +34,7 @@ class AtomKokkos : public Atom { DAT::tdual_float_1d k_q; DAT::tdual_float_1d k_radius; DAT::tdual_float_1d k_rmass; + DAT::tdual_float_1d_4 k_mu; DAT::tdual_v_array k_omega; DAT::tdual_v_array k_angmom; DAT::tdual_f_array k_torque; diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.h b/src/KOKKOS/atom_vec_atomic_kokkos.h index 41059b3ec4..25e1616d6c 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.h +++ b/src/KOKKOS/atom_vec_atomic_kokkos.h @@ -71,4 +71,3 @@ class AtomVecAtomicKokkos : public AtomVecKokkos, public AtomVecAtomic { #endif #endif - diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.cpp b/src/KOKKOS/atom_vec_dipole_kokkos.cpp new file mode 100644 index 0000000000..050fe2d0d1 --- /dev/null +++ b/src/KOKKOS/atom_vec_dipole_kokkos.cpp @@ -0,0 +1,666 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "atom_vec_dipole_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "comm_kokkos.h" +#include "domain.h" +#include "error.h" +#include "fix.h" +#include "memory_kokkos.h" +#include "modify.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +AtomVecDipoleKokkos::AtomVecDipoleKokkos(LAMMPS *lmp) : AtomVec(lmp), +AtomVecKokkos(lmp), AtomVecDipole(lmp) +{ + +} + +/* ---------------------------------------------------------------------- + grow atom arrays + n = 0 grows arrays by DELTA + n > 0 allocates arrays to size n +------------------------------------------------------------------------- */ + +void AtomVecDipoleKokkos::grow(int n) +{ + auto DELTA = LMP_KOKKOS_AV_DELTA; + int step = MAX(DELTA,nmax*0.01); + if (n == 0) nmax += step; + else nmax = n; + atomKK->nmax = nmax; + if (nmax < 0 || nmax > MAXSMALLINT) + error->one(FLERR,"Per-processor system is too big"); + + atomKK->sync(Device,ALL_MASK); + atomKK->modified(Device,ALL_MASK); + + memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); + memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); + memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); + memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); + + memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,"atom:x"); + memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,"atom:v"); + memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,"atom:f"); + + memoryKK->grow_kokkos(atomKK->k_q,atomKK->q,nmax,"atom:q"); + memoryKK->grow_kokkos(atomKK->k_mu,atomKK->mu,nmax,"atom:mu"); + + grow_pointers(); + atomKK->sync(Host,ALL_MASK); + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax); +} + +/* ---------------------------------------------------------------------- + reset local array ptrs +------------------------------------------------------------------------- */ + +void AtomVecDipoleKokkos::grow_pointers() +{ + tag = atomKK->tag; + d_tag = atomKK->k_tag.d_view; + h_tag = atomKK->k_tag.h_view; + + type = atomKK->type; + d_type = atomKK->k_type.d_view; + h_type = atomKK->k_type.h_view; + mask = atomKK->mask; + d_mask = atomKK->k_mask.d_view; + h_mask = atomKK->k_mask.h_view; + image = atomKK->image; + d_image = atomKK->k_image.d_view; + h_image = atomKK->k_image.h_view; + + x = atomKK->x; + d_x = atomKK->k_x.d_view; + h_x = atomKK->k_x.h_view; + v = atomKK->v; + d_v = atomKK->k_v.d_view; + h_v = atomKK->k_v.h_view; + f = atomKK->f; + d_f = atomKK->k_f.d_view; + h_f = atomKK->k_f.h_view; + + q = atomKK->q; + d_q = atomKK->k_q.d_view; + h_q = atomKK->k_q.h_view; + mu = atomKK->mu; + d_mu = atomKK->k_mu.d_view; + h_mu = atomKK->k_mu.h_view; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecDipoleKokkos_PackComm { + typedef DeviceType device_type; + + typename ArrayTypes::t_x_array_randomread _x; + typename ArrayTypes::t_mu_array_randomread _mu; + typename ArrayTypes::t_xfloat_2d_um _buf; + typename ArrayTypes::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + + AtomVecDipoleKokkos_PackComm( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_float_1d_4 &mu, + const typename DAT::tdual_xfloat_2d &buf, + const typename DAT::tdual_int_2d &list, + const int & iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): + _x(x.view()), + _mu(mu.view()), + _list(list.view()),_iswap(iswap), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + const size_t elements = 7; // size_forward + const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/elements; + buffer_view(_buf,buf,maxsend,elements); + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + _buf(i,3) = _mu(j,0); + _buf(i,4) = _mu(j,1); + _buf(i,5) = _mu(j,2); + _buf(i,6) = _mu(j,3); + } else { + if (TRICLINIC == 0) { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + _buf(i,3) = _mu(j,0); + _buf(i,4) = _mu(j,1); + _buf(i,5) = _mu(j,2); + _buf(i,6) = _mu(j,3); + } else { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + _buf(i,3) = _mu(j,0); + _buf(i,4) = _mu(j,1); + _buf(i,5) = _mu(j,2); + _buf(i,6) = _mu(j,3); + } + } + } +}; + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecDipoleKokkos_PackBorder { + typedef DeviceType device_type; + + typename ArrayTypes::t_xfloat_2d _buf; + const typename ArrayTypes::t_int_2d_const _list; + const int _iswap; + const typename ArrayTypes::t_x_array_randomread _x; + const typename ArrayTypes::t_tagint_1d _tag; + const typename ArrayTypes::t_int_1d _type; + const typename ArrayTypes::t_int_1d _mask; + const typename ArrayTypes::t_float_1d _q; + const typename ArrayTypes::t_mu_array_randomread _mu; + X_FLOAT _dx,_dy,_dz; + + AtomVecDipoleKokkos_PackBorder( + const typename ArrayTypes::t_xfloat_2d &buf, + const typename ArrayTypes::t_int_2d_const &list, + const int & iswap, + const typename ArrayTypes::t_x_array &x, + const typename ArrayTypes::t_tagint_1d &tag, + const typename ArrayTypes::t_int_1d &type, + const typename ArrayTypes::t_int_1d &mask, + const typename ArrayTypes::t_float_1d &q, + const typename ArrayTypes::t_mu_array_randomread &mu, + const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz): + _buf(buf),_list(list),_iswap(iswap), + _x(x),_tag(tag),_type(type),_mask(mask),_q(q),_mu(mu), + _dx(dx),_dy(dy),_dz(dz) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; + _buf(i,6) = _q(j); + _buf(i,7) = _mu(j,0); + _buf(i,8) = _mu(j,1); + _buf(i,9) = _mu(j,2); + _buf(i,10) = _mu(j,3); + } else { + _buf(i,0) = _x(j,0) + _dx; + _buf(i,1) = _x(j,1) + _dy; + _buf(i,2) = _x(j,2) + _dz; + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; + _buf(i,6) = _q(j); + _buf(i,7) = _mu(j,0); + _buf(i,8) = _mu(j,1); + _buf(i,9) = _mu(j,2); + _buf(i,10) = _mu(j,3); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecDipoleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space) +{ + X_FLOAT dx,dy,dz; + + if (pbc_flag != 0) { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if (space==Host) { + AtomVecDipoleKokkos_PackBorder f( + buf.view(), k_sendlist.view(), + iswap,h_x,h_tag,h_type,h_mask,h_q,h_mu,dx,dy,dz); + Kokkos::parallel_for(n,f); + } else { + AtomVecDipoleKokkos_PackBorder f( + buf.view(), k_sendlist.view(), + iswap,d_x,d_tag,d_type,d_mask,d_q,d_mu,dx,dy,dz); + Kokkos::parallel_for(n,f); + } + + } else { + dx = dy = dz = 0; + if (space==Host) { + AtomVecDipoleKokkos_PackBorder f( + buf.view(), k_sendlist.view(), + iswap,h_x,h_tag,h_type,h_mask,h_q,h_mu,dx,dy,dz); + Kokkos::parallel_for(n,f); + } else { + AtomVecDipoleKokkos_PackBorder f( + buf.view(), k_sendlist.view(), + iswap,d_x,d_tag,d_type,d_mask,d_q,d_mu,dx,dy,dz); + Kokkos::parallel_for(n,f); + } + } + return n*size_border; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecDipoleKokkos_UnpackBorder { + typedef DeviceType device_type; + + const typename ArrayTypes::t_xfloat_2d_const _buf; + typename ArrayTypes::t_x_array _x; + typename ArrayTypes::t_tagint_1d _tag; + typename ArrayTypes::t_int_1d _type; + typename ArrayTypes::t_int_1d _mask; + typename ArrayTypes::t_float_1d _q; + typename ArrayTypes::t_mu_array _mu; + int _first; + + + AtomVecDipoleKokkos_UnpackBorder( + const typename ArrayTypes::t_xfloat_2d_const &buf, + typename ArrayTypes::t_x_array &x, + typename ArrayTypes::t_tagint_1d &tag, + typename ArrayTypes::t_int_1d &type, + typename ArrayTypes::t_int_1d &mask, + typename ArrayTypes::t_float_1d &q, + typename ArrayTypes::t_mu_array &mu, + const int& first): + _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_mu(mu),_first(first) { + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; + _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; + _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; + _q(i+_first) = _buf(i,6); + _mu(i+_first,0) = _buf(i,7); + _mu(i+_first,1) = _buf(i,8); + _mu(i+_first,2) = _buf(i,9); + _mu(i+_first,3) = _buf(i,10); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecDipoleKokkos::unpack_border_kokkos(const int &n, const int &first, + const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) { + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MU_MASK); + while (first+n >= nmax) grow(0); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MU_MASK); + if (space==Host) { + struct AtomVecDipoleKokkos_UnpackBorder + f(buf.view(),h_x,h_tag,h_type,h_mask,h_q,h_mu,first); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecDipoleKokkos_UnpackBorder + f(buf.view(),d_x,d_tag,d_type,d_mask,d_q,d_mu,first); + Kokkos::parallel_for(n,f); + } +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecDipoleKokkos_PackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes AT; + typename AT::t_x_array_randomread _x; + typename AT::t_v_array_randomread _v; + typename AT::t_tagint_1d_randomread _tag; + typename AT::t_int_1d_randomread _type; + typename AT::t_int_1d_randomread _mask; + typename AT::t_imageint_1d_randomread _image; + typename AT::t_float_1d_randomread _q; + typename AT::t_mu_array_randomread _mu; + typename AT::t_x_array _xw; + typename AT::t_v_array _vw; + typename AT::t_tagint_1d _tagw; + typename AT::t_int_1d _typew; + typename AT::t_int_1d _maskw; + typename AT::t_imageint_1d _imagew; + typename AT::t_float_1d _qw; + typename AT::t_sp_array _muw; + + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_1d_const _sendlist; + typename AT::t_int_1d_const _copylist; + int _nlocal,_dim; + X_FLOAT _lo,_hi; + + AtomVecDipoleKokkos_PackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d sendlist, + typename AT::tdual_int_1d copylist,int nlocal, int dim, + X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _q(atom->k_q.view()), + _mu(atom->k_mu.view()), + _xw(atom->k_x.view()), + _vw(atom->k_v.view()), + _tagw(atom->k_tag.view()), + _typew(atom->k_type.view()), + _maskw(atom->k_mask.view()), + _imagew(atom->k_image.view()), + _qw(atom->k_q.view()), + _muw(atom->k_mu.view()), + _sendlist(sendlist.template view()), + _copylist(copylist.template view()), + _nlocal(nlocal),_dim(dim), + _lo(lo),_hi(hi) { + const size_t elements = 16; // 1st = # of values, followed by 15 values (see below) + const int maxsendlist = (buf.template view().extent(0)* + buf.template view().extent(1))/elements; + + buffer_view(_buf,buf,maxsendlist,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &mysend) const { + const int i = _sendlist(mysend); + _buf(mysend,0) = 16; // elements + _buf(mysend,1) = _x(i,0); + _buf(mysend,2) = _x(i,1); + _buf(mysend,3) = _x(i,2); + _buf(mysend,4) = _v(i,0); + _buf(mysend,5) = _v(i,1); + _buf(mysend,6) = _v(i,2); + _buf(mysend,7) = d_ubuf(_tag[i]).d; + _buf(mysend,8) = d_ubuf(_type[i]).d; + _buf(mysend,9) = d_ubuf(_mask[i]).d; + _buf(mysend,10) = d_ubuf(_image[i]).d; + _buf(mysend,11) = _q[i]; + _buf(mysend,12) = _mu(i,0); + _buf(mysend,13) = _mu(i,1); + _buf(mysend,14) = _mu(i,2); + _buf(mysend,15) = _mu(i,3); + const int j = _copylist(mysend); + + if (j>-1) { + _xw(i,0) = _x(j,0); + _xw(i,1) = _x(j,1); + _xw(i,2) = _x(j,2); + _vw(i,0) = _v(j,0); + _vw(i,1) = _v(j,1); + _vw(i,2) = _v(j,2); + _tagw(i) = _tag(j); + _typew(i) = _type(j); + _maskw(i) = _mask(j); + _imagew(i) = _image(j); + _qw(i) = _q(j); + _muw(i,0) = _mu(j,0); + _muw(i,1) = _mu(j,1); + _muw(i,2) = _mu(j,2); + _muw(i,3) = _mu(j,3); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecDipoleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space,int dim, + X_FLOAT lo,X_FLOAT hi ) +{ + const size_t nelements = 16; // # of elements packed + if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/12) { + int newsize = nsend*nelements/k_buf.view().extent(1)+1; + k_buf.resize(newsize,k_buf.view().extent(1)); + } + if (space == Host) { + AtomVecDipoleKokkos_PackExchangeFunctor + f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + return nsend*nelements; + } else { + AtomVecDipoleKokkos_PackExchangeFunctor + f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + return nsend*nelements; + } +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecDipoleKokkos_UnpackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes AT; + typename AT::t_x_array _x; + typename AT::t_v_array _v; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_imageint_1d _image; + typename AT::t_float_1d _q; + typename AT::t_mu_array _mu; + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_1d _nlocal; + int _dim; + X_FLOAT _lo,_hi; + + AtomVecDipoleKokkos_UnpackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d nlocal, + int dim, X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _q(atom->k_q.view()), + _mu(atom->k_mu.view()), + _nlocal(nlocal.template view()),_dim(dim), + _lo(lo),_hi(hi) { + const size_t elements = 16; + const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; + + buffer_view(_buf,buf,maxsendlist,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &myrecv) const { + X_FLOAT x = _buf(myrecv,_dim+1); + if (x >= _lo && x < _hi) { + int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + _x(i,0) = _buf(myrecv,1); + _x(i,1) = _buf(myrecv,2); + _x(i,2) = _buf(myrecv,3); + _v(i,0) = _buf(myrecv,4); + _v(i,1) = _buf(myrecv,5); + _v(i,2) = _buf(myrecv,6); + _tag[i] = (tagint) d_ubuf(_buf(myrecv,7)).i; + _type[i] = (int) d_ubuf(_buf(myrecv,8)).i; + _mask[i] = (int) d_ubuf(_buf(myrecv,9)).i; + _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i; + _q[i] = _buf(myrecv,11); + _mu(i,0) = _buf(myrecv,12); + _mu(i,1) = _buf(myrecv,13); + _mu(i,2) = _buf(myrecv,14); + _mu(i,3) = _buf(myrecv,15); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecDipoleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv, + int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, + ExecutionSpace space) { + const size_t nelements = 16; // # of elements packed + if (space == Host) { + k_count.h_view(0) = nlocal; + AtomVecDipoleKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); + Kokkos::parallel_for(nrecv/nelements,f); + return k_count.h_view(0); + } else { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecDipoleKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,dim,lo,hi); + Kokkos::parallel_for(nrecv/nelements,f); + k_count.modify(); + k_count.sync(); + + return k_count.h_view(0); + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecDipoleKokkos::sync(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & Q_MASK) atomKK->k_q.sync(); + if (mask & MU_MASK) atomKK->k_mu.sync(); + } else { + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & Q_MASK) atomKK->k_q.sync(); + if (mask & MU_MASK) atomKK->k_mu.sync(); + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecDipoleKokkos::modified(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if (mask & X_MASK) atomKK->k_x.modify(); + if (mask & V_MASK) atomKK->k_v.modify(); + if (mask & F_MASK) atomKK->k_f.modify(); + if (mask & TAG_MASK) atomKK->k_tag.modify(); + if (mask & TYPE_MASK) atomKK->k_type.modify(); + if (mask & MASK_MASK) atomKK->k_mask.modify(); + if (mask & IMAGE_MASK) atomKK->k_image.modify(); + if (mask & Q_MASK) atomKK->k_q.modify(); + if (mask & MU_MASK) atomKK->k_mu.modify(); + } else { + if (mask & X_MASK) atomKK->k_x.modify(); + if (mask & V_MASK) atomKK->k_v.modify(); + if (mask & F_MASK) atomKK->k_f.modify(); + if (mask & TAG_MASK) atomKK->k_tag.modify(); + if (mask & TYPE_MASK) atomKK->k_type.modify(); + if (mask & MASK_MASK) atomKK->k_mask.modify(); + if (mask & IMAGE_MASK) atomKK->k_image.modify(); + if (mask & Q_MASK) atomKK->k_q.modify(); + if (mask & MU_MASK) atomKK->k_mu.modify(); + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecDipoleKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if ((mask & X_MASK) && atomKK->k_x.need_sync()) + perform_async_copy(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync()) + perform_async_copy(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync()) + perform_async_copy(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync()) + perform_async_copy(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync()) + perform_async_copy(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync()) + perform_async_copy(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync()) + perform_async_copy(atomKK->k_image,space); + if ((mask & Q_MASK) && atomKK->k_q.need_sync()) + perform_async_copy(atomKK->k_q,space); + if ((mask & MU_MASK) && atomKK->k_mu.need_sync()) + perform_async_copy(atomKK->k_mu,space); + } else { + if ((mask & X_MASK) && atomKK->k_x.need_sync()) + perform_async_copy(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync()) + perform_async_copy(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync()) + perform_async_copy(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync()) + perform_async_copy(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync()) + perform_async_copy(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync()) + perform_async_copy(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync()) + perform_async_copy(atomKK->k_image,space); + if ((mask & Q_MASK) && atomKK->k_q.need_sync()) + perform_async_copy(atomKK->k_q,space); + if ((mask & MU_MASK) && atomKK->k_mu.need_sync()) + perform_async_copy(atomKK->k_mu,space); + } +} diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.h b/src/KOKKOS/atom_vec_dipole_kokkos.h new file mode 100644 index 0000000000..fcd422bc4d --- /dev/null +++ b/src/KOKKOS/atom_vec_dipole_kokkos.h @@ -0,0 +1,82 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef ATOM_CLASS +// clang-format off +AtomStyle(dipole/kk,AtomVecDipoleKokkos); +AtomStyle(dipole/kk/device,AtomVecDipoleKokkos); +AtomStyle(dipole/kk/host,AtomVecDipoleKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_ATOM_VEC_DIPOLE_KOKKOS_H +#define LMP_ATOM_VEC_DIPOLE_KOKKOS_H + +#include "atom_vec_kokkos.h" +#include "atom_vec_dipole.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +class AtomVecDipoleKokkos : public AtomVecKokkos, public AtomVecDipole { + public: + AtomVecDipoleKokkos(class LAMMPS *); + + void grow(int) override; + void grow_pointers() override; + int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, + DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space) override; + void unpack_border_kokkos(const int &n, const int &nfirst, + const DAT::tdual_xfloat_2d &buf, + ExecutionSpace space) override; + int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space, int dim, + X_FLOAT lo, X_FLOAT hi) override; + int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, + int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, + ExecutionSpace space) override; + + void sync(ExecutionSpace space, unsigned int mask) override; + void modified(ExecutionSpace space, unsigned int mask) override; + void sync_overlapping_device(ExecutionSpace space, unsigned int mask) override; + + protected: + double *q; + + DAT::t_tagint_1d d_tag; + HAT::t_tagint_1d h_tag; + + DAT::t_int_1d d_type, d_mask; + HAT::t_int_1d h_type, h_mask; + + DAT::t_imageint_1d d_image; + HAT::t_imageint_1d h_image; + + DAT::t_x_array d_x; + DAT::t_v_array d_v; + DAT::t_f_array d_f; + + DAT::t_float_1d d_q; + HAT::t_float_1d h_q; + DAT::t_mu_array d_mu; + HAT::t_mu_array h_mu; +}; + +} // namespace LAMMPS_NS + +#endif +#endif diff --git a/src/KOKKOS/fix_nve_sphere_kokkos.cpp b/src/KOKKOS/fix_nve_sphere_kokkos.cpp index a1e8e76cd8..51e57b839e 100644 --- a/src/KOKKOS/fix_nve_sphere_kokkos.cpp +++ b/src/KOKKOS/fix_nve_sphere_kokkos.cpp @@ -50,10 +50,6 @@ template void FixNVESphereKokkos::init() { FixNVESphere::init(); - - if (extra == DIPOLE) { - error->all(FLERR,"Fix nve/sphere/kk doesn't yet support dipole"); - } } /* ---------------------------------------------------------------------- */ @@ -61,7 +57,10 @@ void FixNVESphereKokkos::init() template void FixNVESphereKokkos::initial_integrate(int /*vflag*/) { - atomKK->sync(execution_space, X_MASK | V_MASK | OMEGA_MASK| F_MASK | TORQUE_MASK | RMASS_MASK | RADIUS_MASK | MASK_MASK); + if (extra == DIPOLE) + atomKK->sync(execution_space, X_MASK | V_MASK | OMEGA_MASK| F_MASK | TORQUE_MASK | RMASS_MASK | RADIUS_MASK | MASK_MASK | MU_MASK); + else + atomKK->sync(execution_space, X_MASK | V_MASK | OMEGA_MASK| F_MASK | TORQUE_MASK | RMASS_MASK | RADIUS_MASK | MASK_MASK); x = atomKK->k_x.view(); v = atomKK->k_v.view(); @@ -71,6 +70,7 @@ void FixNVESphereKokkos::initial_integrate(int /*vflag*/) mask = atomKK->k_mask.view(); rmass = atomKK->k_rmass.view(); radius = atomKK->k_radius.view(); + mu = atomKK->k_mu.view(); int nlocal = atom->nlocal; if (igroup == atom->firstgroup) nlocal = atom->nfirst; @@ -78,7 +78,10 @@ void FixNVESphereKokkos::initial_integrate(int /*vflag*/) FixNVESphereKokkosInitialIntegrateFunctor f(this); Kokkos::parallel_for(nlocal,f); - atomKK->modified(execution_space, X_MASK | V_MASK | OMEGA_MASK); + if (extra == DIPOLE) + atomKK->modified(execution_space, X_MASK | V_MASK | OMEGA_MASK | MU_MASK); + else + atomKK->modified(execution_space, X_MASK | V_MASK | OMEGA_MASK); } /* ---------------------------------------------------------------------- */ @@ -102,6 +105,17 @@ void FixNVESphereKokkos::initial_integrate_item(const int i) const omega(i,0) += dtirotate * torque(i,0); omega(i,1) += dtirotate * torque(i,1); omega(i,2) += dtirotate * torque(i,2); + + if (extra == DIPOLE) { + const double g0 = mu(i,0) + dtv * (omega(i,1) * mu(i,2) - omega(i,2) * mu(i,1)); + const double g1 = mu(i,1) + dtv * (omega(i,2) * mu(i,0) - omega(i,0) * mu(i,2)); + const double g2 = mu(i,2) + dtv * (omega(i,0) * mu(i,1) - omega(i,1) * mu(i,0)); + const double msq = g0*g0 + g1*g1 + g2*g2; + const double scale = mu(i,3)/sqrt(msq); + mu(i,0) = g0*scale; + mu(i,1) = g1*scale; + mu(i,2) = g2*scale; + } } } diff --git a/src/KOKKOS/fix_nve_sphere_kokkos.h b/src/KOKKOS/fix_nve_sphere_kokkos.h index 551e64eb5c..268b4ea9ce 100644 --- a/src/KOKKOS/fix_nve_sphere_kokkos.h +++ b/src/KOKKOS/fix_nve_sphere_kokkos.h @@ -47,6 +47,7 @@ class FixNVESphereKokkos : public FixNVESphere { typename ArrayTypes::t_x_array x; typename ArrayTypes::t_v_array v; typename ArrayTypes::t_v_array omega; + typename ArrayTypes::t_mu_array mu; typename ArrayTypes::t_f_array f; typename ArrayTypes::t_f_array torque; typename ArrayTypes::t_float_1d rmass; diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index 7fa07b85c5..a496f6ff94 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -717,6 +717,11 @@ typedef tdual_float_3d::t_dev_um t_float_3d_um; typedef tdual_float_3d::t_dev_const_um t_float_3d_const_um; typedef tdual_float_3d::t_dev_const_randomread t_float_3d_randomread; +#ifdef LMP_KOKKOS_NO_LEGACY +typedef Kokkos::DualView tdual_float_1d_4; +#else +typedef Kokkos::DualView tdual_float_1d_4; +#endif //Position Types //1d X_FLOAT array n @@ -819,6 +824,14 @@ typedef tdual_f_array::t_dev_um t_f_array_um; typedef tdual_f_array::t_dev_const_um t_f_array_const_um; typedef tdual_f_array::t_dev_const_randomread t_f_array_randomread; +//2d F_FLOAT array n*4 (for dipoles and quaterions) + +typedef tdual_float_1d_4::t_dev t_mu_array; +typedef tdual_float_1d_4::t_dev_const t_mu_array_const; +typedef tdual_float_1d_4::t_dev_um t_mu_array_um; +typedef tdual_float_1d_4::t_dev_const_um t_mu_array_const_um; +typedef tdual_float_1d_4::t_dev_const_randomread t_mu_array_randomread; + //2d F_FLOAT array n*6 (for virial) typedef Kokkos::DualView tdual_virial_array; @@ -831,11 +844,7 @@ typedef tdual_virial_array::t_dev_const_randomread t_virial_array_randomread; // Spin Types //3d SP_FLOAT array n*4 -#ifdef LMP_KOKKOS_NO_LEGACY -typedef Kokkos::DualView tdual_float_1d_4; -#else -typedef Kokkos::DualView tdual_float_1d_4; -#endif + typedef tdual_float_1d_4::t_dev t_sp_array; typedef tdual_float_1d_4::t_dev_const t_sp_array_const; typedef tdual_float_1d_4::t_dev_um t_sp_array_um; @@ -1008,6 +1017,12 @@ typedef tdual_float_2d::t_host_um t_float_2d_um; typedef tdual_float_2d::t_host_const_um t_float_2d_const_um; typedef tdual_float_2d::t_host_const_randomread t_float_2d_randomread; +#ifdef LMP_KOKKOS_NO_LEGACY +typedef Kokkos::DualView tdual_float_1d_4; +#else +typedef Kokkos::DualView tdual_float_1d_4; +#endif + //Position Types //1d X_FLOAT array n typedef Kokkos::DualView tdual_xfloat_1d; @@ -1101,6 +1116,14 @@ typedef tdual_f_array::t_host_um t_f_array_um; typedef tdual_f_array::t_host_const_um t_f_array_const_um; typedef tdual_f_array::t_host_const_randomread t_f_array_randomread; +//2d F_FLOAT array n*4 (for dipoles and quaterions) + +typedef tdual_float_1d_4::t_host t_mu_array; +typedef tdual_float_1d_4::t_host_const t_mu_array_const; +typedef tdual_float_1d_4::t_host_um t_mu_array_um; +typedef tdual_float_1d_4::t_host_const_um t_mu_array_const_um; +typedef tdual_float_1d_4::t_host_const_randomread t_mu_array_randomread; + //2d F_FLOAT array n*6 (for virial) typedef Kokkos::DualView tdual_virial_array; typedef tdual_virial_array::t_host t_virial_array; @@ -1112,11 +1135,6 @@ typedef tdual_virial_array::t_host_const_randomread t_virial_array_randomread; // Spin types //2d X_FLOAT array n*4 -#ifdef LMP_KOKKOS_NO_LEGACY -typedef Kokkos::DualView tdual_float_1d_4; -#else -typedef Kokkos::DualView tdual_float_1d_4; -#endif typedef tdual_float_1d_4::t_host t_sp_array; typedef tdual_float_1d_4::t_host_const t_sp_array_const; typedef tdual_float_1d_4::t_host_um t_sp_array_um; diff --git a/src/KOKKOS/pair_lj_cut_dipole_cut_kokkos.cpp b/src/KOKKOS/pair_lj_cut_dipole_cut_kokkos.cpp new file mode 100644 index 0000000000..c7532b27ab --- /dev/null +++ b/src/KOKKOS/pair_lj_cut_dipole_cut_kokkos.cpp @@ -0,0 +1,644 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Trung Nguyen (U Chicago) +------------------------------------------------------------------------- */ + +#include "pair_lj_cut_dipole_cut_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "error.h" +#include "force.h" +#include "kokkos.h" +#include "math_const.h" +#include "memory_kokkos.h" +#include "neigh_list_kokkos.h" +#include "neigh_request.h" +#include "neighbor.h" +#include "respa.h" +#include "update.h" + +#include +#include + +using namespace LAMMPS_NS; +using namespace MathConst; + +/* ---------------------------------------------------------------------- */ + +template +PairLJCutDipoleCutKokkos::PairLJCutDipoleCutKokkos(LAMMPS *lmp) : PairLJCutDipoleCut(lmp) +{ + respa_enable = 0; + + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TORQUE_MASK | TYPE_MASK | Q_MASK | MU_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | TORQUE_MASK | ENERGY_MASK | VIRIAL_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template +PairLJCutDipoleCutKokkos::~PairLJCutDipoleCutKokkos() +{ + if (copymode) return; + + if (allocated) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_cutsq,cutsq); + memoryKK->destroy_kokkos(k_cut_ljsq,cut_ljsq); + memoryKK->destroy_kokkos(k_cut_coulsq,cut_coulsq); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJCutDipoleCutKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + ev_init(eflag,vflag,0); + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.view(); + } + if (vflag_atom) { + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom"); + d_vatom = k_vatom.view(); + } + + atomKK->sync(execution_space,datamask_read); + k_cutsq.template sync(); + k_cut_ljsq.template sync(); + k_cut_coulsq.template sync(); + k_params.template sync(); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK | TORQUE_MASK); + + x = atomKK->k_x.view(); + c_x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + torque = atomKK->k_torque.view(); + q = atomKK->k_q.view(); + mu = atomKK->k_mu.view(); + type = atomKK->k_type.view(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + special_lj[0] = force->special_lj[0]; + special_lj[1] = force->special_lj[1]; + special_lj[2] = force->special_lj[2]; + special_lj[3] = force->special_lj[3]; + special_coul[0] = force->special_coul[0]; + special_coul[1] = force->special_coul[1]; + special_coul[2] = force->special_coul[2]; + special_coul[3] = force->special_coul[3]; + qqrd2e = force->qqrd2e; + newton_pair = force->newton_pair; + + // get the neighbor list and neighbors used in operator() + + NeighListKokkos* k_list = static_cast*>(list); + d_numneigh = k_list->d_numneigh; + d_neighbors = k_list->d_neighbors; + d_ilist = k_list->d_ilist; + int inum = list->inum; + + // loop over neighbors of my atoms + + copymode = 1; + + EV_FLOAT ev; + + // compute kernel NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS + if (atom->ntypes > MAX_TYPES_STACKPARAMS) { // STACKPARAMS==false + if (evflag) { // EVFLAG==1 + if (neighflag == HALF) { + if (newton_pair) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } else if (neighflag == HALFTHREAD) { + if (newton_pair) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } else if (neighflag == FULL) { + if (newton_pair) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } + } else { // EVFLAG==0 + if (neighflag == HALF) { + if (newton_pair) Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else if (neighflag == HALFTHREAD) { + if (newton_pair) Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else if (neighflag == FULL) { + if (newton_pair) Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } + } else { // STACKPARAMS==true + if (evflag) { // EVFLAG==1 + if (neighflag == HALF) { + if (newton_pair) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } else if (neighflag == HALFTHREAD) { + if (newton_pair) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } else if (neighflag == FULL) { + if (newton_pair) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } + } else { + if (neighflag == HALF) { + if (newton_pair) Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else if (neighflag == HALFTHREAD) { + if (newton_pair) Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else if (neighflag == FULL) { + if (newton_pair) Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } + } + + if (eflag_global) { + eng_vdwl += ev.evdwl; + eng_coul += ev.ecoul; + } + + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] += ev.v[5]; + } + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + copymode = 0; +} + +/* ---------------------------------------------------------------------- + needs torque as with AtomVecSphereKokkos + needs energy calculation as well + ---------------------------------------------------------------------- */ +template +template +KOKKOS_INLINE_FUNCTION +void PairLJCutDipoleCutKokkos::operator()(TagPairLJCutDipoleCutKernel, const int ii, EV_FLOAT &ev) const { + + // The f and torque arrays are atomic for Half/Thread neighbor style + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_f = f; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_torque = torque; + + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const X_FLOAT mui = mu(i,3); + const int itype = type(i); + const F_FLOAT qtmp = q[i]; + + const int jnum = d_numneigh[i]; + + F_FLOAT fx_i = 0.0; + F_FLOAT fy_i = 0.0; + F_FLOAT fz_i = 0.0; + F_FLOAT torquex_i = 0.0; + F_FLOAT torquey_i = 0.0; + F_FLOAT torquez_i = 0.0; + + for (int jj = 0; jj < jnum; jj++) { + int j = d_neighbors(i,jj); + const F_FLOAT factor_lj = special_lj[sbmask(j)]; + const F_FLOAT factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const int jtype = type(j); + const X_FLOAT muj = mu(j,3); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + X_FLOAT cutsq_ij = STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype); + + if (rsq < cutsq_ij) { + const F_FLOAT r2inv = 1.0/rsq; + const F_FLOAT r6inv = r2inv*r2inv*r2inv; + F_FLOAT forcelj = 0; + F_FLOAT evdwl = 0; + F_FLOAT ecoul = 0; + F_FLOAT forcecoulx = 0; + F_FLOAT forcecouly = 0; + F_FLOAT forcecoulz = 0; + F_FLOAT tixcoul = 0; + F_FLOAT tiycoul = 0; + F_FLOAT tizcoul = 0; + F_FLOAT tjxcoul = 0; + F_FLOAT tjycoul = 0; + F_FLOAT tjzcoul = 0; + F_FLOAT fx = 0; + F_FLOAT fy = 0; + F_FLOAT fz = 0; + + // lj term + + X_FLOAT cut_ljsq_ij = STACKPARAMS?m_cut_ljsq[itype][jtype]:d_cut_ljsq(itype,jtype); + if (rsq < cut_ljsq_ij) { + forcelj = r6inv * ((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv - + (STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2)); + forcelj *= factor_lj*r2inv; + if (eflag_global) { + evdwl = r6inv * ((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv - + (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)) - + (STACKPARAMS?m_params[itype][jtype].offset:params(itype,jtype).offset); + evdwl *= factor_lj; + ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j 0.0 && muj > 0.0) { + + F_FLOAT r7inv = r5inv*r2inv; + + pdotp = mu(i,0)*mu(j,0) + mu(i,1)*mu(j,1) + mu(i,2)*mu(j,2); + pidotr = mu(i,0)*delx + mu(i,1)*dely + mu(i,2)*delz; + pjdotr = mu(j,0)*delx + mu(j,1)*dely + mu(j,2)*delz; + + X_FLOAT pre1 = 3.0*r5inv*pdotp - 15.0*r7inv*pidotr*pjdotr; + F_FLOAT pre2 = 3.0*r5inv*pjdotr; + F_FLOAT pre3 = 3.0*r5inv*pidotr; + F_FLOAT pre4 = -1.0*r3inv; + + forcecoulx += pre1*delx + pre2*mu(i,0) + pre3*mu(j,0); + forcecouly += pre1*dely + pre2*mu(i,1) + pre3*mu(j,1); + forcecoulz += pre1*delz + pre2*mu(i,2) + pre3*mu(j,2); + + F_FLOAT crossx = pre4 * (mu(i,1)*mu(j,2) - mu(i,2)*mu(j,1)); + F_FLOAT crossy = pre4 * (mu(i,2)*mu(j,0) - mu(i,0)*mu(j,2)); + F_FLOAT crossz = pre4 * (mu(i,0)*mu(j,1) - mu(i,1)*mu(j,0)); + + tixcoul += crossx + pre2 * (mu(i,1)*delz - mu(i,2)*dely); + tiycoul += crossy + pre2 * (mu(i,2)*delx - mu(i,0)*delz); + tizcoul += crossz + pre2 * (mu(i,0)*dely - mu(i,1)*delx); + tjxcoul += -crossx + pre3 * (mu(j,1)*delz - mu(j,2)*dely); + tjycoul += -crossy + pre3 * (mu(j,2)*delx - mu(j,0)*delz); + tjzcoul += -crossz + pre3 * (mu(j,0)*dely - mu(j,1)*delx); + } + + // dipole-charge + + if (mui > 0 && qj != 0) { + pidotr = mu(i,0)*delx + mu(i,1)*dely + mu(i,2)*delz; + F_FLOAT pre1 = 3.0*qj*r5inv * pidotr; + F_FLOAT pre2 = qj*r3inv; + + forcecoulx += pre2*mu(i,0) - pre1*delx; + forcecouly += pre2*mu(i,1) - pre1*dely; + forcecoulz += pre2*mu(i,2) - pre1*delz; + tixcoul += pre2 * (mu(i,1)*delz - mu(i,2)*dely); + tiycoul += pre2 * (mu(i,2)*delx - mu(i,0)*delz); + tizcoul += pre2 * (mu(i,0)*dely - mu(i,1)*delx); + } + + // charge-dipole + + if (qtmp != 0 && muj > 0) { + pjdotr = mu(j,0)*delx + mu(j,1)*dely + mu(j,2)*delz; + X_FLOAT pre1 = 3.0*qtmp*r5inv * pjdotr; + X_FLOAT pre2 = qtmp*r3inv; + + forcecoulx += pre1*delx - pre2*mu(j,0); + forcecouly += pre1*dely - pre2*mu(j,1); + forcecoulz += pre1*delz - pre2*mu(j,2); + tjxcoul += -pre2 * (mu(j,1)*delz - mu(j,2)*dely); + tjycoul += -pre2 * (mu(j,2)*delx - mu(j,0)*delz); + tjzcoul += -pre2 * (mu(j,0)*dely - mu(j,1)*delx); + } + + F_FLOAT fq = factor_coul*qqrd2e; + fx = fq*forcecoulx + delx*forcelj; + fy = fq*forcecouly + dely*forcelj; + fz = fq*forcecoulz + delz*forcelj; + + // force & torque accumulation + + fx_i += fx; + fy_i += fy; + fz_i += fz; + torquex_i += fq*tixcoul; + torquey_i += fq*tiycoul; + torquez_i += fq*tizcoul; + + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) { + a_f(j,0) -= fx; + a_f(j,1) -= fy; + a_f(j,2) -= fz; + a_torque(j,0) += fq*tjxcoul; + a_torque(j,1) += fq*tjycoul; + a_torque(j,2) += fq*tjzcoul; + } + + if (EVFLAG && eflag_global) { + ecoul = qtmp*qj*rinv; + if (mu(i,3) > 0.0 && mu(j,3) > 0.0) + ecoul += r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr; + if (mu(i,3) > 0.0 && qj != 0.0) + ecoul += -qj*r3inv*pidotr; + if (mu(j,3) > 0.0 && qtmp != 0.0) + ecoul += qtmp*r3inv*pjdotr; + ecoul *= factor_coul*qqrd2e; + ev.ecoul += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j(ev, i, j, ecoul+evdwl, fx, fy, fz, delx, dely, delz); + } // cutsq_ij + } + + a_f(i,0) += fx_i; + a_f(i,1) += fy_i; + a_f(i,2) += fz_i; + a_torque(i,0) += torquex_i; + a_torque(i,1) += torquey_i; + a_torque(i,2) += torquez_i; +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairLJCutDipoleCutKokkos::operator()(TagPairLJCutDipoleCutKernel, const int ii) const { + EV_FLOAT ev; + this->template operator()(TagPairLJCutDipoleCutKernel(), ii, ev); +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairLJCutDipoleCutKokkos::ev_tally_xyz(EV_FLOAT & ev, int i, int j, const F_FLOAT &epair, + F_FLOAT fx, F_FLOAT fy, F_FLOAT fz, + X_FLOAT delx, X_FLOAT dely, X_FLOAT delz) const +{ + Kokkos::View::value,Kokkos::MemoryTraits::value> > v_eatom = k_eatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits::value> > v_vatom = k_vatom.view(); + + if (eflag_atom) { + const E_FLOAT epairhalf = 0.5 * epair; + if (NEIGHFLAG == FULL || newton_pair || i < nlocal) + v_eatom[i] += epairhalf; + if (NEIGHFLAG != FULL && (newton_pair || j < nlocal)) + v_eatom[j] += epairhalf; + } + + if (vflag_either) { + const F_FLOAT v0 = delx*fx; + const F_FLOAT v1 = dely*fy; + const F_FLOAT v2 = delz*fz; + const F_FLOAT v3 = delx*fy; + const F_FLOAT v4 = delx*fz; + const F_FLOAT v5 = dely*fz; + + if (vflag_global) { + if (NEIGHFLAG != FULL) { + if (NEWTON_PAIR) { // neigh half, newton on + ev.v[0] += v0; + ev.v[1] += v1; + ev.v[2] += v2; + ev.v[3] += v3; + ev.v[4] += v4; + ev.v[5] += v5; + } else { // neigh half, newton off + if (i < nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + if (j < nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } + } else { //neigh full + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } + + if (vflag_atom) { + + if (NEIGHFLAG == FULL || NEWTON_PAIR || i < nlocal) { + v_vatom(i,0) += 0.5*v0; + v_vatom(i,1) += 0.5*v1; + v_vatom(i,2) += 0.5*v2; + v_vatom(i,3) += 0.5*v3; + v_vatom(i,4) += 0.5*v4; + v_vatom(i,5) += 0.5*v5; + } + if (NEIGHFLAG != FULL && (NEWTON_PAIR || j < nlocal)) { + v_vatom(j,0) += 0.5*v0; + v_vatom(j,1) += 0.5*v1; + v_vatom(j,2) += 0.5*v2; + v_vatom(j,3) += 0.5*v3; + v_vatom(j,4) += 0.5*v4; + v_vatom(j,5) += 0.5*v5; + } + } + } +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +template +void PairLJCutDipoleCutKokkos::allocate() +{ + PairLJCutDipoleCut::allocate(); + + int n = atom->ntypes; + memory->destroy(cutsq); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + d_cutsq = k_cutsq.template view(); + memory->destroy(cut_ljsq); + memoryKK->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq"); + d_cut_ljsq = k_cut_ljsq.template view(); + memory->destroy(cut_coulsq); + memoryKK->create_kokkos(k_cut_coulsq,cut_coulsq,n+1,n+1,"pair:cut_coulsq"); + d_cut_coulsq = k_cut_coulsq.template view(); + k_params = Kokkos::DualView("PairLJCutDipoleCut::params",n+1,n+1); + params = k_params.template view(); +} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +template +void PairLJCutDipoleCutKokkos::settings(int narg, char **arg) +{ + if (narg > 2) error->all(FLERR,"Illegal pair_style command"); + + PairLJCutDipoleCut::settings(1,arg); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +template +void PairLJCutDipoleCutKokkos::init_style() +{ + PairLJCutDipoleCut::init_style(); + + // error if rRESPA with inner levels + + if (update->whichflag == 1 && utils::strmatch(update->integrate_style,"^respa")) { + int respa = 0; + if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; + if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; + if (respa) + error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle"); + } + + // adjust neighbor list request for KOKKOS + + neighflag = lmp->kokkos->neighflag; + auto request = neighbor->find_request(this); + request->set_kokkos_host(std::is_same::value && + !std::is_same::value); + request->set_kokkos_device(std::is_same::value); + if (neighflag == FULL) request->enable_full(); +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +template +double PairLJCutDipoleCutKokkos::init_one(int i, int j) +{ + double cutone = PairLJCutDipoleCut::init_one(i,j); + double cut_ljsqm = cut_ljsq[i][j]; + double cut_coulsqm = cut_coulsq[i][j]; + + k_params.h_view(i,j).lj1 = lj1[i][j]; + k_params.h_view(i,j).lj2 = lj2[i][j]; + k_params.h_view(i,j).lj3 = lj3[i][j]; + k_params.h_view(i,j).lj4 = lj4[i][j]; + k_params.h_view(i,j).offset = offset[i][j]; + k_params.h_view(i,j).cut_ljsq = cut_ljsqm; + k_params.h_view(i,j).cut_coulsq = cut_coulsqm; + + k_params.h_view(j,i) = k_params.h_view(i,j); + if (i(); + k_cut_ljsq.h_view(i,j) = k_cut_ljsq.h_view(j,i) = cut_ljsqm; + k_cut_ljsq.template modify(); + k_cut_coulsq.h_view(i,j) = k_cut_coulsq.h_view(j,i) = cut_coulsqm; + k_cut_coulsq.template modify(); + k_params.template modify(); + + return cutone; +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +int PairLJCutDipoleCutKokkos::sbmask(const int& j) const { + return j >> SBBITS & 3; +} + + +namespace LAMMPS_NS { +template class PairLJCutDipoleCutKokkos; +#ifdef LMP_KOKKOS_GPU +template class PairLJCutDipoleCutKokkos; +#endif +} + diff --git a/src/KOKKOS/pair_lj_cut_dipole_cut_kokkos.h b/src/KOKKOS/pair_lj_cut_dipole_cut_kokkos.h new file mode 100644 index 0000000000..d9d548d077 --- /dev/null +++ b/src/KOKKOS/pair_lj_cut_dipole_cut_kokkos.h @@ -0,0 +1,119 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS +// clang-format off +PairStyle(lj/cut/dipole/cut/kk,PairLJCutDipoleCutKokkos); +PairStyle(lj/cut/dipole/cut/kk/device,PairLJCutDipoleCutKokkos); +PairStyle(lj/cut/dipole/cut/kk/host,PairLJCutDipoleCutKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_PAIR_LJ_CUT_DIPOLE_CUT_KOKKOS_H +#define LMP_PAIR_LJ_CUT_DIPOLE_CUT_KOKKOS_H + +#include "pair_kokkos.h" +#include "pair_lj_cut_dipole_cut.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +template +struct TagPairLJCutDipoleCutKernel{}; + +template +class PairLJCutDipoleCutKokkos : public PairLJCutDipoleCut { + public: + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; + enum {COUL_FLAG=1}; + typedef DeviceType device_type; + typedef ArrayTypes AT; + typedef EV_FLOAT value_type; + PairLJCutDipoleCutKokkos(class LAMMPS *); + ~PairLJCutDipoleCutKokkos() override; + + void compute(int, int) override; + + void settings(int, char **) override; + void init_style() override; + double init_one(int, int) override; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairLJCutDipoleCutKernel, const int, EV_FLOAT &ev) const; + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairLJCutDipoleCutKernel, const int) const; + + template + KOKKOS_INLINE_FUNCTION + void ev_tally_xyz(EV_FLOAT &ev, int i, int j, const F_FLOAT &epair, + F_FLOAT fx, F_FLOAT fy, F_FLOAT fz, + X_FLOAT delx, X_FLOAT dely, X_FLOAT delz) const; + + KOKKOS_INLINE_FUNCTION + int sbmask(const int& j) const; + + protected: + Kokkos::DualView k_params; + typename Kokkos::DualView::t_dev_const_um params; + // hardwired to space for 12 atom types + params_lj_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + + F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + typename AT::t_x_array_randomread x; + typename AT::t_x_array c_x; + typename AT::t_f_array f; + typename AT::t_f_array torque; + typename AT::t_int_1d_randomread type; + typename AT::t_float_1d_randomread q; + typename AT::t_mu_array_randomread mu; + typename AT::t_mu_array c_mu; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + typename AT::t_efloat_1d d_eatom; + typename AT::t_virial_array d_vatom; + + typename AT::tdual_ffloat_2d k_cutsq; + typename AT::t_ffloat_2d d_cutsq; + typename AT::tdual_ffloat_2d k_cut_ljsq; + typename AT::t_ffloat_2d d_cut_ljsq; + typename AT::tdual_ffloat_2d k_cut_coulsq; + typename AT::t_ffloat_2d d_cut_coulsq; + + + int neighflag,newton_pair; + int nlocal,nall,eflag,vflag; + + double special_coul[4]; + double special_lj[4]; + double qqrd2e; + + typename AT::t_neighbors_2d d_neighbors; + typename AT::t_int_1d_randomread d_ilist; + typename AT::t_int_1d_randomread d_numneigh; + + void allocate() override; + friend void pair_virial_fdotr_compute(PairLJCutDipoleCutKokkos*); +}; + +} + +#endif +#endif + diff --git a/src/KOKKOS/pair_lj_cut_kokkos.cpp b/src/KOKKOS/pair_lj_cut_kokkos.cpp index 5cff54a440..6896967563 100644 --- a/src/KOKKOS/pair_lj_cut_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_kokkos.cpp @@ -66,7 +66,6 @@ void PairLJCutKokkos::compute(int eflag_in, int vflag_in) eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL) no_virial_fdotr_compute = 1; ev_init(eflag,vflag,0); diff --git a/src/KOKKOS/pair_lj_expand_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_expand_coul_long_kokkos.cpp new file mode 100644 index 0000000000..fe87612cbf --- /dev/null +++ b/src/KOKKOS/pair_lj_expand_coul_long_kokkos.cpp @@ -0,0 +1,498 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Trung Nguyen (U Chicago) +------------------------------------------------------------------------- */ + +#include "pair_lj_expand_coul_long_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "error.h" +#include "force.h" +#include "kokkos.h" +#include "math_const.h" +#include "memory_kokkos.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "neighbor.h" +#include "respa.h" +#include "update.h" + +#include +#include + +using namespace LAMMPS_NS; +using namespace MathConst; + + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +template +PairLJExpandCoulLongKokkos::PairLJExpandCoulLongKokkos(LAMMPS *lmp) : PairLJExpandCoulLong(lmp) +{ + respa_enable = 0; + + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template +PairLJExpandCoulLongKokkos::~PairLJExpandCoulLongKokkos() +{ + if (copymode) return; + + if (allocated) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_cutsq,cutsq); + memoryKK->destroy_kokkos(k_cut_ljsq,cut_ljsq); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJExpandCoulLongKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + ev_init(eflag,vflag,0); + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.view(); + } + if (vflag_atom) { + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom"); + d_vatom = k_vatom.view(); + } + + atomKK->sync(execution_space,datamask_read); + k_cutsq.template sync(); + k_cut_ljsq.template sync(); + k_params.template sync(); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + x = atomKK->k_x.view(); + c_x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + q = atomKK->k_q.view(); + type = atomKK->k_type.view(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + special_lj[0] = force->special_lj[0]; + special_lj[1] = force->special_lj[1]; + special_lj[2] = force->special_lj[2]; + special_lj[3] = force->special_lj[3]; + special_coul[0] = force->special_coul[0]; + special_coul[1] = force->special_coul[1]; + special_coul[2] = force->special_coul[2]; + special_coul[3] = force->special_coul[3]; + qqrd2e = force->qqrd2e; + newton_pair = force->newton_pair; + + // loop over neighbors of my atoms + + copymode = 1; + + EV_FLOAT ev; + if (ncoultablebits) + ev = pair_compute,CoulLongTable<1> > + (this,(NeighListKokkos*)list); + else + ev = pair_compute,CoulLongTable<0> > + (this,(NeighListKokkos*)list); + + + if (eflag_global) { + eng_vdwl += ev.evdwl; + eng_coul += ev.ecoul; + } + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] += ev.v[5]; + } + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + copymode = 0; +} + +/* ---------------------------------------------------------------------- + compute LJ 12-6 pair force between atoms i and j + ---------------------------------------------------------------------- */ +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJExpandCoulLongKokkos:: +compute_fpair(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/, + const int& itype, const int& jtype) const { + const F_FLOAT r = sqrt(rsq); + const F_FLOAT rshift = r - (STACKPARAMS?m_params[itype][jtype].shift:params(itype,jtype).shift); + const F_FLOAT rshiftsq = rshift*rshift; + const F_FLOAT r2inv = 1.0/rshiftsq; + const F_FLOAT r6inv = r2inv*r2inv*r2inv; + + const F_FLOAT forcelj = r6inv * + ((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv - + (STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2)); + + return forcelj/rshift/r; +} + +/* ---------------------------------------------------------------------- + compute coulomb pair force between atoms i and j + ---------------------------------------------------------------------- */ +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJExpandCoulLongKokkos:: +compute_fcoul(const F_FLOAT& rsq, const int& /*i*/, const int&j, + const int& /*itype*/, const int& /*jtype*/, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const { + if (Specialisation::DoTable && rsq > tabinnersq) { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits; + const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable]; + const F_FLOAT table = d_ftable[itable] + fraction*d_dftable[itable]; + F_FLOAT forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable]; + const F_FLOAT prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + return forcecoul/rsq; + } else { + const F_FLOAT r = sqrt(rsq); + const F_FLOAT grij = g_ewald * r; + const F_FLOAT expm2 = exp(-grij*grij); + const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij); + const F_FLOAT rinv = 1.0/r; + const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + const F_FLOAT prefactor = qqrd2e * qtmp*q[j]*rinv; + F_FLOAT forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + + return forcecoul*rinv*rinv; + } +} + +/* ---------------------------------------------------------------------- + compute LJ 12-6 pair potential energy between atoms i and j + ---------------------------------------------------------------------- */ +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJExpandCoulLongKokkos:: +compute_evdwl(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/, + const int& itype, const int& jtype) const { + + const F_FLOAT r = sqrt(rsq); + const F_FLOAT rshift = r - (STACKPARAMS?m_params[itype][jtype].shift:params(itype,jtype).shift); + const F_FLOAT rshiftsq = rshift*rshift; + const F_FLOAT r2inv = 1.0/rshiftsq; + const F_FLOAT r6inv = r2inv*r2inv*r2inv; + + return r6inv*((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv + - (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)) + - (STACKPARAMS?m_params[itype][jtype].offset:params(itype,jtype).offset); +} + +/* ---------------------------------------------------------------------- + compute coulomb pair potential energy between atoms i and j + ---------------------------------------------------------------------- */ +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJExpandCoulLongKokkos:: +compute_ecoul(const F_FLOAT& rsq, const int& /*i*/, const int&j, + const int& /*itype*/, const int& /*jtype*/, + const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const { + if (Specialisation::DoTable && rsq > tabinnersq) { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits; + const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable]; + const F_FLOAT table = d_etable[itable] + fraction*d_detable[itable]; + F_FLOAT ecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable]; + const F_FLOAT prefactor = qtmp*q[j] * table; + ecoul -= (1.0-factor_coul)*prefactor; + } + return ecoul; + } else { + const F_FLOAT r = sqrt(rsq); + const F_FLOAT grij = g_ewald * r; + const F_FLOAT expm2 = exp(-grij*grij); + const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij); + const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + const F_FLOAT prefactor = qqrd2e * qtmp*q[j]/r; + F_FLOAT ecoul = prefactor * erfc; + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + return ecoul; + } +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +template +void PairLJExpandCoulLongKokkos::allocate() +{ + PairLJExpandCoulLong::allocate(); + + int n = atom->ntypes; + memory->destroy(cutsq); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + d_cutsq = k_cutsq.template view(); + + memory->destroy(cut_ljsq); + memoryKK->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq"); + d_cut_ljsq = k_cut_ljsq.template view(); + + d_cut_coulsq = typename AT::t_ffloat_2d("pair:cut_coulsq",n+1,n+1); + + k_params = Kokkos::DualView("PairLJExpandCoulLong::params",n+1,n+1); + params = k_params.template view(); +} + +template +void PairLJExpandCoulLongKokkos::init_tables(double cut_coul, double *cut_respa) +{ + Pair::init_tables(cut_coul,cut_respa); + + typedef typename ArrayTypes::t_ffloat_1d table_type; + typedef typename ArrayTypes::t_ffloat_1d host_table_type; + + int ntable = 1; + for (int i = 0; i < ncoultablebits; i++) ntable *= 2; + + + // Copy rtable and drtable + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + for (int i = 0; i < ntable; i++) { + h_table(i) = rtable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_rtable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + for (int i = 0; i < ntable; i++) { + h_table(i) = drtable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_drtable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + // Copy ftable and dftable + for (int i = 0; i < ntable; i++) { + h_table(i) = ftable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_ftable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + for (int i = 0; i < ntable; i++) { + h_table(i) = dftable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_dftable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + // Copy ctable and dctable + for (int i = 0; i < ntable; i++) { + h_table(i) = ctable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_ctable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + for (int i = 0; i < ntable; i++) { + h_table(i) = dctable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_dctable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + // Copy etable and detable + for (int i = 0; i < ntable; i++) { + h_table(i) = etable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_etable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + for (int i = 0; i < ntable; i++) { + h_table(i) = detable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_detable = d_table; + } +} + + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +template +void PairLJExpandCoulLongKokkos::settings(int narg, char **arg) +{ + if (narg > 2) error->all(FLERR,"Illegal pair_style command"); + + PairLJExpandCoulLong::settings(narg,arg); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +template +void PairLJExpandCoulLongKokkos::init_style() +{ + PairLJExpandCoulLong::init_style(); + + Kokkos::deep_copy(d_cut_coulsq,cut_coulsq); + + // error if rRESPA with inner levels + + if (update->whichflag == 1 && utils::strmatch(update->integrate_style,"^respa")) { + int respa = 0; + if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; + if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; + if (respa) + error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle"); + } + + // adjust neighbor list request for KOKKOS + + neighflag = lmp->kokkos->neighflag; + auto request = neighbor->find_request(this); + request->set_kokkos_host(std::is_same::value && + !std::is_same::value); + request->set_kokkos_device(std::is_same::value); + if (neighflag == FULL) request->enable_full(); +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +template +double PairLJExpandCoulLongKokkos::init_one(int i, int j) +{ + double cutone = PairLJExpandCoulLong::init_one(i,j); + double cut_ljsqm = cut_ljsq[i][j]; + + k_params.h_view(i,j).lj1 = lj1[i][j]; + k_params.h_view(i,j).lj2 = lj2[i][j]; + k_params.h_view(i,j).lj3 = lj3[i][j]; + k_params.h_view(i,j).lj4 = lj4[i][j]; + k_params.h_view(i,j).offset = offset[i][j]; + k_params.h_view(i,j).shift = shift[i][j]; + k_params.h_view(i,j).cut_ljsq = cut_ljsqm; + k_params.h_view(i,j).cut_coulsq = cut_coulsq; + + k_params.h_view(j,i) = k_params.h_view(i,j); + if (i(); + k_cut_ljsq.h_view(i,j) = k_cut_ljsq.h_view(j,i) = cut_ljsqm; + k_cut_ljsq.template modify(); + k_params.template modify(); + + return cutone; +} + +namespace LAMMPS_NS { +template class PairLJExpandCoulLongKokkos; +#ifdef LMP_KOKKOS_GPU +template class PairLJExpandCoulLongKokkos; +#endif +} + diff --git a/src/KOKKOS/pair_lj_expand_coul_long_kokkos.h b/src/KOKKOS/pair_lj_expand_coul_long_kokkos.h new file mode 100644 index 0000000000..09a694a122 --- /dev/null +++ b/src/KOKKOS/pair_lj_expand_coul_long_kokkos.h @@ -0,0 +1,148 @@ +// clang-format off +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS +// clang-format off +PairStyle(lj/expand/coul/long/kk,PairLJExpandCoulLongKokkos); +PairStyle(lj/expand/coul/long/kk/device,PairLJExpandCoulLongKokkos); +PairStyle(lj/expand/coul/long/kk/host,PairLJExpandCoulLongKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_PAIR_LJ_EXPAND_COUL_LONG_KOKKOS_H +#define LMP_PAIR_LJ_EXPAND_COUL_LONG_KOKKOS_H + +#include "pair_kokkos.h" +#include "pair_lj_expand_coul_long.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +template +class PairLJExpandCoulLongKokkos : public PairLJExpandCoulLong { + public: + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; + enum {COUL_FLAG=1}; + typedef DeviceType device_type; + typedef ArrayTypes AT; + PairLJExpandCoulLongKokkos(class LAMMPS *); + ~PairLJExpandCoulLongKokkos() override; + + void compute(int, int) override; + + void settings(int, char **) override; + void init_tables(double cut_coul, double *cut_respa) override; + void init_style() override; + double init_one(int, int) override; + + struct params_lj_coul{ + KOKKOS_INLINE_FUNCTION + params_lj_coul() {cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;shift=0;}; + KOKKOS_INLINE_FUNCTION + params_lj_coul(int /*i*/) {cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;shift=0;}; + F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset,shift; + }; + + protected: + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const; + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, + const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const; + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const; + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const; + + Kokkos::DualView k_params; + typename Kokkos::DualView::t_dev_const_um params; + // hardwired to space for 12 atom types + params_lj_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + + F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + typename AT::t_x_array_randomread x; + typename AT::t_x_array c_x; + typename AT::t_f_array f; + typename AT::t_int_1d_randomread type; + typename AT::t_float_1d_randomread q; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + typename AT::t_efloat_1d d_eatom; + typename AT::t_virial_array d_vatom; + + int newton_pair; + + typename AT::tdual_ffloat_2d k_cutsq; + typename AT::t_ffloat_2d d_cutsq; + typename AT::tdual_ffloat_2d k_cut_ljsq; + typename AT::t_ffloat_2d d_cut_ljsq; + typename AT::t_ffloat_2d d_cut_coulsq; + + typename AT::t_ffloat_1d_randomread + d_rtable, d_drtable, d_ftable, d_dftable, + d_ctable, d_dctable, d_etable, d_detable; + + int neighflag; + int nlocal,nall,eflag,vflag; + + double special_coul[4]; + double special_lj[4]; + double qqrd2e; + + void allocate() override; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend EV_FLOAT pair_compute_neighlist >(PairLJExpandCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist >(PairLJExpandCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist >(PairLJExpandCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute >(PairLJExpandCoulLongKokkos*, + NeighListKokkos*); + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend EV_FLOAT pair_compute_neighlist >(PairLJExpandCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist >(PairLJExpandCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist >(PairLJExpandCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute >(PairLJExpandCoulLongKokkos*, + NeighListKokkos*); + friend void pair_virial_fdotr_compute(PairLJExpandCoulLongKokkos*); +}; + +} + +#endif +#endif + diff --git a/src/atom_masks.h b/src/atom_masks.h index f94ecd6cbd..0058c3f57f 100644 --- a/src/atom_masks.h +++ b/src/atom_masks.h @@ -42,6 +42,7 @@ #define MAP_MASK 0x00008000 #define ENERGY_MASK 0x00010000 #define VIRIAL_MASK 0x00020000 +#define MU_MASK 0x00040000 // SPIN