diff --git a/doc/src/Commands_compute.rst b/doc/src/Commands_compute.rst index 0169826751..9dfb28fa8b 100644 --- a/doc/src/Commands_compute.rst +++ b/doc/src/Commands_compute.rst @@ -72,6 +72,7 @@ KOKKOS, o = OPENMP, t = OPT. * :doc:`gyration/shape/chunk ` * :doc:`heat/flux ` * :doc:`heat/flux/tally ` + * :doc:`heat/flux/virial/tally ` * :doc:`hexorder/atom ` * :doc:`hma ` * :doc:`improper ` diff --git a/doc/src/compute.rst b/doc/src/compute.rst index c2b2b92aa1..7a89c6cc87 100644 --- a/doc/src/compute.rst +++ b/doc/src/compute.rst @@ -208,7 +208,7 @@ The individual style names on the :doc:`Commands compute ` doc * :doc:`event/displace ` - detect event on atom displacement * :doc:`fabric ` - calculates fabric tensors from pair interactions * :doc:`fep ` - -* :doc:`force/tally ` - +* :doc:`force/tally ` - force between two groups of atoms via the tally callback mechanism * :doc:`fragment/atom ` - fragment ID for each atom * :doc:`global/atom ` - * :doc:`group/group ` - energy/force between two groups of atoms @@ -217,7 +217,8 @@ The individual style names on the :doc:`Commands compute ` doc * :doc:`gyration/shape ` - shape parameters from gyration tensor * :doc:`gyration/shape/chunk ` - shape parameters from gyration tensor for each chunk * :doc:`heat/flux ` - heat flux through a group of atoms -* :doc:`heat/flux/tally ` - +* :doc:`heat/flux/tally ` - heat flux through a group of atoms via the tally callback mechanism +* :doc:`heat/flux/virial/tally ` - virial heat flux between two groups via the tally callback mechanism * :doc:`hexorder/atom ` - bond orientational order parameter q6 * :doc:`hma ` - harmonically mapped averaging for atomic crystals * :doc:`improper ` - energy of each improper sub-style @@ -240,8 +241,8 @@ The individual style names on the :doc:`Commands compute ` doc * :doc:`pe ` - potential energy * :doc:`pe/atom ` - potential energy for each atom * :doc:`mesont ` - Nanotube bending, stretching, and intertube energies -* :doc:`pe/mol/tally ` - -* :doc:`pe/tally ` - +* :doc:`pe/mol/tally ` - potential energy between two groups of atoms separated into intermolecular and intramolecular components via the tally callback mechanism +* :doc:`pe/tally ` - potential energy between two groups of atoms via the tally callback mechanism * :doc:`plasticity/atom ` - Peridynamic plasticity for each atom * :doc:`pressure ` - total pressure and pressure tensor * :doc:`pressure/cylinder ` - pressure tensor in cylindrical coordinates @@ -289,7 +290,7 @@ The individual style names on the :doc:`Commands compute ` doc * :doc:`stress/atom ` - stress tensor for each atom * :doc:`stress/mop ` - normal components of the local stress tensor using the method of planes * :doc:`stress/mop/profile ` - profile of the normal components of the local stress tensor using the method of planes -* :doc:`stress/tally ` - +* :doc:`stress/tally ` - stress between two groups of atoms via the tally callback mechanism * :doc:`tdpd/cc/atom ` - per-atom chemical concentration of a specified species for each tDPD particle * :doc:`temp ` - temperature of group of atoms * :doc:`temp/asphere ` - temperature of aspherical particles diff --git a/doc/src/compute_tally.rst b/doc/src/compute_tally.rst index 32e3e31030..81756dda7c 100644 --- a/doc/src/compute_tally.rst +++ b/doc/src/compute_tally.rst @@ -1,5 +1,6 @@ .. index:: compute force/tally .. index:: compute heat/flux/tally +.. index:: compute heat/flux/virial/tally .. index:: compute pe/tally .. index:: compute pe/mol/tally ..
index:: compute stress/tally @@ -10,6 +11,9 @@ compute force/tally command compute heat/flux/tally command =============================== +compute heat/flux/virial/tally command +====================================== + compute pe/tally command ======================== @@ -27,7 +31,7 @@ Syntax compute ID group-ID style group2-ID * ID, group-ID are documented in :doc:`compute ` command -* style = *force/tally* or *pe/tally* or *pe/mol/tally* or *stress/tally* +* style = *force/tally* or *heat/flux/tally* or *heat/flux/virial/tally* or *pe/tally* or *pe/mol/tally* or *stress/tally* * group2-ID = group ID of second (or same) group Examples @@ -38,13 +42,17 @@ Examples compute 1 lower force/tally upper compute 1 left pe/tally right compute 1 lower stress/tally lower + compute 1 subregion heat/flux/tally all + compute 1 liquid heat/flux/virial/tally solid Description """"""""""" Define a computation that calculates properties between two groups of -atoms by accumulating them from pairwise non-bonded computations. The -two groups can be the same. This is similar to :doc:`compute group/group ` only that the data is +atoms by accumulating them from pairwise non-bonded computations. +Except for *heat/flux/virial/tally*, the two groups can be the same. +This is similar to :doc:`compute group/group `, +except that the data is accumulated directly during the non-bonded force computation. The computes *force/tally*\ , *pe/tally*\ , *stress/tally*\ , and *heat/flux/tally* are primarily provided as examples of how to program @@ -57,6 +65,76 @@ the base classes of LAMMPS. ---------- +Compute *heat/flux/tally* obtains the heat flux +(strictly speaking, heat flow) inside the first group, +which is the sum of the convective contribution +due to atoms in the first group and the virial contribution +due to interaction between the first and second groups: + +.. math:: + + \mathbf{Q} = \sum_{i \in \text{group 1}} e_i \mathbf{v}_i + \frac{1}{2} \sum_{i \in \text{group 1}} \sum_{\substack{j \in \text{group 2} \\ j \neq i}} \left( \mathbf{F}_{ij} \cdot \mathbf{v}_j \right) \mathbf{r}_{ij} + +When the second group in *heat/flux/tally* is set to "all", +the resulting values will be identical +to those obtained by :doc:`compute heat/flux `, +provided only pairwise interactions exist. + +Compute *heat/flux/virial/tally* obtains the total virial heat flux +(strictly speaking, heat flow) into the first group due to interaction +with the second group, which is defined as: + +.. math:: + + Q = \frac{1}{2} \sum_{i \in \text{group 1}} \sum_{j \in \text{group 2}} \mathbf{F}_{ij} \cdot \left(\mathbf{v}_i + \mathbf{v}_j \right) + +Although the *heat/flux/virial/tally* compute +does not include the convective term, +it can be used to obtain the total heat flux across control surfaces +when no particles cross them, +as is often the case at solid-solid and solid-liquid interfaces. +In that case it is identical to the method of planes. +Note that the *heat/flux/virial/tally* compute is distinctly different +from the *heat/flux* and *heat/flux/tally* computes, +which are essentially volume-averaging methods. +The following example demonstrates the difference: + +.. code-block:: LAMMPS + + # System with only pairwise interactions. + # Non-periodic boundaries in the x direction. + # Has LeftLiquid and RightWall groups along x direction.
+ + # Heat flux over the solid-liquid interface + compute hflow_hfvt LeftLiquid heat/flux/virial/tally RightWall + variable hflux_hfvt equal c_hflow_hfvt/(ly*lz) + + # x component of approximate heat flux vector inside the liquid region, + # two approaches. + # + compute myKE all ke/atom + compute myPE all pe/atom + compute myStress all stress/atom NULL virial + compute hflow_hf LeftLiquid heat/flux myKE myPE myStress + variable hflux_hf equal c_hflow_hf[1]/${volLiq} + # + compute hflow_hft LeftLiquid heat/flux/tally all + variable hflux_hft equal c_hflow_hft[1]/${volLiq} + + # Pressure over the solid-liquid interface, three approaches. + # + compute force_gg RightWall group/group LeftLiquid + variable press_gg equal c_force_gg[1]/(ly*lz) + # + compute force_ft RightWall force/tally LeftLiquid + compute rforce_ft RightWall reduce sum c_force_ft[1] + variable press_ft equal c_rforce_ft/(ly*lz) + # + compute rforce_hfvt all reduce sum c_hflow_hfvt[1] + variable press_hfvt equal -c_rforce_hfvt/(ly*lz) + +---------- + The pairwise contributions are computed via a callback that the compute registers with the non-bonded pairwise force computation. This limits the use to systems that have no bonds, no Kspace, and no @@ -83,7 +161,17 @@ magnitude) and a per atom 3-element vector (force contribution from each atom). Compute *stress/tally* calculates a global scalar (average of the diagonal elements of the stress tensor) and a per atom vector (the 6 elements of stress tensor contributions from the -individual atom). +individual atom). As in :doc:`compute heat/flux `, +compute *heat/flux/tally* calculates a global vector of length 6, +where the first 3 components are the :math:`x`, :math:`y`, :math:`z` +components of the full heat flow vector, +and the next 3 components are the corresponding components +of just the convective portion of the flow, i.e. the +first term in the equation for :math:`\mathbf{Q}`. +Compute *heat/flux/virial/tally* calculates a global scalar (heat flow) +and a per atom 3-element vector +(contribution to the force acting on atoms in the first group +from individual atoms in both groups). Both the scalar and vector values calculated by this compute are "extensive".
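All of the *\*/tally* styles documented above share one registration pattern, implemented in full for the new style in src/TALLY/compute_heat_flux_virial_tally.cpp later in this patch. The following minimal sketch shows only that pattern; the class name and the callback body are placeholders, not part of this patch, while the hook names (add_tally_callback, del_tally_callback, pair_tally_callback, peflag) come from the Compute and Pair base classes used by the sources below:

.. code-block:: c++

   #include "compute.h"
   #include "force.h"
   #include "pair.h"

   namespace LAMMPS_NS {

   // skeleton of a tally-style compute: it registers itself with the
   // pair style, which then calls pair_tally_callback() from
   // Pair::ev_tally() once per pairwise interaction
   class ComputeMyTally : public Compute {
    public:
     ComputeMyTally(class LAMMPS *lmp, int narg, char **arg) :
       Compute(lmp, narg, arg)
     {
       scalar_flag = 1;
       peflag = 1;    // request that Pair::ev_tally() be run
     }

     ~ComputeMyTally()
     {
       // deregister so the pair style holds no dangling callback
       if (force && force->pair) force->pair->del_tally_callback(this);
     }

     void init()
     {
       force->pair->add_tally_callback(this);
     }

     void pair_tally_callback(int i, int j, int nlocal, int newton,
                              double evdwl, double ecoul, double fpair,
                              double dx, double dy, double dz)
     {
       // accumulate the pairwise quantity of interest here
     }
   };

   }    // namespace LAMMPS_NS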
diff --git a/src/BOCS/fix_bocs.cpp b/src/BOCS/fix_bocs.cpp index d372d607e8..89ff3d8e4e 100644 --- a/src/BOCS/fix_bocs.cpp +++ b/src/BOCS/fix_bocs.cpp @@ -1100,7 +1100,12 @@ void FixBocs::initial_integrate_respa(int /*vflag*/, int ilevel, int /*iloop*/) nve_x(); if (pstat_flag) remap(); } +} +/* ---------------------------------------------------------------------- */ + +void FixBocs::pre_force_respa(int /*vflag*/, int ilevel, int /*iloop*/) +{ // if barostat, redo KSpace coeffs at outermost level, // since volume has changed diff --git a/src/BOCS/fix_bocs.h b/src/BOCS/fix_bocs.h index 4edf670fa5..69b32d4cd0 100644 --- a/src/BOCS/fix_bocs.h +++ b/src/BOCS/fix_bocs.h @@ -37,6 +37,7 @@ class FixBocs : public Fix { virtual void initial_integrate(int); virtual void final_integrate(); void initial_integrate_respa(int, int, int); + void pre_force_respa(int, int, int); void final_integrate_respa(int, int); virtual void pre_exchange(); double compute_scalar(); diff --git a/src/DRUDE/fix_tgnh_drude.cpp b/src/DRUDE/fix_tgnh_drude.cpp index 9f9bcada78..475a81e843 100644 --- a/src/DRUDE/fix_tgnh_drude.cpp +++ b/src/DRUDE/fix_tgnh_drude.cpp @@ -589,6 +589,7 @@ int FixTGNHDrude::setmask() mask |= INITIAL_INTEGRATE; mask |= FINAL_INTEGRATE; mask |= INITIAL_INTEGRATE_RESPA; + mask |= PRE_FORCE_RESPA; mask |= FINAL_INTEGRATE_RESPA; if (pre_exchange_flag) mask |= PRE_EXCHANGE; return mask; @@ -1023,7 +1024,12 @@ void FixTGNHDrude::initial_integrate_respa(int /*vflag*/, int ilevel, int /*iloo nve_x(); if (pstat_flag) remap(); } +} +/* ---------------------------------------------------------------------- */ + +void FixTGNHDrude::pre_force_respa(int /*vflag*/, int ilevel, int /*iloop*/) +{ // if barostat, redo KSpace coeffs at outermost level, // since volume has changed diff --git a/src/DRUDE/fix_tgnh_drude.h b/src/DRUDE/fix_tgnh_drude.h index 3549317655..617a99f856 100644 --- a/src/DRUDE/fix_tgnh_drude.h +++ b/src/DRUDE/fix_tgnh_drude.h @@ -28,6 +28,7 @@ class FixTGNHDrude : public Fix { virtual void setup(int); virtual void initial_integrate(int); virtual void final_integrate(); + void pre_force_respa(int, int, int); void initial_integrate_respa(int, int, int); void final_integrate_respa(int, int); virtual void pre_exchange(); diff --git a/src/INTEL/intel_intrinsics.h b/src/INTEL/intel_intrinsics.h index a17cc2859a..295310283d 100644 --- a/src/INTEL/intel_intrinsics.h +++ b/src/INTEL/intel_intrinsics.h @@ -85,7 +85,11 @@ struct vector_ops {}; // Intrinsic routines for IMCI and AVX-512 #if defined(__MIC__) || defined(__AVX512F__) // Integer vector class +#ifdef __INTEL_LLVM_COMPILER +#pragma pack(push,16) +#else #pragma pack(push,64) +#endif struct ivec32x16 { __m512i vec; ivec32x16() {} @@ -113,7 +117,7 @@ struct vector_ops { typedef double fscal; typedef F64vec8 fvec; typedef ivec32x16 ivec; - typedef __mmask bvec; + typedef __mmask16 bvec; typedef double farr[8] __attribute__((aligned(64))); typedef int iarr[16] __attribute__((aligned(64))); static fvec recip(const fvec &a) { return _mm512_recip_pd(a); } @@ -250,7 +254,7 @@ struct vector_ops { typedef float fscal; typedef F32vec16 fvec; typedef ivec32x16 ivec; - typedef __mmask bvec; + typedef __mmask16 bvec; typedef float farr[16] __attribute__((aligned(64))); typedef int iarr[16] __attribute__((aligned(64))); static const bvec full_mask = 0xFFFF; @@ -380,16 +384,18 @@ struct vector_ops { *r3 = gather<4>(*r3, mask, idxs, reinterpret_cast(base) + 12); } // Additional routines needed for the implementation of mixed precision - static fvec cvtdown(const 
vector_ops::fvec &lo, const vector_ops::fvec &hi) { + static fvec cvtdown(const vector_ops::fvec &lo, + const vector_ops::fvec &hi) { __m512 t1 = _mm512_cvtpd_pslo(lo); __m512 t2 = _mm512_cvtpd_pslo(hi); - return _mm512_mask_permute4f128_ps(t1, 0xFF00, t2, _MM_PERM_BADC); + return _mm512_mask_shuffle_f32x4(_mm512_undefined_ps(), 0xFF00, t2, t2, + 0x4E); } static vector_ops::fvec cvtup_lo(const fvec &a) { return _mm512_cvtpslo_pd(a); } static vector_ops::fvec cvtup_hi(const fvec &a) { - return _mm512_cvtpslo_pd(_mm512_permute4f128_ps(a, _MM_PERM_BADC)); // permute DCBA -> BADC + return _mm512_cvtpslo_pd(_mm512_shuffle_f32x4(a, a, 0x4E)); } static void mask_cvtup(const bvec &a, vector_ops::bvec *blo, vector_ops::bvec *bhi) { *blo = a & 0xFF; @@ -1692,7 +1698,7 @@ struct vector_ops { typedef flt_t fscal; typedef flt_t fvec; typedef int ivec; - typedef bool bvec; + typedef int bvec; typedef flt_t farr[1]; typedef int iarr[1]; static fvec recip(const fvec &a) { diff --git a/src/INTEL/intel_intrinsics_airebo.h b/src/INTEL/intel_intrinsics_airebo.h index f49abbaf3d..ac58ca2438 100644 --- a/src/INTEL/intel_intrinsics_airebo.h +++ b/src/INTEL/intel_intrinsics_airebo.h @@ -651,12 +651,12 @@ class avec16pd { return a >> 8; } VEC_INLINE static __m512i get_ivec_hi(__m512i a) { - return _mm512_permute4f128_epi32(a, _MM_PERM_BADC); + return _mm512_shuffle_i32x4(a, a, 0x4E); } public: VEC_INLINE avec16pd(const FVEC_NAME &a) { lo_ = _mm512_cvtpslo_pd(a.val_); - hi_ = _mm512_cvtpslo_pd(_mm512_permute4f128_ps(a.val_, _MM_PERM_BADC)); + hi_ = _mm512_cvtpslo_pd(_mm512_shuffle_f32x4(a.val_, a.val_, 0x4E)); } VEC_INLINE static avec16pd undefined() { return avec16pd(_mm512_undefined_pd(), _mm512_undefined_pd()); diff --git a/src/INTEL/intel_preprocess.h b/src/INTEL/intel_preprocess.h index 6f9b37a700..0bec9935db 100644 --- a/src/INTEL/intel_preprocess.h +++ b/src/INTEL/intel_preprocess.h @@ -16,6 +16,11 @@ Contributing author: W. 
Michael Brown (Intel) ------------------------------------------------------------------------- */ +#ifdef __INTEL_LLVM_COMPILER +#define __INTEL_COMPILER __INTEL_LLVM_COMPILER +#define __INTEL_COMPILER_BUILD_DATE __INTEL_LLVM_COMPILER +#endif + #ifdef __INTEL_COMPILER #define LMP_SIMD_COMPILER #if (__INTEL_COMPILER_BUILD_DATE > 20160720) diff --git a/src/INTEL/pair_airebo_intel.cpp b/src/INTEL/pair_airebo_intel.cpp index 94874153aa..5ea0d3168b 100644 --- a/src/INTEL/pair_airebo_intel.cpp +++ b/src/INTEL/pair_airebo_intel.cpp @@ -2332,7 +2332,7 @@ static void aut_rebo_neigh(KernelArgsAIREBOT * ka) { int n_skin = 0; int lowest_idx; - #pragma unroll(4) + //#pragma unroll(4) for (lowest_idx = 0; lowest_idx < jnum; lowest_idx += fvec::VL) { bvec j_mask = bvec::full(); if (lowest_idx + fvec::VL > jnum) j_mask = bvec::only(jnum - lowest_idx); diff --git a/src/INTEL/pair_dpd_intel.cpp b/src/INTEL/pair_dpd_intel.cpp index 089396afa3..e7514a1f95 100644 --- a/src/INTEL/pair_dpd_intel.cpp +++ b/src/INTEL/pair_dpd_intel.cpp @@ -289,7 +289,7 @@ void PairDPDIntel::eval(const int offload, const int vflag, } #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned nog2s + #pragma vector aligned #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ sv0, sv1, sv2, sv3, sv4, sv5) #endif diff --git a/src/INTEL/pair_eam_intel.cpp b/src/INTEL/pair_eam_intel.cpp index a86040b6b3..dcff8957fd 100644 --- a/src/INTEL/pair_eam_intel.cpp +++ b/src/INTEL/pair_eam_intel.cpp @@ -308,7 +308,7 @@ void PairEAMIntel::eval(const int offload, const int vflag, acc_t rhoi = (acc_t)0.0; int ej = 0; #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned nog2s + #pragma vector aligned #pragma ivdep #endif for (int jj = 0; jj < jnum; jj++) { @@ -327,7 +327,7 @@ void PairEAMIntel::eval(const int offload, const int vflag, } #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned nog2s + #pragma vector aligned #pragma simd reduction(+:rhoi) #endif for (int jj = 0; jj < ej; jj++) { @@ -414,7 +414,7 @@ void PairEAMIntel::eval(const int offload, const int vflag, if (EFLAG) tevdwl = (acc_t)0.0; #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned nog2s + #pragma vector aligned #pragma simd reduction(+:tevdwl) #endif for (int ii = iifrom; ii < iito; ++ii) { @@ -488,7 +488,7 @@ void PairEAMIntel::eval(const int offload, const int vflag, int ej = 0; #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned nog2s + #pragma vector aligned #pragma ivdep #endif for (int jj = 0; jj < jnum; jj++) { @@ -510,7 +510,7 @@ void PairEAMIntel::eval(const int offload, const int vflag, } #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned nog2s + #pragma vector aligned #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ sv0, sv1, sv2, sv3, sv4, sv5) #endif diff --git a/src/INTEL/pair_lj_cut_intel.cpp b/src/INTEL/pair_lj_cut_intel.cpp index eb60e0442f..84bc664e18 100644 --- a/src/INTEL/pair_lj_cut_intel.cpp +++ b/src/INTEL/pair_lj_cut_intel.cpp @@ -241,7 +241,7 @@ void PairLJCutIntel::eval(const int offload, const int vflag, if (vflag == VIRIAL_PAIR) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned nog2s + #pragma vector aligned #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ sv0, sv1, sv2, sv3, sv4, sv5) #endif diff --git a/src/INTEL/pair_tersoff_intel.cpp b/src/INTEL/pair_tersoff_intel.cpp index 248c3420e6..d4b8f7d499 100644 --- a/src/INTEL/pair_tersoff_intel.cpp +++ b/src/INTEL/pair_tersoff_intel.cpp @@ -349,11 +349,11 @@ void PairTersoffIntel::eval(const int 
offload, const int vflag, lmp_intel::vector_traits::support_integer_and_gather_ops; bool use_scalar = VL < 4; if (use_scalar) { - IntelKernelTersoff::kernel(ARGS); + IntelKernelTersoff::template kernel(ARGS); } else if (pack_i) { - IntelKernelTersoff::kernel(ARGS); + IntelKernelTersoff::template kernel(ARGS); } else { - IntelKernelTersoff::kernel(ARGS); + IntelKernelTersoff::template kernel(ARGS); } if (EFLAG) oevdwl += sevdwl; } @@ -673,7 +673,8 @@ void IntelKernelTersoff::kernel_step( fvec vrijsq = vdx_ij * vdx_ij + vdy_ij * vdy_ij + vdz_ij * vdz_ij; fvec vrij = sqrt(vrijsq); ivec vis_orig = v::int_load_vl(is); - ivec vnumneigh_i = v::int_gather<4>(v_i0, vmask, vis_orig, numneigh); + ivec vnumneigh_i = v::template int_gather<4>(v_i0, vmask, vis_orig, + numneigh); ivec vc_idx_ij = v::int_mullo(v_i4floats, vw_j + v::int_mullo(v_i_ntypes, vw_i)); fvec vzeta = v::zero(); @@ -700,14 +701,16 @@ void IntelKernelTersoff::kernel_step( while (! v::mask_testz(vactive_mask) && cache_idx < N_CACHE) { bvec vnew_mask = vactive_mask & ~ veff_old_mask; vks = v::int_mullo(v_i4floats, v_i_NEIGHMASK & - v::int_gather<4>(vks, vactive_mask, vkks + vcnumneigh_i, firstneigh)); + (v::template int_gather<4>(vks, vactive_mask, + vkks + vcnumneigh_i, + firstneigh))); v::gather_x(vks, vnew_mask, x, &vx_k, &vy_k, &vz_k, &vw_k); fvec vdx_ik = (vx_k - vx_i); fvec vdy_ik = (vy_k - vy_i); fvec vdz_ik = (vz_k - vz_i); fvec vrsq = vdx_ik * vdx_ik + vdy_ik * vdy_ik + vdz_ik * vdz_ik; ivec vc_idx = v::int_mullo(v_i4floats, vw_k) + v::int_mullo(v_i_ntypes, vc_idx_ij); - vcutsq = v::gather<4>(vcutsq, vnew_mask, vc_idx, c_inner); + vcutsq = v::template gather<4>(vcutsq, vnew_mask, vc_idx, c_inner); bvec vcutoff_mask = v::cmplt(vrsq, vcutsq); bvec vsame_mask = v::int_cmpneq(vjs, vks); bvec veff_mask = vcutoff_mask & vsame_mask & vactive_mask; @@ -751,14 +754,16 @@ void IntelKernelTersoff::kernel_step( while (! v::mask_testz(vactive_mask)) { bvec vnew_mask = vactive_mask & ~ veff_old_mask; vks = v::int_mullo(v_i4floats, v_i_NEIGHMASK & - v::int_gather<4>(vks, vactive_mask, vkks + vcnumneigh_i, firstneigh)); + (v::template int_gather<4>(vks, vactive_mask, + vkks + vcnumneigh_i, + firstneigh))); v::gather_x(vks, vnew_mask, x, &vx_k, &vy_k, &vz_k, &vw_k); fvec vdx_ik = (vx_k - vx_i); fvec vdy_ik = (vy_k - vy_i); fvec vdz_ik = (vz_k - vz_i); fvec vrsq = vdx_ik * vdx_ik + vdy_ik * vdy_ik + vdz_ik * vdz_ik; ivec vc_idx = v::int_mullo(v_i4floats, vw_k) + v::int_mullo(v_i_ntypes, vc_idx_ij); - vcutsq = v::gather<4>(vcutsq, vnew_mask, vc_idx, c_inner); + vcutsq = v::template gather<4>(vcutsq, vnew_mask, vc_idx, c_inner); bvec vcutoff_mask = v::cmplt(vrsq, vcutsq); bvec vsame_mask = v::int_cmpneq(vjs, vks); bvec veff_mask = vcutoff_mask & vsame_mask & vactive_mask; @@ -818,14 +823,16 @@ void IntelKernelTersoff::kernel_step( while (! 
v::mask_testz(vactive_mask)) { bvec vnew_mask = vactive_mask & ~ veff_old_mask; vks = v::int_mullo(v_i4floats, v_i_NEIGHMASK & - v::int_gather<4>(vks, vactive_mask, vkks + vcnumneigh_i, firstneigh)); + (v::template int_gather<4>(vks, vactive_mask, + vkks + vcnumneigh_i, + firstneigh))); v::gather_x(vks, vnew_mask, x, &vx_k, &vy_k, &vz_k, &vw_k); fvec vdx_ik = vx_k - vx_i; fvec vdy_ik = vy_k - vy_i; fvec vdz_ik = vz_k - vz_i; fvec vrsq = vdx_ik * vdx_ik + vdy_ik * vdy_ik + vdz_ik * vdz_ik; ivec vc_idx = v::int_mullo(v_i4floats, vw_k) + v::int_mullo(v_i_ntypes, vc_idx_ij); - vcutsq = v::gather<4>(vcutsq, vnew_mask, vc_idx, c_inner); + vcutsq = v::template gather<4>(vcutsq, vnew_mask, vc_idx, c_inner); bvec vcutoff_mask = v::cmplt(vrsq, vcutsq); bvec vsame_mask = v::int_cmpneq(vjs, vks); bvec veff_mask = vcutoff_mask & vsame_mask & vactive_mask; @@ -973,7 +980,7 @@ void IntelKernelTersoff::kernel_step_const_i( fvec vdy_ik = vy_k - vy_i; fvec vdz_ik = vz_k - vz_i; fvec vrsq = vdx_ik * vdx_ik + vdy_ik * vdy_ik + vdz_ik * vdz_ik; - fvec vcutsq = v::gather<4>(v::zero(), vmask, vc_idx_j_ntypes, &c_inner[ntypes * ntypes * w_i + w_k]); + fvec vcutsq = v::template gather<4>(v::zero(), vmask, vc_idx_j_ntypes, &c_inner[ntypes * ntypes * w_i + w_k]); bvec vcutoff_mask = v::cmplt(vrsq, vcutsq); bvec vsame_mask = v::int_cmpneq(vjs, ivec(static_cast(4 * sizeof(typename v::fscal) * k))); bvec veff_mask = vcutoff_mask & vsame_mask & vmask; @@ -1017,7 +1024,7 @@ void IntelKernelTersoff::kernel_step_const_i( fvec vdy_ik = vy_k - vy_i; fvec vdz_ik = vz_k - vz_i; fvec vrsq = vdx_ik * vdx_ik + vdy_ik * vdy_ik + vdz_ik * vdz_ik; - fvec vcutsq = v::gather<4>(v::zero(), vmask, vc_idx_j_ntypes, &c_inner[ntypes * ntypes * w_i + w_k]); + fvec vcutsq = v::template gather<4>(v::zero(), vmask, vc_idx_j_ntypes, &c_inner[ntypes * ntypes * w_i + w_k]); bvec vcutoff_mask = v::cmplt(vrsq, vcutsq); bvec vsame_mask = v::int_cmpneq(vjs, ivec(static_cast(4 * sizeof(typename v::fscal) * k))); bvec veff_mask = vcutoff_mask & vsame_mask & vmask; @@ -1064,7 +1071,7 @@ void IntelKernelTersoff::kernel_step_const_i( fvec vdy_ik = vy_k - vy_i; fvec vdz_ik = vz_k - vz_i; fvec vrsq = vdx_ik * vdx_ik + vdy_ik * vdy_ik + vdz_ik * vdz_ik; - fvec vcutsq = v::gather<4>(v::zero(), vmask, vc_idx_j_ntypes, &c_inner[ntypes * ntypes * w_i + w_k].cutsq); + fvec vcutsq = v::template gather<4>(v::zero(), vmask, vc_idx_j_ntypes, &c_inner[ntypes * ntypes * w_i + w_k].cutsq); bvec vcutoff_mask = v::cmplt(vrsq, vcutsq); bvec vsame_mask = v::int_cmpneq(vjs, ivec(static_cast(4 * sizeof(typename v::fscal) * k))); bvec veff_mask = vcutoff_mask & vsame_mask & vmask; @@ -1210,7 +1217,7 @@ void IntelKernelTersoff::kernel( template -IntelKernelTersoff::fvec IntelKernelTersoff::zeta_vector( +typename IntelKernelTersoff::fvec IntelKernelTersoff::zeta_vector( const c_inner_t * param, ivec xjw, bvec mask, fvec vrij, fvec rsq2, @@ -1336,6 +1343,8 @@ void IntelKernelTersoff::force_zeta_vector( } } +#define BCF lmp_intel::vector_routines + template template void IntelKernelTersoff::attractive_vector( @@ -1375,7 +1384,7 @@ void IntelKernelTersoff::attractive_vector( fvec varg3 = varg1 * varg1 * varg1; bvec mask_ex = v::cmpeq(vppowermint, fvec(3.)); fvec varg = v::blend(mask_ex, varg1, varg3); - fvec vex_delr = min(fvec(1.e30), exp(varg)); + fvec vex_delr = BCF::min(fvec(1.e30), exp(varg)); fvec vex_delr_d_factor = v::blend(mask_ex, v_1_0, fvec(3.0) * varg1 * varg1); fvec vex_delr_d = vplam3 * vex_delr_d_factor * vex_delr; bvec vmask_need_sine = v::cmpnle(vrik, vpbigr - 
vpbigd) & mask; @@ -1395,12 +1404,12 @@ void IntelKernelTersoff::attractive_vector( if (ZETA) *zeta = vfc * vgijk * vex_delr; fvec vminus_costheta = - vcostheta; - fvec vdcosdrjx = vrijinv * fmadd(vminus_costheta, vrij_hatx, rik_hatx); - fvec vdcosdrjy = vrijinv * fmadd(vminus_costheta, vrij_haty, rik_haty); - fvec vdcosdrjz = vrijinv * fmadd(vminus_costheta, vrij_hatz, rik_hatz); - fvec vdcosdrkx = rikinv * fmadd(vminus_costheta, rik_hatx, vrij_hatx); - fvec vdcosdrky = rikinv * fmadd(vminus_costheta, rik_haty, vrij_haty); - fvec vdcosdrkz = rikinv * fmadd(vminus_costheta, rik_hatz, vrij_hatz); + fvec vdcosdrjx = vrijinv * BCF::fmadd(vminus_costheta, vrij_hatx, rik_hatx); + fvec vdcosdrjy = vrijinv * BCF::fmadd(vminus_costheta, vrij_haty, rik_haty); + fvec vdcosdrjz = vrijinv * BCF::fmadd(vminus_costheta, vrij_hatz, rik_hatz); + fvec vdcosdrkx = rikinv * BCF::fmadd(vminus_costheta, rik_hatx, vrij_hatx); + fvec vdcosdrky = rikinv * BCF::fmadd(vminus_costheta, rik_haty, vrij_haty); + fvec vdcosdrkz = rikinv * BCF::fmadd(vminus_costheta, rik_hatz, vrij_hatz); fvec vdcosdrix = -(vdcosdrjx + vdcosdrkx); fvec vdcosdriy = -(vdcosdrjy + vdcosdrky); fvec vdcosdriz = -(vdcosdrjz + vdcosdrkz); diff --git a/src/INTEL/pair_tersoff_intel.h b/src/INTEL/pair_tersoff_intel.h index 0a40844622..b40ce19787 100644 --- a/src/INTEL/pair_tersoff_intel.h +++ b/src/INTEL/pair_tersoff_intel.h @@ -24,12 +24,12 @@ PairStyle(tersoff/intel,PairTersoffIntel); #ifndef LMP_PAIR_TERSOFF_INTEL_H #define LMP_PAIR_TERSOFF_INTEL_H -#ifdef __INTEL_COMPILER - #include "pair.h" #include "fix_intel.h" #include "pair_tersoff.h" +#ifdef __INTEL_COMPILER + namespace LAMMPS_NS { class PairTersoffIntel : public PairTersoff { diff --git a/src/TALLY/compute_heat_flux_virial_tally.cpp b/src/TALLY/compute_heat_flux_virial_tally.cpp new file mode 100644 index 0000000000..1a594c1b36 --- /dev/null +++ b/src/TALLY/compute_heat_flux_virial_tally.cpp @@ -0,0 +1,238 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#include "compute_heat_flux_virial_tally.h" + +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "group.h" +#include "memory.h" +#include "pair.h" +#include "update.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +ComputeHeatFluxVirialTally::ComputeHeatFluxVirialTally(LAMMPS *lmp, int narg, char **arg) : + Compute(lmp, narg, arg) +{ + if (narg < 4) error->all(FLERR, "Illegal compute heat/flux/virial/tally command"); + + igroup2 = group->find(arg[3]); + if (igroup2 == -1) + error->all(FLERR, "Could not find compute heat/flux/virial/tally second group ID"); + groupbit2 = group->bitmask[igroup2]; + + scalar_flag = 1; + vector_flag = 0; + peratom_flag = 1; + timeflag = 1; + dynamic_group_allow = 0; + + comm_reverse = size_peratom_cols = 3; + extscalar = 1; + peflag = 1; // we need Pair::ev_tally() to be run + + did_setup = invoked_peratom = invoked_scalar = -1; + nmax = -1; + fatom = nullptr; +} + +/* ---------------------------------------------------------------------- */ + +ComputeHeatFluxVirialTally::~ComputeHeatFluxVirialTally() +{ + if (force && force->pair) force->pair->del_tally_callback(this); + memory->destroy(fatom); +} + +/* ---------------------------------------------------------------------- */ + +void ComputeHeatFluxVirialTally::init() +{ + if (force->pair == nullptr) + error->all(FLERR, "Trying to use compute heat/flux/virial/tally without pair style"); + else + force->pair->add_tally_callback(this); + + if (comm->me == 0) { + if (force->pair->single_enable == 0 || force->pair->manybody_flag) + error->warning(FLERR, "Compute heat/flux/virial/tally used with incompatible pair style"); + + if (force->bond || force->angle || force->dihedral || force->improper || force->kspace) + error->warning(FLERR, "Compute heat/flux/virial/tally only called from pair style"); + } + + // error out if any atoms are in both groups + for (int i = 0; i < atom->nlocal; i++) { + if ((atom->mask[i] & groupbit) && (atom->mask[i] & groupbit2)) { + if (atom->tag_enable) { + error->all(FLERR, + "Atom {} belonging to both groups is not allowed" + " with compute heat/flux/virial/tally", + atom->tag[i]); + } else { + error->all(FLERR, + "Atom belonging to both groups is not allowed" + " with compute heat/flux/virial/tally"); + } + } + } + + did_setup = -1; +} + +/* ---------------------------------------------------------------------- */ + +void ComputeHeatFluxVirialTally::pair_setup_callback(int, int) +{ + // run setup only once per time step. 
+ // we may be called from multiple pair styles + + if (did_setup == update->ntimestep) return; + + const int ntotal = atom->nlocal + atom->nghost; + + // grow per-atom storage, if needed + + if (atom->nmax > nmax) { + memory->destroy(fatom); + nmax = atom->nmax; + memory->create(fatom, nmax, size_peratom_cols, "heat/flux/virial/tally:fatom"); + array_atom = fatom; + } + + // clear storage + + for (int i = 0; i < ntotal; ++i) + for (int j = 0; j < size_peratom_cols; ++j) fatom[i][j] = 0.0; + + did_setup = update->ntimestep; +} + +/* ---------------------------------------------------------------------- */ +void ComputeHeatFluxVirialTally::pair_tally_callback(int i, int j, int nlocal, int newton, double, + double, double fpair, double dx, double dy, + double dz) +{ + const int *const mask = atom->mask; + + const bool ingroup1 = (mask[i] & groupbit); + if ((ingroup1 && (mask[j] & groupbit2)) || ((mask[i] & groupbit2) && (mask[j] & groupbit))) { + + // signs set to calculate heat flux from group2 into group1 + const double fx = (ingroup1 ? 0.5 : -0.5) * dx * fpair; + const double fy = (ingroup1 ? 0.5 : -0.5) * dy * fpair; + const double fz = (ingroup1 ? 0.5 : -0.5) * dz * fpair; + + if (newton || i < nlocal) { + fatom[i][0] += fx; + fatom[i][1] += fy; + fatom[i][2] += fz; + } + if (newton || j < nlocal) { + fatom[j][0] += fx; + fatom[j][1] += fy; + fatom[j][2] += fz; + } + } +} + +/* ---------------------------------------------------------------------- */ + +int ComputeHeatFluxVirialTally::pack_reverse_comm(int n, int first, double *buf) +{ + int i, m, last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + buf[m++] = fatom[i][0]; + buf[m++] = fatom[i][1]; + buf[m++] = fatom[i][2]; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +void ComputeHeatFluxVirialTally::unpack_reverse_comm(int n, int *list, double *buf) +{ + int i, j, m; + + m = 0; + for (i = 0; i < n; i++) { + j = list[i]; + fatom[j][0] += buf[m++]; + fatom[j][1] += buf[m++]; + fatom[j][2] += buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +double ComputeHeatFluxVirialTally::compute_scalar() +{ + // need to collect contributions from ghost atoms + // because we need to use local velocities to compute heat flux + if (invoked_peratom != update->ntimestep) compute_peratom(); + + invoked_scalar = update->ntimestep; + if ((did_setup != invoked_scalar) || (update->eflag_global != invoked_scalar)) + error->all(FLERR, "Energy was not tallied on needed timestep"); + + // sum heat flux across procs + double hflux = 0.0; + for (int i = 0; i < atom->nlocal; i++) + hflux += + fatom[i][0] * atom->v[i][0] + fatom[i][1] * atom->v[i][1] + fatom[i][2] * atom->v[i][2]; + + MPI_Allreduce(&hflux, &scalar, 1, MPI_DOUBLE, MPI_SUM, world); + + return scalar; +} + +/* ---------------------------------------------------------------------- */ + +void ComputeHeatFluxVirialTally::compute_peratom() +{ + invoked_peratom = update->ntimestep; + if ((did_setup != invoked_peratom) || (update->eflag_global != invoked_peratom)) + error->all(FLERR, "Energy was not tallied on needed timestep"); + + // collect contributions from ghost atoms + + if (force->newton_pair) { + comm->reverse_comm_compute(this); + + // clear out ghost atom data after it has been collected to local atoms + const int nall = atom->nlocal + atom->nghost; + for (int i = atom->nlocal; i < nall; ++i) + for (int j = 0; j < size_peratom_cols; ++j) fatom[i][j] = 0.0; + } +} + +/* 
---------------------------------------------------------------------- + memory usage of local atom-based array +------------------------------------------------------------------------- */ + +double ComputeHeatFluxVirialTally::memory_usage() +{ + double bytes = (nmax < 0) ? 0 : nmax * size_peratom_cols * sizeof(double); + return bytes; +} diff --git a/src/TALLY/compute_heat_flux_virial_tally.h b/src/TALLY/compute_heat_flux_virial_tally.h new file mode 100644 index 0000000000..d2a3d39a7d --- /dev/null +++ b/src/TALLY/compute_heat_flux_virial_tally.h @@ -0,0 +1,64 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef COMPUTE_CLASS +// clang-format off +ComputeStyle(heat/flux/virial/tally,ComputeHeatFluxVirialTally); +// clang-format on +#else + +#ifndef LMP_COMPUTE_HEAT_FLUX_VIRIAL_TALLY_H +#define LMP_COMPUTE_HEAT_FLUX_VIRIAL_TALLY_H + +#include "compute.h" + +namespace LAMMPS_NS { + +class ComputeHeatFluxVirialTally : public Compute { + + public: + ComputeHeatFluxVirialTally(class LAMMPS *, int, char **); + virtual ~ComputeHeatFluxVirialTally(); + + void init(); + + double compute_scalar(); + void compute_peratom(); + + int pack_reverse_comm(int, int, double *); + void unpack_reverse_comm(int, int *, double *); + double memory_usage(); + + void pair_setup_callback(int, int); + void pair_tally_callback(int, int, int, int, double, double, double, double, double, double); + + private: + bigint did_setup; + int nmax, igroup2, groupbit2; + double **fatom; +}; + +} // namespace LAMMPS_NS + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. 
+ +*/ diff --git a/src/USER-MISC/fix_npt_cauchy.cpp b/src/USER-MISC/fix_npt_cauchy.cpp index 2390872631..e58632635f 100644 --- a/src/USER-MISC/fix_npt_cauchy.cpp +++ b/src/USER-MISC/fix_npt_cauchy.cpp @@ -671,6 +671,7 @@ int FixNPTCauchy::setmask() mask |= INITIAL_INTEGRATE; mask |= FINAL_INTEGRATE; mask |= INITIAL_INTEGRATE_RESPA; + mask |= PRE_FORCE_RESPA; mask |= FINAL_INTEGRATE_RESPA; if (pre_exchange_flag) mask |= PRE_EXCHANGE; return mask; @@ -1032,7 +1033,12 @@ void FixNPTCauchy::initial_integrate_respa(int /*vflag*/, int ilevel, int /*iloo nve_x(); if (pstat_flag) remap(); } +} +/* ---------------------------------------------------------------------- */ + +void FixNPTCauchy::pre_force_respa(int /*vflag*/, int ilevel, int /*iloop*/) +{ // if barostat, redo KSpace coeffs at outermost level, // since volume has changed diff --git a/src/USER-MISC/fix_npt_cauchy.h b/src/USER-MISC/fix_npt_cauchy.h index 1348b1bce4..80122ad0ac 100644 --- a/src/USER-MISC/fix_npt_cauchy.h +++ b/src/USER-MISC/fix_npt_cauchy.h @@ -35,6 +35,7 @@ class FixNPTCauchy : public Fix { virtual void initial_integrate(int); virtual void final_integrate(); void initial_integrate_respa(int, int, int); + void pre_force_respa(int, int, int); void final_integrate_respa(int, int); virtual void pre_exchange(); double compute_scalar(); diff --git a/src/fix_nh.cpp b/src/fix_nh.cpp index 40940977a6..c906000ec5 100644 --- a/src/fix_nh.cpp +++ b/src/fix_nh.cpp @@ -635,6 +635,7 @@ int FixNH::setmask() mask |= FINAL_INTEGRATE; mask |= INITIAL_INTEGRATE_RESPA; mask |= FINAL_INTEGRATE_RESPA; + mask |= PRE_FORCE_RESPA; if (pre_exchange_flag) mask |= PRE_EXCHANGE; return mask; } @@ -1006,7 +1007,12 @@ void FixNH::initial_integrate_respa(int /*vflag*/, int ilevel, int /*iloop*/) nve_x(); if (pstat_flag) remap(); } +} +/* ---------------------------------------------------------------------- */ + +void FixNH::pre_force_respa(int /*vflag*/, int ilevel, int /*iloop*/) +{ // if barostat, redo KSpace coeffs at outermost level, // since volume has changed diff --git a/src/fix_nh.h b/src/fix_nh.h index ef541a0e05..1ec6bec2f8 100644 --- a/src/fix_nh.h +++ b/src/fix_nh.h @@ -28,6 +28,7 @@ class FixNH : public Fix { virtual void initial_integrate(int); virtual void final_integrate(); void initial_integrate_respa(int, int, int); + void pre_force_respa(int, int, int); void final_integrate_respa(int, int); virtual void pre_exchange(); double compute_scalar(); diff --git a/src/lmptype.h b/src/lmptype.h index 6a7a7ef1b9..871bf5ff6c 100644 --- a/src/lmptype.h +++ b/src/lmptype.h @@ -242,9 +242,9 @@ union ubuf { // define stack variable alignment -#if defined(__INTEL_LLVM_COMPILER) || defined(__INTEL_COMPILER) +#if defined(__INTEL_COMPILER) #define _alignvar(expr, val) __declspec(align(val)) expr -#elif defined(__GNUC__) || defined(__PGI) +#elif defined(__GNUC__) || defined(__PGI) || defined(__INTEL_LLVM_COMPILER) #define _alignvar(expr, val) expr __attribute((aligned(val))) #else #define _alignvar(expr, val) expr @@ -266,7 +266,7 @@ union ubuf { #if defined(__clang__) #define _noopt __attribute__((optnone)) -#elif defined(__INTEL_COMPILER) +#elif defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER) #define _noopt #elif defined(__PGI) #define _noopt diff --git a/src/memory.cpp b/src/memory.cpp index 3e67ed1496..8f7faad545 100644 --- a/src/memory.cpp +++ b/src/memory.cpp @@ -16,7 +16,8 @@ #include "error.h" -#if defined(LMP_USER_INTEL) && defined(__INTEL_COMPILER) +#if defined(LMP_USER_INTEL) && \ + ((defined(__INTEL_COMPILER) || 
defined(__INTEL_LLVM_COMPILER))) #ifndef LMP_INTEL_NO_TBB #define LMP_USE_TBB_ALLOCATOR #include "tbb/scalable_allocator.h" @@ -81,7 +82,7 @@ void *Memory::srealloc(void *ptr, bigint nbytes, const char *name) #if defined(LMP_USE_TBB_ALLOCATOR) ptr = scalable_aligned_realloc(ptr, nbytes, LAMMPS_MEMALIGN); #elif defined(LMP_INTEL_NO_TBB) && defined(LAMMPS_MEMALIGN) && \ - defined(__INTEL_COMPILER) + (defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)) ptr = realloc(ptr, nbytes); uintptr_t offset = ((uintptr_t)(const void *)(ptr)) % LAMMPS_MEMALIGN;
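The preprocessor changes in intel_preprocess.h, lmptype.h, and memory.cpp all hinge on which identification macros each compiler family defines: the LLVM-based oneAPI compilers (icx/icpx) define __INTEL_LLVM_COMPILER and, being clang-derived, also __clang__ and __GNUC__, while classic icc defines __INTEL_COMPILER. A standalone sketch (illustrative only, not part of this patch) of why the LLVM macro must be tested before the generic clang/gcc branches:

.. code-block:: c++

   #include <cstdio>

   int main()
   {
   // icx also defines __clang__ and __GNUC__, so test
   // __INTEL_LLVM_COMPILER first; otherwise an icx build would be
   // misidentified as plain clang or gcc
   #if defined(__INTEL_LLVM_COMPILER)
     std::printf("Intel oneAPI (LLVM) compiler: %d\n", __INTEL_LLVM_COMPILER);
   #elif defined(__INTEL_COMPILER)
     std::printf("Intel classic compiler: %d\n", __INTEL_COMPILER);
   #elif defined(__clang__)
     std::printf("Clang\n");
   #elif defined(__GNUC__)
     std::printf("GCC\n");
   #else
     std::printf("unknown compiler\n");
   #endif
     return 0;
   }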