// clang-format off /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator https://www.lammps.org/, Sandia National Laboratories LAMMPS development team: developers@lammps.org Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: W. Michael Brown (Intel) ------------------------------------------------------------------------- */ #include "bond_harmonic_intel.h" #include "atom.h" #include "comm.h" #include "error.h" #include "force.h" #include "memory.h" #include "modify.h" #include "neighbor.h" #include "suffix.h" #include #include #include "omp_compat.h" using namespace LAMMPS_NS; typedef struct { int a,b,t; } int3_t; /* ---------------------------------------------------------------------- */ BondHarmonicIntel::BondHarmonicIntel(LAMMPS *lmp) : BondHarmonic(lmp) { suffix_flag |= Suffix::INTEL; } /* ---------------------------------------------------------------------- */ void BondHarmonicIntel::compute(int eflag, int vflag) { #ifdef _LMP_INTEL_OFFLOAD if (_use_base) { BondHarmonic::compute(eflag, vflag); return; } #endif if (fix->precision() == FixIntel::PREC_MODE_MIXED) compute(eflag, vflag, fix->get_mixed_buffers(), force_const_single); else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) compute(eflag, vflag, fix->get_double_buffers(), force_const_double); else compute(eflag, vflag, fix->get_single_buffers(), force_const_single); } /* ---------------------------------------------------------------------- */ template void BondHarmonicIntel::compute(int eflag, int vflag, IntelBuffers *buffers, const ForceConst &fc) { ev_init(eflag,vflag); if (vflag_atom) error->all(FLERR,"INTEL package does not support per-atom stress"); if (evflag) { if (vflag && !eflag) { if (force->newton_bond) eval<0,1,1>(vflag, buffers, fc); else eval<0,1,0>(vflag, buffers, fc); } else { if (force->newton_bond) eval<1,1,1>(vflag, buffers, fc); else eval<1,1,0>(vflag, buffers, fc); } } else { if (force->newton_bond) eval<0,0,1>(vflag, buffers, fc); else eval<0,0,0>(vflag, buffers, fc); } } template void BondHarmonicIntel::eval(const int vflag, IntelBuffers *buffers, const ForceConst &fc) { const int inum = neighbor->nbondlist; if (inum == 0) return; ATOM_T * _noalias const x = buffers->get_x(0); const int nlocal = atom->nlocal; const int nall = nlocal + atom->nghost; int f_stride; if (NEWTON_BOND) f_stride = buffers->get_stride(nall); else f_stride = buffers->get_stride(nlocal); int tc; FORCE_T * _noalias f_start; acc_t * _noalias ev_global; IP_PRE_get_buffers(0, buffers, fix, tc, f_start, ev_global); const int nthreads = tc; acc_t oebond, ov0, ov1, ov2, ov3, ov4, ov5; if (EFLAG) oebond = (acc_t)0.0; if (VFLAG && vflag) { ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; } #if defined(_OPENMP) #pragma omp parallel LMP_DEFAULT_NONE \ shared(f_start,f_stride,fc) \ reduction(+:oebond,ov0,ov1,ov2,ov3,ov4,ov5) #endif { int nfrom, npl, nto, tid; #ifdef LMP_INTEL_USE_SIMDOFF IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads); #else IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads); #endif FORCE_T * _noalias const f = f_start + (tid * f_stride); if (fix->need_zero(tid)) memset(f, 0, f_stride * sizeof(FORCE_T)); const int3_t * _noalias const bondlist = (int3_t *) neighbor->bondlist[0]; #ifdef LMP_INTEL_USE_SIMDOFF acc_t sebond, sv0, sv1, sv2, sv3, sv4, sv5; if (EFLAG) sebond = (acc_t)0.0; if (VFLAG && vflag) { sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0; } #if defined(USE_OMP_SIMD) #pragma omp simd reduction(+:sebond, sv0, sv1, sv2, sv3, sv4, sv5) #else #pragma simd reduction(+:sebond, sv0, sv1, sv2, sv3, sv4, sv5) #endif for (int n = nfrom; n < nto; n ++) { #else for (int n = nfrom; n < nto; n += npl) { #endif const int i1 = IP_PRE_dword_index(bondlist[n].a); const int i2 = IP_PRE_dword_index(bondlist[n].b); const int type = IP_PRE_dword_index(bondlist[n].t); const flt_t delx = x[i1].x - x[i2].x; const flt_t dely = x[i1].y - x[i2].y; const flt_t delz = x[i1].z - x[i2].z; const flt_t rsq = delx*delx + dely*dely + delz*delz; const flt_t r = std::sqrt(rsq); const flt_t dr = r - fc.fc[type].r0; const flt_t rk = fc.fc[type].k * dr; // force & energy flt_t fbond; if (r > (flt_t)0.0) fbond = (flt_t)-2.0*rk/r; else fbond = (flt_t)0.0; flt_t ebond; if (EFLAG) ebond = rk*dr; // apply force to each of 2 atoms #ifdef LMP_INTEL_USE_SIMDOFF #if defined(USE_OMP_SIMD) #pragma omp ordered simd #else #pragma simdoff #endif #endif { if (NEWTON_BOND || i1 < nlocal) { f[i1].x += delx*fbond; f[i1].y += dely*fbond; f[i1].z += delz*fbond; } if (NEWTON_BOND || i2 < nlocal) { f[i2].x -= delx*fbond; f[i2].y -= dely*fbond; f[i2].z -= delz*fbond; } } if (EFLAG || VFLAG) { #ifdef LMP_INTEL_USE_SIMDOFF IP_PRE_ev_tally_bond(EFLAG, VFLAG, eatom, vflag, ebond, i1, i2, fbond, delx, dely, delz, sebond, f, NEWTON_BOND, nlocal, sv0, sv1, sv2, sv3, sv4, sv5); #else IP_PRE_ev_tally_bond(EFLAG, VFLAG, eatom, vflag, ebond, i1, i2, fbond, delx, dely, delz, oebond, f, NEWTON_BOND, nlocal, ov0, ov1, ov2, ov3, ov4, ov5); #endif } } // for n #ifdef LMP_INTEL_USE_SIMDOFF if (EFLAG) oebond += sebond; if (VFLAG && vflag) { ov0 += sv0; ov1 += sv1; ov2 += sv2; ov3 += sv3; ov4 += sv4; ov5 += sv5; } #endif } // omp parallel if (EFLAG) energy += oebond; if (VFLAG && vflag) { virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; } fix->set_reduce_flag(); } /* ---------------------------------------------------------------------- */ void BondHarmonicIntel::init_style() { BondHarmonic::init_style(); fix = static_cast(modify->get_fix_by_id("package_intel")); if (!fix) error->all(FLERR, "The 'package intel' command is required for /intel styles"); #ifdef _LMP_INTEL_OFFLOAD _use_base = 0; if (fix->offload_balance() != 0.0) { _use_base = 1; return; } #endif fix->bond_init_check(); if (fix->precision() == FixIntel::PREC_MODE_MIXED) pack_force_const(force_const_single, fix->get_mixed_buffers()); else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) pack_force_const(force_const_double, fix->get_double_buffers()); else pack_force_const(force_const_single, fix->get_single_buffers()); } /* ---------------------------------------------------------------------- */ template void BondHarmonicIntel::pack_force_const(ForceConst &fc, IntelBuffers * /*buffers*/) { const int bp1 = atom->nbondtypes + 1; fc.set_ntypes(bp1,memory); for (int i = 1; i < bp1; i++) { fc.fc[i].k = k[i]; fc.fc[i].r0 = r0[i]; } } /* ---------------------------------------------------------------------- */ template void BondHarmonicIntel::ForceConst::set_ntypes(const int nbondtypes, Memory *memory) { if (memory != nullptr) _memory = memory; if (nbondtypes != _nbondtypes) { _memory->destroy(fc); if (nbondtypes > 0) _memory->create(fc,nbondtypes,"bondharmonicintel.fc"); } _nbondtypes = nbondtypes; }