Merge pull request #3921 from wmbrownIntel/snap-intel

Adding intel variant of snap pair style.
This commit is contained in:
Axel Kohlmeyer
2023-10-10 11:02:43 -04:00
committed by GitHub
10 changed files with 2798 additions and 3 deletions

View File

@ -265,7 +265,7 @@ OPT.
* :doc:`smd/tri_surface <pair_smd_triangulated_surface>` * :doc:`smd/tri_surface <pair_smd_triangulated_surface>`
* :doc:`smd/ulsph <pair_smd_ulsph>` * :doc:`smd/ulsph <pair_smd_ulsph>`
* :doc:`smtbq <pair_smtbq>` * :doc:`smtbq <pair_smtbq>`
* :doc:`snap (k) <pair_snap>` * :doc:`snap (ik) <pair_snap>`
* :doc:`soft (go) <pair_soft>` * :doc:`soft (go) <pair_soft>`
* :doc:`sph/heatconduction <pair_sph_heatconduction>` * :doc:`sph/heatconduction <pair_sph_heatconduction>`
* :doc:`sph/idealgas <pair_sph_idealgas>` * :doc:`sph/idealgas <pair_sph_idealgas>`

View File

@ -1,10 +1,11 @@
.. index:: pair_style snap .. index:: pair_style snap
.. index:: pair_style snap/intel
.. index:: pair_style snap/kk .. index:: pair_style snap/kk
pair_style snap command pair_style snap command
======================= =======================
Accelerator Variants: *snap/kk* Accelerator Variants: *snap/intel*, *snap/kk*
Syntax Syntax
"""""" """"""
@ -260,6 +261,14 @@ This style is part of the ML-SNAP package. It is only enabled if LAMMPS
was built with that package. See the :doc:`Build package was built with that package. See the :doc:`Build package
<Build_package>` page for more info. <Build_package>` page for more info.
The *snap/intel* accelerator variant will *only* be available if LAMMPS
is built with Intel *compilers* and for CPUs with AVX-512 support.
While the INTEL package in general allows multiple floating point
precision modes to be selected, *snap/intel* will currently always use
full double precision regardless of the precision mode selected.
Additionally, the *intel* variant of snap will **NOT** use multiple
threads with OpenMP.
Related commands Related commands
"""""""""""""""" """"""""""""""""

View File

@ -185,6 +185,7 @@ fi
if (test $1 = "ML-SNAP") then if (test $1 = "ML-SNAP") then
depend ML-IAP depend ML-IAP
depend KOKKOS depend KOKKOS
depend INTEL
fi fi
if (test $1 = "CG-SPICA") then if (test $1 = "CG-SPICA") then

View File

@ -0,0 +1,70 @@
# Toy demonstration of SNAP "scale" parameter, using fix/adapt and hybrid/overlay
# Mixing linear and quadratic SNAP Ni potentials by Zuo et al. JCPA 2020
variable w index 10 # Warmup Timesteps
variable t index 100 # Main Run Timesteps
variable m index 1 # Main Run Timestep Multiplier
variable n index 0 # Use NUMA Mapping for Multi-Node
variable x index 4
variable y index 2
variable z index 2
variable rr equal floor($t*$m)
variable root getenv LMP_ROOT
if "$n > 0" then "processors * * * grid numa"
# mixing parameter
variable lambda equal 0.2
# Initialize simulation
variable a equal 3.52
units metal
# generate the box and atom positions using a FCC lattice
variable nx equal 20*$x
variable ny equal 20*$y
variable nz equal 20*$z
boundary p p p
lattice fcc $a
region box block 0 ${nx} 0 ${ny} 0 ${nz}
create_box 1 box
create_atoms 1 box
mass 1 34.
# choose bundled SNAP Ni potential from Zuo et al. JCPA 2020
pair_style hybrid/overlay snap snap
pair_coeff * * snap 1 &
${root}/examples/snap/Ni_Zuo_JPCA2020.snapcoeff &
${root}/examples/snap/Ni_Zuo_JPCA2020.snapparam Ni
pair_coeff * * snap 2 &
${root}/examples/snap/Ni_Zuo_JPCA2020.quadratic.snapcoeff &
${root}/examples/snap/Ni_Zuo_JPCA2020.quadratic.snapparam Ni
# scale according to mixing parameter
variable l1 equal ${lambda}
variable l2 equal 1.0-${lambda}
fix scale1 all adapt 1 pair snap:1 scale * * v_l1
fix scale2 all adapt 1 pair snap:2 scale * * v_l2
# Setup output
thermo 1
thermo_modify norm yes
# Set up NVE run
timestep 0.5e-3
neighbor 1.0 bin
neigh_modify every 1 delay 0 check yes
# Run MD
velocity all create 300.0 4928459 loop geom
fix 1 all nve
if "$w > 0" then "run $w"
run ${rr}

View File

@ -35,7 +35,7 @@ export I_MPI_PIN_DOMAIN=core
# End settings for your system # End settings for your system
######################################################################### #########################################################################
export WORKLOADS="lj rhodo lc sw water eam airebo dpd tersoff" export WORKLOADS="lj rhodo lc sw water eam airebo dpd tersoff snap"
export LMP_ARGS="-pk intel 0 -sf intel -screen none -v d 1" export LMP_ARGS="-pk intel 0 -sf intel -screen none -v d 1"
export RLMP_ARGS="-pk intel 0 lrt yes -sf intel -screen none -v d 1" export RLMP_ARGS="-pk intel 0 lrt yes -sf intel -screen none -v d 1"

View File

@ -46,13 +46,38 @@ namespace ip_simd {
typedef __mmask16 SIMD_mask; typedef __mmask16 SIMD_mask;
inline bool any(const SIMD_mask &m) { return m != 0; }
struct SIMD_int { struct SIMD_int {
__m512i v; __m512i v;
SIMD_int() {} SIMD_int() {}
SIMD_int(const __m512i in) : v(in) {} SIMD_int(const __m512i in) : v(in) {}
inline int & operator[](const int i) { return ((int *)&(v))[i]; }
inline const int & operator[](const int i) const
{ return ((int *)&(v))[i]; }
operator __m512i() const { return v;} operator __m512i() const { return v;}
}; };
struct SIMD256_int {
__m256i v;
SIMD256_int() {}
SIMD256_int(const __m256i in) : v(in) {}
SIMD256_int(const int in) : v(_mm256_set1_epi32(in)) {}
inline int & operator[](const int i) { return ((int *)&(v))[i]; }
inline const int & operator[](const int i) const
{ return ((int *)&(v))[i]; }
#ifdef __INTEL_LLVM_COMPILER
inline SIMD256_int operator&=(const int i)
{ v=_mm256_and_epi32(v, _mm256_set1_epi32(i)); return *this; };
#else
inline SIMD256_int operator&=(const int i)
{ v=_mm256_and_si256(v, _mm256_set1_epi32(i)); return *this; };
#endif
inline SIMD256_int operator+=(const int i)
{ v=_mm256_add_epi32(v, _mm256_set1_epi32(i)); return *this; };
operator __m256i() const { return v;}
};
struct SIMD_float { struct SIMD_float {
__m512 v; __m512 v;
SIMD_float() {} SIMD_float() {}
@ -64,7 +89,24 @@ namespace ip_simd {
__m512d v; __m512d v;
SIMD_double() {} SIMD_double() {}
SIMD_double(const __m512d in) : v(in) {} SIMD_double(const __m512d in) : v(in) {}
SIMD_double(const double in) { v=_mm512_set1_pd(in); }
inline double & operator[](const int i) { return ((double *)&(v))[i]; }
inline const double & operator[](const int i) const
{ return ((double *)&(v))[i]; }
operator __m512d() const { return v;} operator __m512d() const { return v;}
SIMD_double & operator=(const double i)
{ _mm512_set1_pd(i); return *this; }
SIMD_double &operator=(const SIMD_double &i)
{ v = i.v; return *this; }
SIMD_double operator-() { return _mm512_xor_pd(v, _mm512_set1_pd(-0.0)); }
SIMD_double & operator+=(const SIMD_double & two)
{ v = _mm512_add_pd(v, two.v); return *this; }
SIMD_double & operator-=(const SIMD_double & two)
{ v = _mm512_sub_pd(v, two.v); return *this; }
SIMD_double & operator*=(const SIMD_double & two)
{ v = _mm512_mul_pd(v, two.v); return *this; }
}; };
template<class flt_t> template<class flt_t>
@ -99,6 +141,12 @@ namespace ip_simd {
// ------- Set Operations // ------- Set Operations
inline SIMD256_int SIMD256_set(const int l0, const int l1, const int l2,
const int l3, const int l4, const int l5,
const int l6, const int l7) {
return _mm256_setr_epi32(l0,l1,l2,l3,l4,l5,l6,l7);
}
inline SIMD_int SIMD_set(const int l0, const int l1, const int l2, inline SIMD_int SIMD_set(const int l0, const int l1, const int l2,
const int l3, const int l4, const int l5, const int l3, const int l4, const int l5,
const int l6, const int l7, const int l8, const int l6, const int l7, const int l8,
@ -109,6 +157,10 @@ namespace ip_simd {
l8,l9,l10,l11,l12,l13,l14,l15); l8,l9,l10,l11,l12,l13,l14,l15);
} }
inline SIMD256_int SIMD256_set(const int l) {
return _mm256_set1_epi32(l);
}
inline SIMD_int SIMD_set(const int l) { inline SIMD_int SIMD_set(const int l) {
return _mm512_set1_epi32(l); return _mm512_set1_epi32(l);
} }
@ -121,6 +173,10 @@ namespace ip_simd {
return _mm512_set1_pd(l); return _mm512_set1_pd(l);
} }
inline SIMD256_int SIMD256_count() {
return SIMD256_set(0,1,2,3,4,5,6,7);
}
inline SIMD_int SIMD_zero_masked(const SIMD_mask &m, const SIMD_int &one) { inline SIMD_int SIMD_zero_masked(const SIMD_mask &m, const SIMD_int &one) {
return _mm512_maskz_mov_epi32(m, one); return _mm512_maskz_mov_epi32(m, one);
} }
@ -147,6 +203,10 @@ namespace ip_simd {
// -------- Load Operations // -------- Load Operations
inline SIMD256_int SIMD_load(const SIMD256_int *p) {
return _mm256_load_epi32((int *)p);
}
inline SIMD_int SIMD_load(const int *p) { inline SIMD_int SIMD_load(const int *p) {
return _mm512_load_epi32(p); return _mm512_load_epi32(p);
} }
@ -159,6 +219,10 @@ namespace ip_simd {
return _mm512_load_pd(p); return _mm512_load_pd(p);
} }
inline SIMD_double SIMD_load(const SIMD_double *p) {
return _mm512_load_pd((double *)p);
}
inline SIMD_int SIMD_loadz(const SIMD_mask &m, const int *p) { inline SIMD_int SIMD_loadz(const SIMD_mask &m, const int *p) {
return _mm512_maskz_load_epi32(m, p); return _mm512_maskz_load_epi32(m, p);
} }
@ -171,6 +235,10 @@ namespace ip_simd {
return _mm512_maskz_load_pd(m, p); return _mm512_maskz_load_pd(m, p);
} }
inline SIMD256_int SIMD_gather(const int *p, const SIMD256_int &i) {
return _mm256_i32gather_epi32(p, i, _MM_SCALE_4);
}
inline SIMD_int SIMD_gather(const int *p, const SIMD_int &i) { inline SIMD_int SIMD_gather(const int *p, const SIMD_int &i) {
return _mm512_i32gather_epi32(i, p, _MM_SCALE_4); return _mm512_i32gather_epi32(i, p, _MM_SCALE_4);
} }
@ -179,6 +247,10 @@ namespace ip_simd {
return _mm512_i32gather_ps(i, p, _MM_SCALE_4); return _mm512_i32gather_ps(i, p, _MM_SCALE_4);
} }
inline SIMD_double SIMD_gather(const double *p, const SIMD256_int &i) {
return _mm512_i32gather_pd(i, p, _MM_SCALE_8);
}
inline SIMD_double SIMD_gather(const double *p, const SIMD_int &i) { inline SIMD_double SIMD_gather(const double *p, const SIMD_int &i) {
return _mm512_i32gather_pd(_mm512_castsi512_si256(i), p, _MM_SCALE_8); return _mm512_i32gather_pd(_mm512_castsi512_si256(i), p, _MM_SCALE_8);
} }
@ -201,6 +273,12 @@ namespace ip_simd {
_mm512_castsi512_si256(i), p, _MM_SCALE_8); _mm512_castsi512_si256(i), p, _MM_SCALE_8);
} }
inline SIMD_double SIMD_gather(const SIMD_mask &m, const double *p,
const SIMD256_int &i) {
return _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
i, p, _MM_SCALE_8);
}
template <typename T> template <typename T>
inline SIMD_int SIMD_gatherz_offset(const SIMD_mask &m, const int *p, inline SIMD_int SIMD_gatherz_offset(const SIMD_mask &m, const int *p,
const SIMD_int &i) { const SIMD_int &i) {
@ -252,6 +330,15 @@ namespace ip_simd {
return _mm512_store_pd(p,one); return _mm512_store_pd(p,one);
} }
inline void SIMD_store(SIMD_double *p, const SIMD_double &one) {
return _mm512_store_pd((double *)p,one);
}
inline void SIMD_scatter(const SIMD_mask &m, int *p,
const SIMD256_int &i, const SIMD256_int &vec) {
_mm256_mask_i32scatter_epi32(p, m, i, vec, _MM_SCALE_4);
}
inline void SIMD_scatter(const SIMD_mask &m, int *p, inline void SIMD_scatter(const SIMD_mask &m, int *p,
const SIMD_int &i, const SIMD_int &vec) { const SIMD_int &i, const SIMD_int &vec) {
_mm512_mask_i32scatter_epi32(p, m, i, vec, _MM_SCALE_4); _mm512_mask_i32scatter_epi32(p, m, i, vec, _MM_SCALE_4);
@ -268,8 +355,22 @@ namespace ip_simd {
_MM_SCALE_8); _MM_SCALE_8);
} }
inline void SIMD_scatter(const SIMD_mask &m, double *p,
const SIMD256_int &i, const SIMD_double &vec) {
_mm512_mask_i32scatter_pd(p, m, i, vec, _MM_SCALE_8);
}
inline void SIMD_scatter(double *p,
const SIMD256_int &i, const SIMD_double &vec) {
_mm512_i32scatter_pd(p, i, vec, _MM_SCALE_8);
}
// ------- Arithmetic Operations // ------- Arithmetic Operations
inline SIMD256_int operator+(const SIMD256_int &one, const SIMD256_int &two) {
return _mm256_add_epi32(one,two);
}
inline SIMD_int operator+(const SIMD_int &one, const SIMD_int &two) { inline SIMD_int operator+(const SIMD_int &one, const SIMD_int &two) {
return _mm512_add_epi32(one,two); return _mm512_add_epi32(one,two);
} }
@ -286,6 +387,10 @@ namespace ip_simd {
return _mm512_add_epi32(one,SIMD_set(two)); return _mm512_add_epi32(one,SIMD_set(two));
} }
inline SIMD256_int operator+(const SIMD256_int &one, const int two) {
return _mm256_add_epi32(one,SIMD256_set(two));
}
inline SIMD_float operator+(const SIMD_float &one, const float two) { inline SIMD_float operator+(const SIMD_float &one, const float two) {
return _mm512_add_ps(one,SIMD_set(two)); return _mm512_add_ps(one,SIMD_set(two));
} }
@ -299,6 +404,11 @@ namespace ip_simd {
return _mm512_mask_add_epi32(one,m,one,SIMD_set(two)); return _mm512_mask_add_epi32(one,m,one,SIMD_set(two));
} }
inline SIMD256_int SIMD_add(const SIMD_mask &m,
const SIMD256_int &one, const int two) {
return _mm256_mask_add_epi32(one,m,one,SIMD256_set(two));
}
inline SIMD_float SIMD_add(const SIMD_mask &m, inline SIMD_float SIMD_add(const SIMD_mask &m,
const SIMD_float &one, const float two) { const SIMD_float &one, const float two) {
return _mm512_mask_add_ps(one,m,one,SIMD_set(two)); return _mm512_mask_add_ps(one,m,one,SIMD_set(two));
@ -309,6 +419,11 @@ namespace ip_simd {
return _mm512_mask_add_pd(one,m,one,SIMD_set(two)); return _mm512_mask_add_pd(one,m,one,SIMD_set(two));
} }
inline SIMD_double SIMD_add(const SIMD_mask &m,
const SIMD_double &one, const SIMD_double &two) {
return _mm512_mask_add_pd(one,m,one,two);
}
inline SIMD_int SIMD_add(const SIMD_int &s, const SIMD_mask &m, inline SIMD_int SIMD_add(const SIMD_int &s, const SIMD_mask &m,
const SIMD_int &one, const SIMD_int &two) { const SIMD_int &one, const SIMD_int &two) {
return _mm512_mask_add_epi32(s,m,one,two); return _mm512_mask_add_epi32(s,m,one,two);
@ -387,6 +502,10 @@ namespace ip_simd {
return _mm512_mul_pd(one,two); return _mm512_mul_pd(one,two);
} }
inline SIMD256_int operator*(const SIMD256_int &one, const int two) {
return _mm256_mullo_epi32(one,SIMD256_set(two));
}
inline SIMD_int operator*(const SIMD_int &one, const int two) { inline SIMD_int operator*(const SIMD_int &one, const int two) {
return _mm512_mullo_epi32(one,SIMD_set(two)); return _mm512_mullo_epi32(one,SIMD_set(two));
} }
@ -417,6 +536,12 @@ namespace ip_simd {
return _mm512_fmadd_pd(one,two,three); return _mm512_fmadd_pd(one,two,three);
} }
inline SIMD_double SIMD_fma(const SIMD_mask m, const SIMD_double &one,
const SIMD_double &two,
const SIMD_double &three) {
return _mm512_mask3_fmadd_pd(one,two,three,m);
}
inline SIMD_float SIMD_fms(const SIMD_float &one, const SIMD_float &two, inline SIMD_float SIMD_fms(const SIMD_float &one, const SIMD_float &two,
const SIMD_float &three) { const SIMD_float &three) {
return _mm512_fmsub_ps(one,two,three); return _mm512_fmsub_ps(one,two,three);
@ -493,6 +618,10 @@ namespace ip_simd {
return _mm512_pow_pd(one, two); return _mm512_pow_pd(one, two);
} }
inline SIMD_double SIMD_pow(const SIMD_double &one, const double two) {
return _mm512_pow_pd(one, SIMD_set(two));
}
inline SIMD_float SIMD_exp(const SIMD_float &one) { inline SIMD_float SIMD_exp(const SIMD_float &one) {
return _mm512_exp_ps(one); return _mm512_exp_ps(one);
} }
@ -501,6 +630,18 @@ namespace ip_simd {
return _mm512_exp_pd(one); return _mm512_exp_pd(one);
} }
inline SIMD_double SIMD_cos(const SIMD_double &one) {
return _mm512_cos_pd(one);
}
inline SIMD_double SIMD_sin(const SIMD_double &one) {
return _mm512_sin_pd(one);
}
inline SIMD_double SIMD_tan(const SIMD_double &one) {
return _mm512_tan_pd(one);
}
// ------- Comparison operations // ------- Comparison operations
inline SIMD_mask SIMD_lt(SIMD_mask m, const SIMD_int &one, inline SIMD_mask SIMD_lt(SIMD_mask m, const SIMD_int &one,
@ -533,6 +674,14 @@ namespace ip_simd {
return _mm512_mask_cmplt_pd_mask(m, SIMD_set(one), two); return _mm512_mask_cmplt_pd_mask(m, SIMD_set(one), two);
} }
inline SIMD_mask operator<(const SIMD256_int &one, const SIMD256_int &two) {
return _mm256_cmplt_epi32_mask(one,two);
}
inline SIMD_mask operator<(const int one, const SIMD256_int &two) {
return _mm256_cmplt_epi32_mask(SIMD256_set(one),two);
}
inline SIMD_mask operator<(const SIMD_int &one, const SIMD_int &two) { inline SIMD_mask operator<(const SIMD_int &one, const SIMD_int &two) {
return _mm512_cmplt_epi32_mask(one,two); return _mm512_cmplt_epi32_mask(one,two);
} }
@ -577,6 +726,10 @@ namespace ip_simd {
return _mm512_cmple_ps_mask(SIMD_set(one), two); return _mm512_cmple_ps_mask(SIMD_set(one), two);
} }
inline SIMD_mask operator<=(const SIMD_double &one, const SIMD_double &two) {
return _mm512_cmple_pd_mask(one, two);
}
inline SIMD_mask operator<=(const double one, const SIMD_double &two) { inline SIMD_mask operator<=(const double one, const SIMD_double &two) {
return _mm512_cmple_pd_mask(SIMD_set(one), two); return _mm512_cmple_pd_mask(SIMD_set(one), two);
} }
@ -593,6 +746,14 @@ namespace ip_simd {
return _mm512_cmplt_pd_mask(two,one); return _mm512_cmplt_pd_mask(two,one);
} }
inline SIMD_mask operator>(const SIMD_double &one, const double two) {
return _mm512_cmplt_pd_mask(SIMD_set(two),one);
}
inline SIMD_mask operator==(const SIMD256_int &one, const int two) {
return _mm256_cmpeq_epi32_mask(one,_mm256_set1_epi32(two));
}
inline SIMD_mask operator==(const SIMD_int &one, const SIMD_int &two) { inline SIMD_mask operator==(const SIMD_int &one, const SIMD_int &two) {
return _mm512_cmpeq_epi32_mask(one,two); return _mm512_cmpeq_epi32_mask(one,two);
} }

View File

@ -0,0 +1,779 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#if defined(__AVX512F__)
#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
#include "pair_snap_intel.h"
#include "atom.h"
#include "comm.h"
#include "error.h"
#include "force.h"
#include "memory.h"
#include "modify.h"
#include "neigh_list.h"
#include "neighbor.h"
#include "sna_intel.h"
#include "tokenizer.h"
#include <cmath>
#include <cstring>
using namespace LAMMPS_NS;
using namespace ip_simd;
#define MAXLINE 1024
#define MAXWORD 3
/* ---------------------------------------------------------------------- */
PairSNAPIntel::PairSNAPIntel(LAMMPS *lmp) : Pair(lmp)
{
single_enable = 0;
restartinfo = 0;
one_coeff = 1;
manybody_flag = 1;
centroidstressflag = CENTROID_NOTAVAIL;
radelem = nullptr;
wjelem = nullptr;
coeffelem = nullptr;
sinnerelem = nullptr;
dinnerelem = nullptr;
beta = nullptr;
bispectrum = nullptr;
snaptr = nullptr;
}
/* ---------------------------------------------------------------------- */
PairSNAPIntel::~PairSNAPIntel()
{
if (copymode) return;
memory->destroy(radelem);
memory->destroy(wjelem);
memory->destroy(coeffelem);
memory->destroy(sinnerelem);
memory->destroy(dinnerelem);
memory->destroy(beta);
memory->destroy(bispectrum);
delete snaptr;
if (allocated) {
memory->destroy(setflag);
memory->destroy(cutsq);
memory->destroy(scale);
}
}
/* ----------------------------------------------------------------------
This version is a straightforward implementation
---------------------------------------------------------------------- */
void PairSNAPIntel::compute(int eflag, int vflag)
{
SNA_DVEC fij[3];
int *jlist,*numneigh,**firstneigh;
ev_init(eflag,vflag);
int tally_xyz = 0;
if (vflag_atom || (vflag && !vflag_fdotr)) tally_xyz = 1;
double **x = atom->x;
double *_x = atom->x[0];
double **f = atom->f;
int *type = atom->type;
int nlocal = atom->nlocal;
int newton_pair = force->newton_pair;
// compute dE_i/dB_i = beta_i for all i in list
numneigh = list->numneigh;
firstneigh = list->firstneigh;
SNA_DVEC sevdwl(0);
const int vw = snaptr->vector_width();
for (int ii = 0; ii < list->inum; ii+=vw) {
SNA_IVEC i, jnum;
int max_jnum = 0;
for (int l = 0; l < vw; l++) {
if (ii + l < list->inum) {
i[l] = list->ilist[ii + l];
jnum[l] = numneigh[i[l]];
} else {
i[l] = list->ilist[0];
jnum[l] = 0;
}
if (jnum[l] > max_jnum) max_jnum = jnum[l];
}
// ensure rij, inside, wj, and rcutij are of size jnum
snaptr->grow_rij(max_jnum);
SNA_IVEC zero_vec(0);
const SNA_DVEC xtmp = SIMD_gather(_x, i * 3);
const SNA_DVEC ytmp = SIMD_gather(_x, i * 3 + 1);
const SNA_DVEC ztmp = SIMD_gather(_x, i * 3 + 2);
const SNA_IVEC itype = SIMD_gather(type, i);
const SNA_IVEC ielem = SIMD_gather(map, itype);
const SNA_DVEC radi = SIMD_gather(radelem, ielem);
// rij[][3] = displacements between atom I and those neighbors
// inside = indices of neighbors of I within cutoff
// wj = weights for neighbors of I within cutoff
// rcutij = cutoffs for neighbors of I within cutoff
// note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi
SNA_IVEC ninside(0);
for (int jj = 0; jj < max_jnum; jj++) {
SIMD_mask m(SIMD256_set(jj) < jnum);
SNA_IVEC j;
SV_for (int l = 0; l < vw; l++) {
jlist = firstneigh[i[l]];
if (jj < jnum[l]) j[l] = jlist[jj];
else j[l] = 0;
}
j &= NEIGHMASK;
const SNA_DVEC delx = SIMD_gather(m, _x, j * 3) - xtmp;
const SNA_DVEC dely = SIMD_gather(m, _x, j * 3 + 1) - ytmp;
const SNA_DVEC delz = SIMD_gather(m, _x, j * 3 + 2) - ztmp;
const SNA_IVEC jtype = SIMD_gather(type, j);
const SNA_DVEC rsq = delx*delx + dely*dely + delz*delz;
const SNA_DVEC vcut = SIMD_gather(m, cutsq[0],
itype * (atom->ntypes + 1) + jtype);
m &= rsq < vcut;
m &= rsq > SIMD_set(1e-20);
const SNA_IVEC jelem = SIMD_gather(map, jtype);
const SNA_IVEC ni3 = ninside * vw * 3 + SIMD256_count();
SIMD_scatter(m, (double *)(snaptr->rij[0]), ni3, delx);
SIMD_scatter(m, (double *)(snaptr->rij[0] + 1), ni3, dely);
SIMD_scatter(m, (double *)(snaptr->rij[0] + 2), ni3, delz);
const SNA_IVEC ni = ninside * vw + SIMD256_count();
SIMD_scatter(m, (int *)(snaptr->inside), ni, j);
SIMD_scatter(m, (double *)(snaptr->wj), ni,
SIMD_gather(m, wjelem, jelem));
SIMD_scatter(m, (double *)(snaptr->rcutij), ni,
(radi + SIMD_gather(m, radelem, jelem)) * rcutfac);
if (switchinnerflag) {
SIMD_scatter(m, (double *)(snaptr->sinnerij), ni,
(SIMD_gather(m, sinnerelem, ielem) +
SIMD_gather(m, sinnerelem, jelem)) * 0.5);
SIMD_scatter(m, (double *)(snaptr->dinnerij), ni,
(SIMD_gather(m, dinnerelem, ielem) +
SIMD_gather(m, dinnerelem, jelem)) * 0.5);
}
if (chemflag)
SIMD_scatter(m, (int *)(snaptr->element), ni, jelem);
ninside = SIMD_add(m, ninside, 1);
} // for jj
// compute Ui, Yi for atom I
if (chemflag)
snaptr->compute_ui(ninside, ielem, max_jnum);
else
snaptr->compute_ui(ninside, zero_vec, max_jnum);
// Compute bispectrum
if (quadraticflag || eflag) {
snaptr->compute_zi_or_yi<0>(beta);
if (chemflag)
snaptr->compute_bi(ielem);
else
snaptr->compute_bi(zero_vec);
for (int icoeff = 0; icoeff < ncoeff; icoeff++)
SIMD_store(bispectrum + icoeff, SIMD_load(snaptr->blist + icoeff));
}
// Compute beta
for (int icoeff = 0; icoeff < ncoeff; icoeff++)
SIMD_store(beta + icoeff, SIMD_gather(coeffelem[0],
ielem * ncoeffall + icoeff + 1));
if (quadraticflag) {
int k = ncoeff+1;
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
SNA_DVEC bveci = SIMD_load(bispectrum + icoeff);
SNA_DVEC beta_i = SIMD_load(beta + icoeff) +
SIMD_gather(coeffelem[0], ielem * ncoeffall + k) * bveci;
k++;
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
const SNA_DVEC ci = SIMD_gather(coeffelem[0], ielem * ncoeffall + k);
beta_i = beta_i + ci * SIMD_load(bispectrum + jcoeff);
SIMD_store(beta + jcoeff, ci * bveci + SIMD_load(beta + jcoeff));
k++;
}
SIMD_store(beta + icoeff, beta_i);
}
}
// for neighbors of I within cutoff:
// compute Fij = dEi/dRj = -dEi/dRi
// add to Fi, subtract from Fj
// scaling is that for type I
if (quadraticflag || eflag)
snaptr->compute_yi_from_zi(beta);
else
snaptr->compute_zi_or_yi<1>(beta);
SNA_DVEC fi_x(0.0), fi_y(0.0), fi_z(0.0);
SNA_DVEC scalev = SIMD_gather(scale[0], itype * (atom->ntypes+1) + itype);
for (int jj = 0; jj < max_jnum; jj++) {
snaptr->compute_duidrj(jj, ninside);
if (chemflag && nelements > 1)
snaptr->compute_deidrj_e(jj, ninside, fij);
else
snaptr->compute_deidrj(jj, ninside, fij);
SNA_DVEC fijs_x = fij[0] * scalev;
SNA_DVEC fijs_y = fij[1] * scalev;
SNA_DVEC fijs_z = fij[2] * scalev;
fi_x += fijs_x;
fi_y += fijs_y;
fi_z += fijs_z;
for (int l = 0; l < vw; l++) {
if (jj < ninside[l]) {
int j = snaptr->inside[jj][l];
f[j][0] -= fijs_x[l];
f[j][1] -= fijs_y[l];
f[j][2] -= fijs_z[l];
if (tally_xyz)
ev_tally_xyz(i[l],j,nlocal,newton_pair,0.0,0.0,
fij[0][l],fij[1][l],fij[2][l],
-snaptr->rij[jj][0][l],-snaptr->rij[jj][1][l],
-snaptr->rij[jj][2][l]);
}
} // for l
} // for jj
SIMD_mask m((SIMD256_count() + ii) < list->inum);
SNA_DVEC fix = SIMD_gather(m, f[0], i * 3) + fi_x;
SIMD_scatter(m, f[0], i * 3, fix);
SNA_DVEC fiy = SIMD_gather(m, f[0], i * 3 + 1) + fi_y;
SIMD_scatter(m, f[0], i * 3 + 1, fiy);
SNA_DVEC fiz = SIMD_gather(m, f[0], i * 3 + 2) + fi_z;
SIMD_scatter(m, f[0], i * 3 + 2, fiz);
// tally energy contribution
if (eflag) {
SNA_DVEC evdwl = SIMD_gather(coeffelem[0], ielem * ncoeffall);
for (int icoeff = 0; icoeff < ncoeff; icoeff++)
evdwl += SIMD_gather(coeffelem[0], ielem * ncoeffall + icoeff +1) *
bispectrum[icoeff];
if (quadraticflag) {
int k = ncoeff+1;
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
SNA_DVEC bveci = SIMD_load(bispectrum + icoeff);
SNA_DVEC c = SIMD_gather(coeffelem[0], ielem * ncoeffall + k);
k++;
evdwl += c * 0.5 * bveci * bveci;
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
SNA_DVEC bvecj = SIMD_load(bispectrum + jcoeff);
SNA_DVEC cj = SIMD_gather(coeffelem[0], ielem * ncoeffall + k);
k++;
evdwl += cj * bveci * bvecj;
}
}
}
sevdwl += scalev * evdwl;
if (eatom) {
SNA_DVEC ea = SIMD_gather(m, eatom, i) + scalev * evdwl;
SIMD_scatter(m, eatom, i, ea);
}
} // if (eflag)
} // for ii
if (eflag) eng_vdwl += SIMD_sum(sevdwl);
if (vflag_fdotr) virial_fdotr_compute();
}
/* ----------------------------------------------------------------------
allocate all arrays
------------------------------------------------------------------------- */
void PairSNAPIntel::allocate()
{
allocated = 1;
int n = atom->ntypes;
memory->create(setflag,n+1,n+1,"pair:setflag");
memory->create(cutsq,n+1,n+1,"pair:cutsq");
memory->create(scale,n+1,n+1,"pair:scale");
map = new int[n+1];
}
/* ----------------------------------------------------------------------
global settings
------------------------------------------------------------------------- */
void PairSNAPIntel::settings(int narg, char ** /* arg */)
{
if (narg > 0)
error->all(FLERR,"Illegal pair_style command");
if ((comm->me == 0) && (comm->nthreads > 1))
error->warning(FLERR, "Pair style snap/intel does not use OpenMP threads");
}
/* ----------------------------------------------------------------------
set coeffs for one or more type pairs
------------------------------------------------------------------------- */
void PairSNAPIntel::coeff(int narg, char **arg)
{
if (!allocated) allocate();
if (narg != 4 + atom->ntypes) error->all(FLERR,"Incorrect args for pair coefficients");
map_element2type(narg-4,arg+4);
// read snapcoeff and snapparam files
read_files(arg[2],arg[3]);
if (!quadraticflag)
ncoeff = ncoeffall - 1;
else {
// ncoeffall should be (ncoeff+2)*(ncoeff+1)/2
// so, ncoeff = floor(sqrt(2*ncoeffall))-1
ncoeff = sqrt(2.0*ncoeffall)-1;
ncoeffq = (ncoeff*(ncoeff+1))/2;
int ntmp = 1+ncoeff+ncoeffq;
if (ntmp != ncoeffall) {
error->all(FLERR,"Incorrect SNAP coeff file");
}
}
snaptr = new SNAIntel(lmp, rfac0, twojmax,
rmin0, switchflag, bzeroflag,
chemflag, bnormflag, wselfallflag,
nelements, switchinnerflag);
if (ncoeff != snaptr->ncoeff) {
if (comm->me == 0)
printf("ncoeff = %d snancoeff = %d \n",ncoeff,snaptr->ncoeff);
error->all(FLERR,"Incorrect SNAP parameter file");
}
// Calculate maximum cutoff for all elements
rcutmax = 0.0;
for (int ielem = 0; ielem < nelements; ielem++)
rcutmax = MAX(2.0*radelem[ielem]*rcutfac,rcutmax);
// set default scaling
int n = atom->ntypes;
for (int ii = 0; ii < n+1; ii++)
for (int jj = 0; jj < n+1; jj++)
scale[ii][jj] = 1.0;
}
/* ----------------------------------------------------------------------
init specific to this pair style
------------------------------------------------------------------------- */
void PairSNAPIntel::init_style()
{
if (force->newton_pair == 0)
error->all(FLERR,"Pair style SNAP requires newton pair on");
// need a full neighbor list
neighbor->add_request(this, NeighConst::REQ_FULL);
snaptr->init();
fix = static_cast<FixIntel *>(modify->get_fix_by_id("package_intel"));
if (!fix) error->all(FLERR, "The 'package intel' command is required for /intel styles");
fix->pair_init_check();
memory->create(bispectrum,ncoeff,"PairSNAP:bispectrum");
memory->create(beta,ncoeff,"PairSNAP:beta");
}
/* ----------------------------------------------------------------------
init for one type pair i,j and corresponding j,i
------------------------------------------------------------------------- */
double PairSNAPIntel::init_one(int i, int j)
{
if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
scale[j][i] = scale[i][j];
return (radelem[map[i]] +
radelem[map[j]])*rcutfac;
}
/* ---------------------------------------------------------------------- */
void PairSNAPIntel::read_files(char *coefffilename, char *paramfilename)
{
// open SNAP coefficient file on proc 0
FILE *fpcoeff;
if (comm->me == 0) {
fpcoeff = utils::open_potential(coefffilename,lmp,nullptr);
if (fpcoeff == nullptr)
error->one(FLERR,"Cannot open SNAP coefficient file {}: ",
coefffilename, utils::getsyserror());
}
char line[MAXLINE],*ptr;
int eof = 0;
int nwords = 0;
while (nwords == 0) {
if (comm->me == 0) {
ptr = fgets(line,MAXLINE,fpcoeff);
if (ptr == nullptr) {
eof = 1;
fclose(fpcoeff);
}
}
MPI_Bcast(&eof,1,MPI_INT,0,world);
if (eof) break;
MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
// strip comment, skip line if blank
nwords = utils::count_words(utils::trim_comment(line));
}
if (nwords != 2)
error->all(FLERR,"Incorrect format in SNAP coefficient file");
// strip single and double quotes from words
int nelemtmp = 0;
try {
ValueTokenizer words(utils::trim_comment(line),"\"' \t\n\r\f");
nelemtmp = words.next_int();
ncoeffall = words.next_int();
} catch (TokenizerException &e) {
error->all(FLERR,"Incorrect format in SNAP coefficient file: {}", e.what());
}
// clean out old arrays and set up element lists
memory->destroy(radelem);
memory->destroy(wjelem);
memory->destroy(coeffelem);
memory->destroy(sinnerelem);
memory->destroy(dinnerelem);
memory->create(radelem,nelements,"pair:radelem");
memory->create(wjelem,nelements,"pair:wjelem");
memory->create(coeffelem,nelements,ncoeffall,"pair:coeffelem");
memory->create(sinnerelem,nelements,"pair:sinnerelem");
memory->create(dinnerelem,nelements,"pair:dinnerelem");
// initialize checklist for all required nelements
int *elementflags = new int[nelements];
for (int jelem = 0; jelem < nelements; jelem++)
elementflags[jelem] = 0;
// loop over nelemtmp blocks in the SNAP coefficient file
for (int ielem = 0; ielem < nelemtmp; ielem++) {
if (comm->me == 0) {
ptr = fgets(line,MAXLINE,fpcoeff);
if (ptr == nullptr) {
eof = 1;
fclose(fpcoeff);
}
}
MPI_Bcast(&eof,1,MPI_INT,0,world);
if (eof)
error->all(FLERR,"Incorrect format in SNAP coefficient file");
MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
std::vector<std::string> words;
try {
words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector();
} catch (TokenizerException &) {
// ignore
}
if (words.size() != 3)
error->all(FLERR,"Incorrect format in SNAP coefficient file");
int jelem;
for (jelem = 0; jelem < nelements; jelem++)
if (words[0] == elements[jelem]) break;
// if this element not needed, skip this block
if (jelem == nelements) {
if (comm->me == 0) {
for (int icoeff = 0; icoeff < ncoeffall; icoeff++) {
ptr = fgets(line,MAXLINE,fpcoeff);
if (ptr == nullptr) {
eof = 1;
fclose(fpcoeff);
}
}
}
MPI_Bcast(&eof,1,MPI_INT,0,world);
if (eof)
error->all(FLERR,"Incorrect format in SNAP coefficient file");
continue;
}
if (elementflags[jelem] == 1)
error->all(FLERR,"Incorrect format in SNAP coefficient file");
else
elementflags[jelem] = 1;
radelem[jelem] = utils::numeric(FLERR,words[1],false,lmp);
wjelem[jelem] = utils::numeric(FLERR,words[2],false,lmp);
if (comm->me == 0)
utils::logmesg(lmp,"SNAP Element = {}, Radius {}, Weight {}\n",
elements[jelem], radelem[jelem], wjelem[jelem]);
for (int icoeff = 0; icoeff < ncoeffall; icoeff++) {
if (comm->me == 0) {
ptr = fgets(line,MAXLINE,fpcoeff);
if (ptr == nullptr) {
eof = 1;
fclose(fpcoeff);
}
}
MPI_Bcast(&eof,1,MPI_INT,0,world);
if (eof)
error->all(FLERR,"Incorrect format in SNAP coefficient file");
MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
try {
ValueTokenizer coeff(utils::trim_comment(line));
if (coeff.count() != 1)
error->all(FLERR,"Incorrect format in SNAP coefficient file");
coeffelem[jelem][icoeff] = coeff.next_double();
} catch (TokenizerException &e) {
error->all(FLERR,"Incorrect format in SNAP coefficient file: {}", e.what());
}
}
}
if (comm->me == 0) fclose(fpcoeff);
for (int jelem = 0; jelem < nelements; jelem++) {
if (elementflags[jelem] == 0)
error->all(FLERR,"Element {} not found in SNAP coefficient file", elements[jelem]);
}
delete[] elementflags;
// set flags for required keywords
rcutfacflag = 0;
twojmaxflag = 0;
// Set defaults for optional keywords
rfac0 = 0.99363;
rmin0 = 0.0;
switchflag = 1;
bzeroflag = 1;
quadraticflag = 0;
chemflag = 0;
bnormflag = 0;
wselfallflag = 0;
switchinnerflag = 0;
chunksize = 32768;
parallel_thresh = 8192;
// set local input checks
int sinnerflag = 0;
int dinnerflag = 0;
// open SNAP parameter file on proc 0
FILE *fpparam;
if (comm->me == 0) {
fpparam = utils::open_potential(paramfilename,lmp,nullptr);
if (fpparam == nullptr)
error->one(FLERR,"Cannot open SNAP parameter file {}: {}",
paramfilename, utils::getsyserror());
}
eof = 0;
while (true) {
if (comm->me == 0) {
ptr = fgets(line,MAXLINE,fpparam);
if (ptr == nullptr) {
eof = 1;
fclose(fpparam);
}
}
MPI_Bcast(&eof,1,MPI_INT,0,world);
if (eof) break;
MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
// words = ptrs to all words in line
// strip single and double quotes from words
std::vector<std::string> words;
try {
words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector();
} catch (TokenizerException &) {
// ignore
}
if (words.size() == 0) continue;
if (words.size() < 2)
error->all(FLERR,"Incorrect format in SNAP parameter file");
auto keywd = words[0];
auto keyval = words[1];
// check for keywords with more than one value per element
if (keywd == "sinner" || keywd == "dinner") {
if ((int)words.size() != nelements+1)
error->all(FLERR,"Incorrect SNAP parameter file");
// innerlogstr collects all values of sinner or dinner for log output below
std::string innerlogstr;
int iword = 1;
if (keywd == "sinner") {
for (int ielem = 0; ielem < nelements; ielem++) {
keyval = words[iword];
sinnerelem[ielem] = utils::numeric(FLERR,keyval,false,lmp);
iword++;
innerlogstr += keyval + " ";
}
sinnerflag = 1;
} else if (keywd == "dinner") {
for (int ielem = 0; ielem < nelements; ielem++) {
keyval = words[iword];
dinnerelem[ielem] = utils::numeric(FLERR,keyval,false,lmp);
iword++;
innerlogstr += keyval + " ";
}
dinnerflag = 1;
}
if (comm->me == 0)
utils::logmesg(lmp,"SNAP keyword {} {} ... \n", keywd, innerlogstr);
} else {
// all other keywords take one value
if (nwords != 2)
error->all(FLERR,"Incorrect SNAP parameter file");
if (comm->me == 0)
utils::logmesg(lmp,"SNAP keyword {} {}\n",keywd,keyval);
if (keywd == "rcutfac") {
rcutfac = utils::numeric(FLERR,keyval,false,lmp);
rcutfacflag = 1;
} else if (keywd == "twojmax") {
twojmax = utils::inumeric(FLERR,keyval,false,lmp);
twojmaxflag = 1;
} else if (keywd == "rfac0")
rfac0 = utils::numeric(FLERR,keyval,false,lmp);
else if (keywd == "rmin0")
rmin0 = utils::numeric(FLERR,keyval,false,lmp);
else if (keywd == "switchflag")
switchflag = utils::inumeric(FLERR,keyval,false,lmp);
else if (keywd == "bzeroflag")
bzeroflag = utils::inumeric(FLERR,keyval,false,lmp);
else if (keywd == "quadraticflag")
quadraticflag = utils::inumeric(FLERR,keyval,false,lmp);
else if (keywd == "chemflag")
chemflag = utils::inumeric(FLERR,keyval,false,lmp);
else if (keywd == "bnormflag")
bnormflag = utils::inumeric(FLERR,keyval,false,lmp);
else if (keywd == "wselfallflag")
wselfallflag = utils::inumeric(FLERR,keyval,false,lmp);
else if (keywd == "switchinnerflag")
switchinnerflag = utils::inumeric(FLERR,keyval,false,lmp);
else if (keywd == "chunksize")
chunksize = utils::inumeric(FLERR,keyval,false,lmp);
else if (keywd == "parallelthresh")
parallel_thresh = utils::inumeric(FLERR,keyval,false,lmp);
else
error->all(FLERR,"Unknown parameter '{}' in SNAP parameter file", keywd);
}
}
if (rcutfacflag == 0 || twojmaxflag == 0)
error->all(FLERR,"Incorrect SNAP parameter file");
if (chemflag && nelemtmp != nelements)
error->all(FLERR,"Incorrect SNAP parameter file");
if (switchinnerflag && !(sinnerflag && dinnerflag))
error->all(FLERR,"Incorrect SNAP parameter file");
if (!switchinnerflag && (sinnerflag || dinnerflag))
error->all(FLERR,"Incorrect SNAP parameter file");
}
/* ----------------------------------------------------------------------
memory usage
------------------------------------------------------------------------- */
double PairSNAPIntel::memory_usage()
{
double bytes = Pair::memory_usage();
int n = atom->ntypes+1;
bytes += (double)n*n*sizeof(int); // setflag
bytes += (double)n*n*sizeof(double); // cutsq
bytes += (double)n*n*sizeof(double); // scale
bytes += (double)n*sizeof(int); // map
bytes += (double)ncoeff*sizeof(SNA_DVEC); // bispectrum
bytes += (double)ncoeff*sizeof(SNA_DVEC); // beta
bytes += snaptr->memory_usage(); // SNA object
return bytes;
}
/* ---------------------------------------------------------------------- */
void *PairSNAPIntel::extract(const char *str, int &dim)
{
dim = 2;
if (strcmp(str,"scale") == 0) return (void *) scale;
return nullptr;
}
#endif
#endif

View File

@ -0,0 +1,83 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#if defined(__AVX512F__)
#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
#ifdef PAIR_CLASS
// clang-format off
PairStyle(snap/intel,PairSNAPIntel);
// clang-format on
#else
#ifndef LMP_PAIR_SNAP_INTEL_H
#define LMP_PAIR_SNAP_INTEL_H
#include "fix_intel.h"
#include "pair.h"
namespace ip_simd { class SIMD_double; class SIMD_int; };
#define SNA_DVEC ip_simd::SIMD_double
#define SNA_IVEC ip_simd::SIMD256_int
namespace LAMMPS_NS {
class PairSNAPIntel : public Pair {
public:
PairSNAPIntel(class LAMMPS *);
~PairSNAPIntel() override;
void compute(int, int) override;
void settings(int, char **) override;
void coeff(int, char **) override;
void init_style() override;
double init_one(int, int) override;
double memory_usage() override;
void *extract(const char *, int &) override;
double rcutfac, quadraticflag; // declared public to workaround gcc 4.9
int ncoeff; // compiler bug, manifest in KOKKOS package
protected:
FixIntel *fix;
int ncoeffq, ncoeffall;
class SNAIntel *snaptr;
virtual void allocate();
void read_files(char *, char *);
inline int equal(double *x, double *y);
inline double dist2(double *x, double *y);
double rcutmax; // max cutoff for all elements
double *radelem; // element radii
double *wjelem; // elements weights
double **coeffelem; // element bispectrum coefficients
SNA_DVEC *beta; // betas for all atoms in list
SNA_DVEC *bispectrum; // bispectrum components for all atoms in list
double **scale; // for thermodynamic integration
int twojmax, switchflag, bzeroflag, bnormflag;
int chemflag, wselfallflag;
int switchinnerflag; // inner cutoff switch
double *sinnerelem; // element inner cutoff midpoint
double *dinnerelem; // element inner cutoff half-width
int chunksize, parallel_thresh;
double rfac0, rmin0, wj1, wj2;
int rcutfacflag, twojmaxflag; // flags for required parameters
};
} // namespace LAMMPS_NS
#endif
#endif
#endif
#endif

1505
src/INTEL/sna_intel.cpp Normal file

File diff suppressed because it is too large Load Diff

187
src/INTEL/sna_intel.h Normal file
View File

@ -0,0 +1,187 @@
/* -*- c++ -*- -------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: W. Michael Brown, Intel
------------------------------------------------------------------------- */
#ifndef LMP_SNA_INTEL_H
#define LMP_SNA_INTEL_H
#if defined(__AVX512F__)
#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
#include "pointers.h"
#include "intel_buffers.h"
#include "intel_simd.h"
#define SVW 8
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#define SV_for _Pragma("omp simd") _Pragma("vector aligned") for
#else
#define SV_for _Pragma("simd assert") _Pragma("vector aligned") for
#endif
#else
#define SV_for for
#endif
namespace LAMMPS_NS {
struct SNA_ZINDICES {
int j1, j2, j, ma1min, ma2max, mb1min;
int mb2max, na, nb, jju;
};
struct SNA_BINDICES {
int j1, j2, j;
};
#define SNA_DVEC ip_simd::SIMD_double
#define SNA_IVEC ip_simd::SIMD256_int
class SNAIntel : protected Pointers {
public:
SNAIntel(LAMMPS *, double, int, double, int, int, int, int, int, int, int);
SNAIntel(LAMMPS *lmp) : Pointers(lmp){};
~SNAIntel() override;
void build_indexlist();
void init();
double memory_usage();
int ncoeff;
inline int vector_width() const { return SVW; }
// functions for bispectrum coefficients
void compute_ui(const SNA_IVEC &, const SNA_IVEC &, const int max_jnum);
template <int> void compute_zi_or_yi(const SNA_DVEC *);
void compute_yi_from_zi(const SNA_DVEC *);
void compute_yterm(int, int, int, const double *);
void compute_bi(const SNA_IVEC &);
// functions for derivatives
void compute_duidrj(const int, const SNA_IVEC &);
void compute_deidrj_e(const int, const SNA_IVEC &, SNA_DVEC *);
void compute_deidrj(const int, const SNA_IVEC &, SNA_DVEC *);
double compute_sfac(double, double, double, double);
SNA_DVEC compute_sfac(const SNA_DVEC &, const SNA_DVEC &, const SNA_DVEC &,
const SNA_DVEC &);
inline SNA_DVEC compute_sfac_dsfac(const SNA_DVEC &, const SNA_DVEC &,
const SNA_DVEC &, const SNA_DVEC &,
SNA_DVEC &);
// public bispectrum data
int twojmax;
SNA_DVEC *blist;
double **dblist;
// short neighbor list data
void grow_rij(int);
int nmax; // allocated size of short lists
SNA_DVEC **rij; // short rij list
SNA_IVEC *inside; // short neighbor list
SNA_DVEC *wj; // short weight list
SNA_DVEC *rcutij; // short cutoff list
// only allocated for switch_inner_flag=1
SNA_DVEC *sinnerij; // short inner cutoff midpoint list
SNA_DVEC *dinnerij; // short inner half-width list
// only allocated for chem_flag=1
SNA_IVEC *element; // short element list [0,nelements)
private:
double rmin0, rfac0;
// data for bispectrum coefficients
SNA_ZINDICES *idxz;
SNA_BINDICES *idxb;
double **rootpqarray;
double *cglist;
int ***idxcg_block;
SNA_DVEC *ulisttot_r, *ulisttot_i;
SNA_DVEC **ulist_r_ij, **ulist_i_ij; // short u list
int *idxu_block;
SNA_DVEC *zlist_r, *zlist_i;
int ***idxz_block;
int ***idxb_block;
SNA_DVEC **dulist_r, **dulist_i;
SNA_DVEC *ylist_r, *ylist_i;
int idxcg_max, idxu_max, idxz_max, idxb_max;
void create_twojmax_arrays();
void destroy_twojmax_arrays();
void init_clebsch_gordan();
void print_clebsch_gordan();
void init_rootpqarray();
void zero_uarraytot(const SNA_IVEC &);
void add_uarraytot(const SNA_DVEC &, const int, const SNA_IVEC &);
void compute_uarray(const SNA_DVEC &, const SNA_DVEC &, const SNA_DVEC &,
const SNA_DVEC &, const SNA_DVEC &, const int,
const SNA_IVEC &);
double deltacg(int, int, int);
void compute_ncoeff();
void compute_duarray(const SNA_DVEC &, const SNA_DVEC &, const SNA_DVEC &,
const SNA_DVEC &, const SNA_DVEC &, const SNA_DVEC &,
const SNA_DVEC &, const SNA_DVEC &, int,
const SNA_IVEC &);
inline double choose_beta(const int, const int, const int,
const int, const int, const int, int &);
// Sets the style for the switching function
// 0 = none
// 1 = cosine
int switch_flag;
// Sets the style for the inner switching function
// 0 = none
// 1 = cosine
int switch_inner_flag;
// Self-weight
double wself;
int bzero_flag; // 1 if bzero subtracted from barray
double *bzero; // array of B values for isolated atoms
int bnorm_flag; // 1 if barray divided by j+1
int chem_flag; // 1 for multi-element bispectrum components
int wselfall_flag; // 1 for adding wself to all element labelings
int nelements; // number of elements
int ndoubles; // number of multi-element pairs
int ntriples; // number of multi-element triplets
};
} // namespace LAMMPS_NS
#endif
#endif
#endif