Merge pull request #3921 from wmbrownIntel/snap-intel
Adding intel variant of snap pair style.
This commit is contained in:
@ -265,7 +265,7 @@ OPT.
|
||||
* :doc:`smd/tri_surface <pair_smd_triangulated_surface>`
|
||||
* :doc:`smd/ulsph <pair_smd_ulsph>`
|
||||
* :doc:`smtbq <pair_smtbq>`
|
||||
* :doc:`snap (k) <pair_snap>`
|
||||
* :doc:`snap (ik) <pair_snap>`
|
||||
* :doc:`soft (go) <pair_soft>`
|
||||
* :doc:`sph/heatconduction <pair_sph_heatconduction>`
|
||||
* :doc:`sph/idealgas <pair_sph_idealgas>`
|
||||
|
||||
@ -1,10 +1,11 @@
|
||||
.. index:: pair_style snap
|
||||
.. index:: pair_style snap/intel
|
||||
.. index:: pair_style snap/kk
|
||||
|
||||
pair_style snap command
|
||||
=======================
|
||||
|
||||
Accelerator Variants: *snap/kk*
|
||||
Accelerator Variants: *snap/intel*, *snap/kk*
|
||||
|
||||
Syntax
|
||||
""""""
|
||||
@ -260,6 +261,14 @@ This style is part of the ML-SNAP package. It is only enabled if LAMMPS
|
||||
was built with that package. See the :doc:`Build package
|
||||
<Build_package>` page for more info.
|
||||
|
||||
The *snap/intel* accelerator variant will *only* be available if LAMMPS
|
||||
is built with Intel *compilers* and for CPUs with AVX-512 support.
|
||||
While the INTEL package in general allows multiple floating point
|
||||
precision modes to be selected, *snap/intel* will currently always use
|
||||
full double precision regardless of the precision mode selected.
|
||||
Additionally, the *intel* variant of snap will **NOT** use multiple
|
||||
threads with OpenMP.
|
||||
|
||||
Related commands
|
||||
""""""""""""""""
|
||||
|
||||
|
||||
@ -185,6 +185,7 @@ fi
|
||||
if (test $1 = "ML-SNAP") then
|
||||
depend ML-IAP
|
||||
depend KOKKOS
|
||||
depend INTEL
|
||||
fi
|
||||
|
||||
if (test $1 = "CG-SPICA") then
|
||||
|
||||
70
src/INTEL/TEST/in.intel.snap
Normal file
70
src/INTEL/TEST/in.intel.snap
Normal file
@ -0,0 +1,70 @@
|
||||
# Toy demonstration of SNAP "scale" parameter, using fix/adapt and hybrid/overlay
|
||||
# Mixing linear and quadratic SNAP Ni potentials by Zuo et al. JCPA 2020
|
||||
|
||||
variable w index 10 # Warmup Timesteps
|
||||
variable t index 100 # Main Run Timesteps
|
||||
variable m index 1 # Main Run Timestep Multiplier
|
||||
variable n index 0 # Use NUMA Mapping for Multi-Node
|
||||
|
||||
variable x index 4
|
||||
variable y index 2
|
||||
variable z index 2
|
||||
|
||||
variable rr equal floor($t*$m)
|
||||
variable root getenv LMP_ROOT
|
||||
|
||||
if "$n > 0" then "processors * * * grid numa"
|
||||
|
||||
# mixing parameter
|
||||
|
||||
variable lambda equal 0.2
|
||||
|
||||
# Initialize simulation
|
||||
|
||||
variable a equal 3.52
|
||||
units metal
|
||||
|
||||
# generate the box and atom positions using a FCC lattice
|
||||
variable nx equal 20*$x
|
||||
variable ny equal 20*$y
|
||||
variable nz equal 20*$z
|
||||
|
||||
boundary p p p
|
||||
|
||||
lattice fcc $a
|
||||
region box block 0 ${nx} 0 ${ny} 0 ${nz}
|
||||
create_box 1 box
|
||||
create_atoms 1 box
|
||||
|
||||
mass 1 34.
|
||||
|
||||
# choose bundled SNAP Ni potential from Zuo et al. JCPA 2020
|
||||
pair_style hybrid/overlay snap snap
|
||||
pair_coeff * * snap 1 &
|
||||
${root}/examples/snap/Ni_Zuo_JPCA2020.snapcoeff &
|
||||
${root}/examples/snap/Ni_Zuo_JPCA2020.snapparam Ni
|
||||
pair_coeff * * snap 2 &
|
||||
${root}/examples/snap/Ni_Zuo_JPCA2020.quadratic.snapcoeff &
|
||||
${root}/examples/snap/Ni_Zuo_JPCA2020.quadratic.snapparam Ni
|
||||
|
||||
# scale according to mixing parameter
|
||||
variable l1 equal ${lambda}
|
||||
variable l2 equal 1.0-${lambda}
|
||||
fix scale1 all adapt 1 pair snap:1 scale * * v_l1
|
||||
fix scale2 all adapt 1 pair snap:2 scale * * v_l2
|
||||
|
||||
# Setup output
|
||||
thermo 1
|
||||
thermo_modify norm yes
|
||||
|
||||
# Set up NVE run
|
||||
timestep 0.5e-3
|
||||
neighbor 1.0 bin
|
||||
neigh_modify every 1 delay 0 check yes
|
||||
|
||||
# Run MD
|
||||
velocity all create 300.0 4928459 loop geom
|
||||
fix 1 all nve
|
||||
|
||||
if "$w > 0" then "run $w"
|
||||
run ${rr}
|
||||
@ -35,7 +35,7 @@ export I_MPI_PIN_DOMAIN=core
|
||||
# End settings for your system
|
||||
#########################################################################
|
||||
|
||||
export WORKLOADS="lj rhodo lc sw water eam airebo dpd tersoff"
|
||||
export WORKLOADS="lj rhodo lc sw water eam airebo dpd tersoff snap"
|
||||
export LMP_ARGS="-pk intel 0 -sf intel -screen none -v d 1"
|
||||
export RLMP_ARGS="-pk intel 0 lrt yes -sf intel -screen none -v d 1"
|
||||
|
||||
|
||||
@ -46,13 +46,38 @@ namespace ip_simd {
|
||||
|
||||
typedef __mmask16 SIMD_mask;
|
||||
|
||||
inline bool any(const SIMD_mask &m) { return m != 0; }
|
||||
|
||||
struct SIMD_int {
|
||||
__m512i v;
|
||||
SIMD_int() {}
|
||||
SIMD_int(const __m512i in) : v(in) {}
|
||||
inline int & operator[](const int i) { return ((int *)&(v))[i]; }
|
||||
inline const int & operator[](const int i) const
|
||||
{ return ((int *)&(v))[i]; }
|
||||
operator __m512i() const { return v;}
|
||||
};
|
||||
|
||||
struct SIMD256_int {
|
||||
__m256i v;
|
||||
SIMD256_int() {}
|
||||
SIMD256_int(const __m256i in) : v(in) {}
|
||||
SIMD256_int(const int in) : v(_mm256_set1_epi32(in)) {}
|
||||
inline int & operator[](const int i) { return ((int *)&(v))[i]; }
|
||||
inline const int & operator[](const int i) const
|
||||
{ return ((int *)&(v))[i]; }
|
||||
#ifdef __INTEL_LLVM_COMPILER
|
||||
inline SIMD256_int operator&=(const int i)
|
||||
{ v=_mm256_and_epi32(v, _mm256_set1_epi32(i)); return *this; };
|
||||
#else
|
||||
inline SIMD256_int operator&=(const int i)
|
||||
{ v=_mm256_and_si256(v, _mm256_set1_epi32(i)); return *this; };
|
||||
#endif
|
||||
inline SIMD256_int operator+=(const int i)
|
||||
{ v=_mm256_add_epi32(v, _mm256_set1_epi32(i)); return *this; };
|
||||
operator __m256i() const { return v;}
|
||||
};
|
||||
|
||||
struct SIMD_float {
|
||||
__m512 v;
|
||||
SIMD_float() {}
|
||||
@ -64,7 +89,24 @@ namespace ip_simd {
|
||||
__m512d v;
|
||||
SIMD_double() {}
|
||||
SIMD_double(const __m512d in) : v(in) {}
|
||||
SIMD_double(const double in) { v=_mm512_set1_pd(in); }
|
||||
inline double & operator[](const int i) { return ((double *)&(v))[i]; }
|
||||
inline const double & operator[](const int i) const
|
||||
{ return ((double *)&(v))[i]; }
|
||||
operator __m512d() const { return v;}
|
||||
|
||||
SIMD_double & operator=(const double i)
|
||||
{ _mm512_set1_pd(i); return *this; }
|
||||
SIMD_double &operator=(const SIMD_double &i)
|
||||
{ v = i.v; return *this; }
|
||||
|
||||
SIMD_double operator-() { return _mm512_xor_pd(v, _mm512_set1_pd(-0.0)); }
|
||||
SIMD_double & operator+=(const SIMD_double & two)
|
||||
{ v = _mm512_add_pd(v, two.v); return *this; }
|
||||
SIMD_double & operator-=(const SIMD_double & two)
|
||||
{ v = _mm512_sub_pd(v, two.v); return *this; }
|
||||
SIMD_double & operator*=(const SIMD_double & two)
|
||||
{ v = _mm512_mul_pd(v, two.v); return *this; }
|
||||
};
|
||||
|
||||
template<class flt_t>
|
||||
@ -99,6 +141,12 @@ namespace ip_simd {
|
||||
|
||||
// ------- Set Operations
|
||||
|
||||
inline SIMD256_int SIMD256_set(const int l0, const int l1, const int l2,
|
||||
const int l3, const int l4, const int l5,
|
||||
const int l6, const int l7) {
|
||||
return _mm256_setr_epi32(l0,l1,l2,l3,l4,l5,l6,l7);
|
||||
}
|
||||
|
||||
inline SIMD_int SIMD_set(const int l0, const int l1, const int l2,
|
||||
const int l3, const int l4, const int l5,
|
||||
const int l6, const int l7, const int l8,
|
||||
@ -109,6 +157,10 @@ namespace ip_simd {
|
||||
l8,l9,l10,l11,l12,l13,l14,l15);
|
||||
}
|
||||
|
||||
inline SIMD256_int SIMD256_set(const int l) {
|
||||
return _mm256_set1_epi32(l);
|
||||
}
|
||||
|
||||
inline SIMD_int SIMD_set(const int l) {
|
||||
return _mm512_set1_epi32(l);
|
||||
}
|
||||
@ -121,6 +173,10 @@ namespace ip_simd {
|
||||
return _mm512_set1_pd(l);
|
||||
}
|
||||
|
||||
inline SIMD256_int SIMD256_count() {
|
||||
return SIMD256_set(0,1,2,3,4,5,6,7);
|
||||
}
|
||||
|
||||
inline SIMD_int SIMD_zero_masked(const SIMD_mask &m, const SIMD_int &one) {
|
||||
return _mm512_maskz_mov_epi32(m, one);
|
||||
}
|
||||
@ -147,6 +203,10 @@ namespace ip_simd {
|
||||
|
||||
// -------- Load Operations
|
||||
|
||||
inline SIMD256_int SIMD_load(const SIMD256_int *p) {
|
||||
return _mm256_load_epi32((int *)p);
|
||||
}
|
||||
|
||||
inline SIMD_int SIMD_load(const int *p) {
|
||||
return _mm512_load_epi32(p);
|
||||
}
|
||||
@ -159,6 +219,10 @@ namespace ip_simd {
|
||||
return _mm512_load_pd(p);
|
||||
}
|
||||
|
||||
inline SIMD_double SIMD_load(const SIMD_double *p) {
|
||||
return _mm512_load_pd((double *)p);
|
||||
}
|
||||
|
||||
inline SIMD_int SIMD_loadz(const SIMD_mask &m, const int *p) {
|
||||
return _mm512_maskz_load_epi32(m, p);
|
||||
}
|
||||
@ -171,6 +235,10 @@ namespace ip_simd {
|
||||
return _mm512_maskz_load_pd(m, p);
|
||||
}
|
||||
|
||||
inline SIMD256_int SIMD_gather(const int *p, const SIMD256_int &i) {
|
||||
return _mm256_i32gather_epi32(p, i, _MM_SCALE_4);
|
||||
}
|
||||
|
||||
inline SIMD_int SIMD_gather(const int *p, const SIMD_int &i) {
|
||||
return _mm512_i32gather_epi32(i, p, _MM_SCALE_4);
|
||||
}
|
||||
@ -179,6 +247,10 @@ namespace ip_simd {
|
||||
return _mm512_i32gather_ps(i, p, _MM_SCALE_4);
|
||||
}
|
||||
|
||||
inline SIMD_double SIMD_gather(const double *p, const SIMD256_int &i) {
|
||||
return _mm512_i32gather_pd(i, p, _MM_SCALE_8);
|
||||
}
|
||||
|
||||
inline SIMD_double SIMD_gather(const double *p, const SIMD_int &i) {
|
||||
return _mm512_i32gather_pd(_mm512_castsi512_si256(i), p, _MM_SCALE_8);
|
||||
}
|
||||
@ -201,6 +273,12 @@ namespace ip_simd {
|
||||
_mm512_castsi512_si256(i), p, _MM_SCALE_8);
|
||||
}
|
||||
|
||||
inline SIMD_double SIMD_gather(const SIMD_mask &m, const double *p,
|
||||
const SIMD256_int &i) {
|
||||
return _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
|
||||
i, p, _MM_SCALE_8);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline SIMD_int SIMD_gatherz_offset(const SIMD_mask &m, const int *p,
|
||||
const SIMD_int &i) {
|
||||
@ -252,6 +330,15 @@ namespace ip_simd {
|
||||
return _mm512_store_pd(p,one);
|
||||
}
|
||||
|
||||
inline void SIMD_store(SIMD_double *p, const SIMD_double &one) {
|
||||
return _mm512_store_pd((double *)p,one);
|
||||
}
|
||||
|
||||
inline void SIMD_scatter(const SIMD_mask &m, int *p,
|
||||
const SIMD256_int &i, const SIMD256_int &vec) {
|
||||
_mm256_mask_i32scatter_epi32(p, m, i, vec, _MM_SCALE_4);
|
||||
}
|
||||
|
||||
inline void SIMD_scatter(const SIMD_mask &m, int *p,
|
||||
const SIMD_int &i, const SIMD_int &vec) {
|
||||
_mm512_mask_i32scatter_epi32(p, m, i, vec, _MM_SCALE_4);
|
||||
@ -268,8 +355,22 @@ namespace ip_simd {
|
||||
_MM_SCALE_8);
|
||||
}
|
||||
|
||||
inline void SIMD_scatter(const SIMD_mask &m, double *p,
|
||||
const SIMD256_int &i, const SIMD_double &vec) {
|
||||
_mm512_mask_i32scatter_pd(p, m, i, vec, _MM_SCALE_8);
|
||||
}
|
||||
|
||||
inline void SIMD_scatter(double *p,
|
||||
const SIMD256_int &i, const SIMD_double &vec) {
|
||||
_mm512_i32scatter_pd(p, i, vec, _MM_SCALE_8);
|
||||
}
|
||||
|
||||
// ------- Arithmetic Operations
|
||||
|
||||
inline SIMD256_int operator+(const SIMD256_int &one, const SIMD256_int &two) {
|
||||
return _mm256_add_epi32(one,two);
|
||||
}
|
||||
|
||||
inline SIMD_int operator+(const SIMD_int &one, const SIMD_int &two) {
|
||||
return _mm512_add_epi32(one,two);
|
||||
}
|
||||
@ -286,6 +387,10 @@ namespace ip_simd {
|
||||
return _mm512_add_epi32(one,SIMD_set(two));
|
||||
}
|
||||
|
||||
inline SIMD256_int operator+(const SIMD256_int &one, const int two) {
|
||||
return _mm256_add_epi32(one,SIMD256_set(two));
|
||||
}
|
||||
|
||||
inline SIMD_float operator+(const SIMD_float &one, const float two) {
|
||||
return _mm512_add_ps(one,SIMD_set(two));
|
||||
}
|
||||
@ -299,6 +404,11 @@ namespace ip_simd {
|
||||
return _mm512_mask_add_epi32(one,m,one,SIMD_set(two));
|
||||
}
|
||||
|
||||
inline SIMD256_int SIMD_add(const SIMD_mask &m,
|
||||
const SIMD256_int &one, const int two) {
|
||||
return _mm256_mask_add_epi32(one,m,one,SIMD256_set(two));
|
||||
}
|
||||
|
||||
inline SIMD_float SIMD_add(const SIMD_mask &m,
|
||||
const SIMD_float &one, const float two) {
|
||||
return _mm512_mask_add_ps(one,m,one,SIMD_set(two));
|
||||
@ -309,6 +419,11 @@ namespace ip_simd {
|
||||
return _mm512_mask_add_pd(one,m,one,SIMD_set(two));
|
||||
}
|
||||
|
||||
inline SIMD_double SIMD_add(const SIMD_mask &m,
|
||||
const SIMD_double &one, const SIMD_double &two) {
|
||||
return _mm512_mask_add_pd(one,m,one,two);
|
||||
}
|
||||
|
||||
inline SIMD_int SIMD_add(const SIMD_int &s, const SIMD_mask &m,
|
||||
const SIMD_int &one, const SIMD_int &two) {
|
||||
return _mm512_mask_add_epi32(s,m,one,two);
|
||||
@ -387,6 +502,10 @@ namespace ip_simd {
|
||||
return _mm512_mul_pd(one,two);
|
||||
}
|
||||
|
||||
inline SIMD256_int operator*(const SIMD256_int &one, const int two) {
|
||||
return _mm256_mullo_epi32(one,SIMD256_set(two));
|
||||
}
|
||||
|
||||
inline SIMD_int operator*(const SIMD_int &one, const int two) {
|
||||
return _mm512_mullo_epi32(one,SIMD_set(two));
|
||||
}
|
||||
@ -417,6 +536,12 @@ namespace ip_simd {
|
||||
return _mm512_fmadd_pd(one,two,three);
|
||||
}
|
||||
|
||||
inline SIMD_double SIMD_fma(const SIMD_mask m, const SIMD_double &one,
|
||||
const SIMD_double &two,
|
||||
const SIMD_double &three) {
|
||||
return _mm512_mask3_fmadd_pd(one,two,three,m);
|
||||
}
|
||||
|
||||
inline SIMD_float SIMD_fms(const SIMD_float &one, const SIMD_float &two,
|
||||
const SIMD_float &three) {
|
||||
return _mm512_fmsub_ps(one,two,three);
|
||||
@ -493,6 +618,10 @@ namespace ip_simd {
|
||||
return _mm512_pow_pd(one, two);
|
||||
}
|
||||
|
||||
inline SIMD_double SIMD_pow(const SIMD_double &one, const double two) {
|
||||
return _mm512_pow_pd(one, SIMD_set(two));
|
||||
}
|
||||
|
||||
inline SIMD_float SIMD_exp(const SIMD_float &one) {
|
||||
return _mm512_exp_ps(one);
|
||||
}
|
||||
@ -501,6 +630,18 @@ namespace ip_simd {
|
||||
return _mm512_exp_pd(one);
|
||||
}
|
||||
|
||||
inline SIMD_double SIMD_cos(const SIMD_double &one) {
|
||||
return _mm512_cos_pd(one);
|
||||
}
|
||||
|
||||
inline SIMD_double SIMD_sin(const SIMD_double &one) {
|
||||
return _mm512_sin_pd(one);
|
||||
}
|
||||
|
||||
inline SIMD_double SIMD_tan(const SIMD_double &one) {
|
||||
return _mm512_tan_pd(one);
|
||||
}
|
||||
|
||||
// ------- Comparison operations
|
||||
|
||||
inline SIMD_mask SIMD_lt(SIMD_mask m, const SIMD_int &one,
|
||||
@ -533,6 +674,14 @@ namespace ip_simd {
|
||||
return _mm512_mask_cmplt_pd_mask(m, SIMD_set(one), two);
|
||||
}
|
||||
|
||||
inline SIMD_mask operator<(const SIMD256_int &one, const SIMD256_int &two) {
|
||||
return _mm256_cmplt_epi32_mask(one,two);
|
||||
}
|
||||
|
||||
inline SIMD_mask operator<(const int one, const SIMD256_int &two) {
|
||||
return _mm256_cmplt_epi32_mask(SIMD256_set(one),two);
|
||||
}
|
||||
|
||||
inline SIMD_mask operator<(const SIMD_int &one, const SIMD_int &two) {
|
||||
return _mm512_cmplt_epi32_mask(one,two);
|
||||
}
|
||||
@ -577,6 +726,10 @@ namespace ip_simd {
|
||||
return _mm512_cmple_ps_mask(SIMD_set(one), two);
|
||||
}
|
||||
|
||||
inline SIMD_mask operator<=(const SIMD_double &one, const SIMD_double &two) {
|
||||
return _mm512_cmple_pd_mask(one, two);
|
||||
}
|
||||
|
||||
inline SIMD_mask operator<=(const double one, const SIMD_double &two) {
|
||||
return _mm512_cmple_pd_mask(SIMD_set(one), two);
|
||||
}
|
||||
@ -593,6 +746,14 @@ namespace ip_simd {
|
||||
return _mm512_cmplt_pd_mask(two,one);
|
||||
}
|
||||
|
||||
inline SIMD_mask operator>(const SIMD_double &one, const double two) {
|
||||
return _mm512_cmplt_pd_mask(SIMD_set(two),one);
|
||||
}
|
||||
|
||||
inline SIMD_mask operator==(const SIMD256_int &one, const int two) {
|
||||
return _mm256_cmpeq_epi32_mask(one,_mm256_set1_epi32(two));
|
||||
}
|
||||
|
||||
inline SIMD_mask operator==(const SIMD_int &one, const SIMD_int &two) {
|
||||
return _mm512_cmpeq_epi32_mask(one,two);
|
||||
}
|
||||
|
||||
779
src/INTEL/pair_snap_intel.cpp
Normal file
779
src/INTEL/pair_snap_intel.cpp
Normal file
@ -0,0 +1,779 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#if defined(__AVX512F__)
|
||||
#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
|
||||
|
||||
#include "pair_snap_intel.h"
|
||||
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "error.h"
|
||||
#include "force.h"
|
||||
#include "memory.h"
|
||||
#include "modify.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neighbor.h"
|
||||
#include "sna_intel.h"
|
||||
#include "tokenizer.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace ip_simd;
|
||||
|
||||
#define MAXLINE 1024
|
||||
#define MAXWORD 3
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairSNAPIntel::PairSNAPIntel(LAMMPS *lmp) : Pair(lmp)
|
||||
{
|
||||
single_enable = 0;
|
||||
restartinfo = 0;
|
||||
one_coeff = 1;
|
||||
manybody_flag = 1;
|
||||
centroidstressflag = CENTROID_NOTAVAIL;
|
||||
|
||||
radelem = nullptr;
|
||||
wjelem = nullptr;
|
||||
coeffelem = nullptr;
|
||||
sinnerelem = nullptr;
|
||||
dinnerelem = nullptr;
|
||||
|
||||
beta = nullptr;
|
||||
bispectrum = nullptr;
|
||||
snaptr = nullptr;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairSNAPIntel::~PairSNAPIntel()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
memory->destroy(radelem);
|
||||
memory->destroy(wjelem);
|
||||
memory->destroy(coeffelem);
|
||||
memory->destroy(sinnerelem);
|
||||
memory->destroy(dinnerelem);
|
||||
|
||||
memory->destroy(beta);
|
||||
memory->destroy(bispectrum);
|
||||
|
||||
delete snaptr;
|
||||
|
||||
if (allocated) {
|
||||
memory->destroy(setflag);
|
||||
memory->destroy(cutsq);
|
||||
memory->destroy(scale);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
This version is a straightforward implementation
|
||||
---------------------------------------------------------------------- */
|
||||
|
||||
void PairSNAPIntel::compute(int eflag, int vflag)
|
||||
{
|
||||
SNA_DVEC fij[3];
|
||||
int *jlist,*numneigh,**firstneigh;
|
||||
|
||||
ev_init(eflag,vflag);
|
||||
int tally_xyz = 0;
|
||||
if (vflag_atom || (vflag && !vflag_fdotr)) tally_xyz = 1;
|
||||
|
||||
double **x = atom->x;
|
||||
double *_x = atom->x[0];
|
||||
double **f = atom->f;
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
int newton_pair = force->newton_pair;
|
||||
|
||||
// compute dE_i/dB_i = beta_i for all i in list
|
||||
|
||||
numneigh = list->numneigh;
|
||||
firstneigh = list->firstneigh;
|
||||
|
||||
SNA_DVEC sevdwl(0);
|
||||
|
||||
const int vw = snaptr->vector_width();
|
||||
for (int ii = 0; ii < list->inum; ii+=vw) {
|
||||
SNA_IVEC i, jnum;
|
||||
int max_jnum = 0;
|
||||
for (int l = 0; l < vw; l++) {
|
||||
if (ii + l < list->inum) {
|
||||
i[l] = list->ilist[ii + l];
|
||||
jnum[l] = numneigh[i[l]];
|
||||
} else {
|
||||
i[l] = list->ilist[0];
|
||||
jnum[l] = 0;
|
||||
}
|
||||
if (jnum[l] > max_jnum) max_jnum = jnum[l];
|
||||
}
|
||||
|
||||
// ensure rij, inside, wj, and rcutij are of size jnum
|
||||
|
||||
snaptr->grow_rij(max_jnum);
|
||||
|
||||
SNA_IVEC zero_vec(0);
|
||||
|
||||
const SNA_DVEC xtmp = SIMD_gather(_x, i * 3);
|
||||
const SNA_DVEC ytmp = SIMD_gather(_x, i * 3 + 1);
|
||||
const SNA_DVEC ztmp = SIMD_gather(_x, i * 3 + 2);
|
||||
const SNA_IVEC itype = SIMD_gather(type, i);
|
||||
const SNA_IVEC ielem = SIMD_gather(map, itype);
|
||||
const SNA_DVEC radi = SIMD_gather(radelem, ielem);
|
||||
|
||||
// rij[][3] = displacements between atom I and those neighbors
|
||||
// inside = indices of neighbors of I within cutoff
|
||||
// wj = weights for neighbors of I within cutoff
|
||||
// rcutij = cutoffs for neighbors of I within cutoff
|
||||
// note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi
|
||||
|
||||
SNA_IVEC ninside(0);
|
||||
for (int jj = 0; jj < max_jnum; jj++) {
|
||||
SIMD_mask m(SIMD256_set(jj) < jnum);
|
||||
|
||||
SNA_IVEC j;
|
||||
SV_for (int l = 0; l < vw; l++) {
|
||||
jlist = firstneigh[i[l]];
|
||||
if (jj < jnum[l]) j[l] = jlist[jj];
|
||||
else j[l] = 0;
|
||||
}
|
||||
j &= NEIGHMASK;
|
||||
|
||||
const SNA_DVEC delx = SIMD_gather(m, _x, j * 3) - xtmp;
|
||||
const SNA_DVEC dely = SIMD_gather(m, _x, j * 3 + 1) - ytmp;
|
||||
const SNA_DVEC delz = SIMD_gather(m, _x, j * 3 + 2) - ztmp;
|
||||
const SNA_IVEC jtype = SIMD_gather(type, j);
|
||||
const SNA_DVEC rsq = delx*delx + dely*dely + delz*delz;
|
||||
const SNA_DVEC vcut = SIMD_gather(m, cutsq[0],
|
||||
itype * (atom->ntypes + 1) + jtype);
|
||||
|
||||
m &= rsq < vcut;
|
||||
m &= rsq > SIMD_set(1e-20);
|
||||
const SNA_IVEC jelem = SIMD_gather(map, jtype);
|
||||
const SNA_IVEC ni3 = ninside * vw * 3 + SIMD256_count();
|
||||
SIMD_scatter(m, (double *)(snaptr->rij[0]), ni3, delx);
|
||||
SIMD_scatter(m, (double *)(snaptr->rij[0] + 1), ni3, dely);
|
||||
SIMD_scatter(m, (double *)(snaptr->rij[0] + 2), ni3, delz);
|
||||
const SNA_IVEC ni = ninside * vw + SIMD256_count();
|
||||
SIMD_scatter(m, (int *)(snaptr->inside), ni, j);
|
||||
SIMD_scatter(m, (double *)(snaptr->wj), ni,
|
||||
SIMD_gather(m, wjelem, jelem));
|
||||
SIMD_scatter(m, (double *)(snaptr->rcutij), ni,
|
||||
(radi + SIMD_gather(m, radelem, jelem)) * rcutfac);
|
||||
if (switchinnerflag) {
|
||||
SIMD_scatter(m, (double *)(snaptr->sinnerij), ni,
|
||||
(SIMD_gather(m, sinnerelem, ielem) +
|
||||
SIMD_gather(m, sinnerelem, jelem)) * 0.5);
|
||||
SIMD_scatter(m, (double *)(snaptr->dinnerij), ni,
|
||||
(SIMD_gather(m, dinnerelem, ielem) +
|
||||
SIMD_gather(m, dinnerelem, jelem)) * 0.5);
|
||||
}
|
||||
if (chemflag)
|
||||
SIMD_scatter(m, (int *)(snaptr->element), ni, jelem);
|
||||
ninside = SIMD_add(m, ninside, 1);
|
||||
} // for jj
|
||||
|
||||
// compute Ui, Yi for atom I
|
||||
|
||||
if (chemflag)
|
||||
snaptr->compute_ui(ninside, ielem, max_jnum);
|
||||
else
|
||||
snaptr->compute_ui(ninside, zero_vec, max_jnum);
|
||||
|
||||
// Compute bispectrum
|
||||
if (quadraticflag || eflag) {
|
||||
snaptr->compute_zi_or_yi<0>(beta);
|
||||
if (chemflag)
|
||||
snaptr->compute_bi(ielem);
|
||||
else
|
||||
snaptr->compute_bi(zero_vec);
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++)
|
||||
SIMD_store(bispectrum + icoeff, SIMD_load(snaptr->blist + icoeff));
|
||||
}
|
||||
|
||||
// Compute beta
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++)
|
||||
SIMD_store(beta + icoeff, SIMD_gather(coeffelem[0],
|
||||
ielem * ncoeffall + icoeff + 1));
|
||||
|
||||
if (quadraticflag) {
|
||||
int k = ncoeff+1;
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
||||
SNA_DVEC bveci = SIMD_load(bispectrum + icoeff);
|
||||
SNA_DVEC beta_i = SIMD_load(beta + icoeff) +
|
||||
SIMD_gather(coeffelem[0], ielem * ncoeffall + k) * bveci;
|
||||
k++;
|
||||
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
|
||||
const SNA_DVEC ci = SIMD_gather(coeffelem[0], ielem * ncoeffall + k);
|
||||
beta_i = beta_i + ci * SIMD_load(bispectrum + jcoeff);
|
||||
SIMD_store(beta + jcoeff, ci * bveci + SIMD_load(beta + jcoeff));
|
||||
k++;
|
||||
}
|
||||
SIMD_store(beta + icoeff, beta_i);
|
||||
}
|
||||
}
|
||||
|
||||
// for neighbors of I within cutoff:
|
||||
// compute Fij = dEi/dRj = -dEi/dRi
|
||||
// add to Fi, subtract from Fj
|
||||
// scaling is that for type I
|
||||
|
||||
if (quadraticflag || eflag)
|
||||
snaptr->compute_yi_from_zi(beta);
|
||||
else
|
||||
snaptr->compute_zi_or_yi<1>(beta);
|
||||
|
||||
SNA_DVEC fi_x(0.0), fi_y(0.0), fi_z(0.0);
|
||||
SNA_DVEC scalev = SIMD_gather(scale[0], itype * (atom->ntypes+1) + itype);
|
||||
for (int jj = 0; jj < max_jnum; jj++) {
|
||||
snaptr->compute_duidrj(jj, ninside);
|
||||
if (chemflag && nelements > 1)
|
||||
snaptr->compute_deidrj_e(jj, ninside, fij);
|
||||
else
|
||||
snaptr->compute_deidrj(jj, ninside, fij);
|
||||
|
||||
SNA_DVEC fijs_x = fij[0] * scalev;
|
||||
SNA_DVEC fijs_y = fij[1] * scalev;
|
||||
SNA_DVEC fijs_z = fij[2] * scalev;
|
||||
|
||||
fi_x += fijs_x;
|
||||
fi_y += fijs_y;
|
||||
fi_z += fijs_z;
|
||||
|
||||
for (int l = 0; l < vw; l++) {
|
||||
if (jj < ninside[l]) {
|
||||
int j = snaptr->inside[jj][l];
|
||||
f[j][0] -= fijs_x[l];
|
||||
f[j][1] -= fijs_y[l];
|
||||
f[j][2] -= fijs_z[l];
|
||||
|
||||
if (tally_xyz)
|
||||
ev_tally_xyz(i[l],j,nlocal,newton_pair,0.0,0.0,
|
||||
fij[0][l],fij[1][l],fij[2][l],
|
||||
-snaptr->rij[jj][0][l],-snaptr->rij[jj][1][l],
|
||||
-snaptr->rij[jj][2][l]);
|
||||
}
|
||||
} // for l
|
||||
} // for jj
|
||||
SIMD_mask m((SIMD256_count() + ii) < list->inum);
|
||||
SNA_DVEC fix = SIMD_gather(m, f[0], i * 3) + fi_x;
|
||||
SIMD_scatter(m, f[0], i * 3, fix);
|
||||
SNA_DVEC fiy = SIMD_gather(m, f[0], i * 3 + 1) + fi_y;
|
||||
SIMD_scatter(m, f[0], i * 3 + 1, fiy);
|
||||
SNA_DVEC fiz = SIMD_gather(m, f[0], i * 3 + 2) + fi_z;
|
||||
SIMD_scatter(m, f[0], i * 3 + 2, fiz);
|
||||
|
||||
// tally energy contribution
|
||||
|
||||
if (eflag) {
|
||||
SNA_DVEC evdwl = SIMD_gather(coeffelem[0], ielem * ncoeffall);
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++)
|
||||
evdwl += SIMD_gather(coeffelem[0], ielem * ncoeffall + icoeff +1) *
|
||||
bispectrum[icoeff];
|
||||
|
||||
if (quadraticflag) {
|
||||
int k = ncoeff+1;
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
||||
SNA_DVEC bveci = SIMD_load(bispectrum + icoeff);
|
||||
SNA_DVEC c = SIMD_gather(coeffelem[0], ielem * ncoeffall + k);
|
||||
k++;
|
||||
evdwl += c * 0.5 * bveci * bveci;
|
||||
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
|
||||
SNA_DVEC bvecj = SIMD_load(bispectrum + jcoeff);
|
||||
SNA_DVEC cj = SIMD_gather(coeffelem[0], ielem * ncoeffall + k);
|
||||
k++;
|
||||
evdwl += cj * bveci * bvecj;
|
||||
}
|
||||
}
|
||||
}
|
||||
sevdwl += scalev * evdwl;
|
||||
if (eatom) {
|
||||
SNA_DVEC ea = SIMD_gather(m, eatom, i) + scalev * evdwl;
|
||||
SIMD_scatter(m, eatom, i, ea);
|
||||
}
|
||||
} // if (eflag)
|
||||
} // for ii
|
||||
if (eflag) eng_vdwl += SIMD_sum(sevdwl);
|
||||
if (vflag_fdotr) virial_fdotr_compute();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
allocate all arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairSNAPIntel::allocate()
|
||||
{
|
||||
allocated = 1;
|
||||
int n = atom->ntypes;
|
||||
memory->create(setflag,n+1,n+1,"pair:setflag");
|
||||
memory->create(cutsq,n+1,n+1,"pair:cutsq");
|
||||
memory->create(scale,n+1,n+1,"pair:scale");
|
||||
map = new int[n+1];
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
global settings
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairSNAPIntel::settings(int narg, char ** /* arg */)
|
||||
{
|
||||
if (narg > 0)
|
||||
error->all(FLERR,"Illegal pair_style command");
|
||||
if ((comm->me == 0) && (comm->nthreads > 1))
|
||||
error->warning(FLERR, "Pair style snap/intel does not use OpenMP threads");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
set coeffs for one or more type pairs
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairSNAPIntel::coeff(int narg, char **arg)
|
||||
{
|
||||
if (!allocated) allocate();
|
||||
if (narg != 4 + atom->ntypes) error->all(FLERR,"Incorrect args for pair coefficients");
|
||||
|
||||
map_element2type(narg-4,arg+4);
|
||||
|
||||
// read snapcoeff and snapparam files
|
||||
|
||||
read_files(arg[2],arg[3]);
|
||||
|
||||
if (!quadraticflag)
|
||||
ncoeff = ncoeffall - 1;
|
||||
else {
|
||||
|
||||
// ncoeffall should be (ncoeff+2)*(ncoeff+1)/2
|
||||
// so, ncoeff = floor(sqrt(2*ncoeffall))-1
|
||||
|
||||
ncoeff = sqrt(2.0*ncoeffall)-1;
|
||||
ncoeffq = (ncoeff*(ncoeff+1))/2;
|
||||
int ntmp = 1+ncoeff+ncoeffq;
|
||||
if (ntmp != ncoeffall) {
|
||||
error->all(FLERR,"Incorrect SNAP coeff file");
|
||||
}
|
||||
}
|
||||
|
||||
snaptr = new SNAIntel(lmp, rfac0, twojmax,
|
||||
rmin0, switchflag, bzeroflag,
|
||||
chemflag, bnormflag, wselfallflag,
|
||||
nelements, switchinnerflag);
|
||||
|
||||
if (ncoeff != snaptr->ncoeff) {
|
||||
if (comm->me == 0)
|
||||
printf("ncoeff = %d snancoeff = %d \n",ncoeff,snaptr->ncoeff);
|
||||
error->all(FLERR,"Incorrect SNAP parameter file");
|
||||
}
|
||||
|
||||
// Calculate maximum cutoff for all elements
|
||||
rcutmax = 0.0;
|
||||
for (int ielem = 0; ielem < nelements; ielem++)
|
||||
rcutmax = MAX(2.0*radelem[ielem]*rcutfac,rcutmax);
|
||||
|
||||
// set default scaling
|
||||
int n = atom->ntypes;
|
||||
for (int ii = 0; ii < n+1; ii++)
|
||||
for (int jj = 0; jj < n+1; jj++)
|
||||
scale[ii][jj] = 1.0;
|
||||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init specific to this pair style
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairSNAPIntel::init_style()
|
||||
{
|
||||
if (force->newton_pair == 0)
|
||||
error->all(FLERR,"Pair style SNAP requires newton pair on");
|
||||
|
||||
// need a full neighbor list
|
||||
|
||||
neighbor->add_request(this, NeighConst::REQ_FULL);
|
||||
|
||||
snaptr->init();
|
||||
|
||||
fix = static_cast<FixIntel *>(modify->get_fix_by_id("package_intel"));
|
||||
if (!fix) error->all(FLERR, "The 'package intel' command is required for /intel styles");
|
||||
|
||||
fix->pair_init_check();
|
||||
|
||||
memory->create(bispectrum,ncoeff,"PairSNAP:bispectrum");
|
||||
memory->create(beta,ncoeff,"PairSNAP:beta");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init for one type pair i,j and corresponding j,i
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
double PairSNAPIntel::init_one(int i, int j)
|
||||
{
|
||||
if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
|
||||
scale[j][i] = scale[i][j];
|
||||
return (radelem[map[i]] +
|
||||
radelem[map[j]])*rcutfac;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairSNAPIntel::read_files(char *coefffilename, char *paramfilename)
|
||||
{
|
||||
|
||||
// open SNAP coefficient file on proc 0
|
||||
|
||||
FILE *fpcoeff;
|
||||
if (comm->me == 0) {
|
||||
fpcoeff = utils::open_potential(coefffilename,lmp,nullptr);
|
||||
if (fpcoeff == nullptr)
|
||||
error->one(FLERR,"Cannot open SNAP coefficient file {}: ",
|
||||
coefffilename, utils::getsyserror());
|
||||
}
|
||||
|
||||
char line[MAXLINE],*ptr;
|
||||
int eof = 0;
|
||||
int nwords = 0;
|
||||
while (nwords == 0) {
|
||||
if (comm->me == 0) {
|
||||
ptr = fgets(line,MAXLINE,fpcoeff);
|
||||
if (ptr == nullptr) {
|
||||
eof = 1;
|
||||
fclose(fpcoeff);
|
||||
}
|
||||
}
|
||||
MPI_Bcast(&eof,1,MPI_INT,0,world);
|
||||
if (eof) break;
|
||||
MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
|
||||
|
||||
// strip comment, skip line if blank
|
||||
|
||||
nwords = utils::count_words(utils::trim_comment(line));
|
||||
}
|
||||
if (nwords != 2)
|
||||
error->all(FLERR,"Incorrect format in SNAP coefficient file");
|
||||
|
||||
// strip single and double quotes from words
|
||||
|
||||
int nelemtmp = 0;
|
||||
try {
|
||||
ValueTokenizer words(utils::trim_comment(line),"\"' \t\n\r\f");
|
||||
nelemtmp = words.next_int();
|
||||
ncoeffall = words.next_int();
|
||||
} catch (TokenizerException &e) {
|
||||
error->all(FLERR,"Incorrect format in SNAP coefficient file: {}", e.what());
|
||||
}
|
||||
|
||||
// clean out old arrays and set up element lists
|
||||
|
||||
memory->destroy(radelem);
|
||||
memory->destroy(wjelem);
|
||||
memory->destroy(coeffelem);
|
||||
memory->destroy(sinnerelem);
|
||||
memory->destroy(dinnerelem);
|
||||
memory->create(radelem,nelements,"pair:radelem");
|
||||
memory->create(wjelem,nelements,"pair:wjelem");
|
||||
memory->create(coeffelem,nelements,ncoeffall,"pair:coeffelem");
|
||||
memory->create(sinnerelem,nelements,"pair:sinnerelem");
|
||||
memory->create(dinnerelem,nelements,"pair:dinnerelem");
|
||||
|
||||
// initialize checklist for all required nelements
|
||||
|
||||
int *elementflags = new int[nelements];
|
||||
for (int jelem = 0; jelem < nelements; jelem++)
|
||||
elementflags[jelem] = 0;
|
||||
|
||||
// loop over nelemtmp blocks in the SNAP coefficient file
|
||||
|
||||
for (int ielem = 0; ielem < nelemtmp; ielem++) {
|
||||
|
||||
if (comm->me == 0) {
|
||||
ptr = fgets(line,MAXLINE,fpcoeff);
|
||||
if (ptr == nullptr) {
|
||||
eof = 1;
|
||||
fclose(fpcoeff);
|
||||
}
|
||||
}
|
||||
MPI_Bcast(&eof,1,MPI_INT,0,world);
|
||||
if (eof)
|
||||
error->all(FLERR,"Incorrect format in SNAP coefficient file");
|
||||
MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
|
||||
|
||||
std::vector<std::string> words;
|
||||
try {
|
||||
words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector();
|
||||
} catch (TokenizerException &) {
|
||||
// ignore
|
||||
}
|
||||
if (words.size() != 3)
|
||||
error->all(FLERR,"Incorrect format in SNAP coefficient file");
|
||||
|
||||
int jelem;
|
||||
for (jelem = 0; jelem < nelements; jelem++)
|
||||
if (words[0] == elements[jelem]) break;
|
||||
|
||||
// if this element not needed, skip this block
|
||||
|
||||
if (jelem == nelements) {
|
||||
if (comm->me == 0) {
|
||||
for (int icoeff = 0; icoeff < ncoeffall; icoeff++) {
|
||||
ptr = fgets(line,MAXLINE,fpcoeff);
|
||||
if (ptr == nullptr) {
|
||||
eof = 1;
|
||||
fclose(fpcoeff);
|
||||
}
|
||||
}
|
||||
}
|
||||
MPI_Bcast(&eof,1,MPI_INT,0,world);
|
||||
if (eof)
|
||||
error->all(FLERR,"Incorrect format in SNAP coefficient file");
|
||||
continue;
|
||||
}
|
||||
|
||||
if (elementflags[jelem] == 1)
|
||||
error->all(FLERR,"Incorrect format in SNAP coefficient file");
|
||||
else
|
||||
elementflags[jelem] = 1;
|
||||
|
||||
radelem[jelem] = utils::numeric(FLERR,words[1],false,lmp);
|
||||
wjelem[jelem] = utils::numeric(FLERR,words[2],false,lmp);
|
||||
|
||||
if (comm->me == 0)
|
||||
utils::logmesg(lmp,"SNAP Element = {}, Radius {}, Weight {}\n",
|
||||
elements[jelem], radelem[jelem], wjelem[jelem]);
|
||||
|
||||
for (int icoeff = 0; icoeff < ncoeffall; icoeff++) {
|
||||
if (comm->me == 0) {
|
||||
ptr = fgets(line,MAXLINE,fpcoeff);
|
||||
if (ptr == nullptr) {
|
||||
eof = 1;
|
||||
fclose(fpcoeff);
|
||||
}
|
||||
}
|
||||
|
||||
MPI_Bcast(&eof,1,MPI_INT,0,world);
|
||||
if (eof)
|
||||
error->all(FLERR,"Incorrect format in SNAP coefficient file");
|
||||
MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
|
||||
|
||||
try {
|
||||
ValueTokenizer coeff(utils::trim_comment(line));
|
||||
if (coeff.count() != 1)
|
||||
error->all(FLERR,"Incorrect format in SNAP coefficient file");
|
||||
|
||||
coeffelem[jelem][icoeff] = coeff.next_double();
|
||||
} catch (TokenizerException &e) {
|
||||
error->all(FLERR,"Incorrect format in SNAP coefficient file: {}", e.what());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (comm->me == 0) fclose(fpcoeff);
|
||||
|
||||
for (int jelem = 0; jelem < nelements; jelem++) {
|
||||
if (elementflags[jelem] == 0)
|
||||
error->all(FLERR,"Element {} not found in SNAP coefficient file", elements[jelem]);
|
||||
}
|
||||
delete[] elementflags;
|
||||
|
||||
// set flags for required keywords
|
||||
|
||||
rcutfacflag = 0;
|
||||
twojmaxflag = 0;
|
||||
|
||||
// Set defaults for optional keywords
|
||||
|
||||
rfac0 = 0.99363;
|
||||
rmin0 = 0.0;
|
||||
switchflag = 1;
|
||||
bzeroflag = 1;
|
||||
quadraticflag = 0;
|
||||
chemflag = 0;
|
||||
bnormflag = 0;
|
||||
wselfallflag = 0;
|
||||
switchinnerflag = 0;
|
||||
chunksize = 32768;
|
||||
parallel_thresh = 8192;
|
||||
|
||||
// set local input checks
|
||||
|
||||
int sinnerflag = 0;
|
||||
int dinnerflag = 0;
|
||||
|
||||
// open SNAP parameter file on proc 0
|
||||
|
||||
FILE *fpparam;
|
||||
if (comm->me == 0) {
|
||||
fpparam = utils::open_potential(paramfilename,lmp,nullptr);
|
||||
if (fpparam == nullptr)
|
||||
error->one(FLERR,"Cannot open SNAP parameter file {}: {}",
|
||||
paramfilename, utils::getsyserror());
|
||||
}
|
||||
|
||||
eof = 0;
|
||||
while (true) {
|
||||
if (comm->me == 0) {
|
||||
ptr = fgets(line,MAXLINE,fpparam);
|
||||
if (ptr == nullptr) {
|
||||
eof = 1;
|
||||
fclose(fpparam);
|
||||
}
|
||||
}
|
||||
MPI_Bcast(&eof,1,MPI_INT,0,world);
|
||||
if (eof) break;
|
||||
MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
|
||||
|
||||
// words = ptrs to all words in line
|
||||
// strip single and double quotes from words
|
||||
|
||||
std::vector<std::string> words;
|
||||
try {
|
||||
words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector();
|
||||
} catch (TokenizerException &) {
|
||||
// ignore
|
||||
}
|
||||
|
||||
if (words.size() == 0) continue;
|
||||
|
||||
if (words.size() < 2)
|
||||
error->all(FLERR,"Incorrect format in SNAP parameter file");
|
||||
|
||||
auto keywd = words[0];
|
||||
auto keyval = words[1];
|
||||
|
||||
// check for keywords with more than one value per element
|
||||
|
||||
if (keywd == "sinner" || keywd == "dinner") {
|
||||
|
||||
if ((int)words.size() != nelements+1)
|
||||
error->all(FLERR,"Incorrect SNAP parameter file");
|
||||
|
||||
// innerlogstr collects all values of sinner or dinner for log output below
|
||||
|
||||
std::string innerlogstr;
|
||||
|
||||
int iword = 1;
|
||||
|
||||
if (keywd == "sinner") {
|
||||
for (int ielem = 0; ielem < nelements; ielem++) {
|
||||
keyval = words[iword];
|
||||
sinnerelem[ielem] = utils::numeric(FLERR,keyval,false,lmp);
|
||||
iword++;
|
||||
innerlogstr += keyval + " ";
|
||||
}
|
||||
sinnerflag = 1;
|
||||
} else if (keywd == "dinner") {
|
||||
for (int ielem = 0; ielem < nelements; ielem++) {
|
||||
keyval = words[iword];
|
||||
dinnerelem[ielem] = utils::numeric(FLERR,keyval,false,lmp);
|
||||
iword++;
|
||||
innerlogstr += keyval + " ";
|
||||
}
|
||||
dinnerflag = 1;
|
||||
}
|
||||
|
||||
if (comm->me == 0)
|
||||
utils::logmesg(lmp,"SNAP keyword {} {} ... \n", keywd, innerlogstr);
|
||||
|
||||
} else {
|
||||
|
||||
// all other keywords take one value
|
||||
|
||||
if (nwords != 2)
|
||||
error->all(FLERR,"Incorrect SNAP parameter file");
|
||||
|
||||
if (comm->me == 0)
|
||||
utils::logmesg(lmp,"SNAP keyword {} {}\n",keywd,keyval);
|
||||
|
||||
if (keywd == "rcutfac") {
|
||||
rcutfac = utils::numeric(FLERR,keyval,false,lmp);
|
||||
rcutfacflag = 1;
|
||||
} else if (keywd == "twojmax") {
|
||||
twojmax = utils::inumeric(FLERR,keyval,false,lmp);
|
||||
twojmaxflag = 1;
|
||||
} else if (keywd == "rfac0")
|
||||
rfac0 = utils::numeric(FLERR,keyval,false,lmp);
|
||||
else if (keywd == "rmin0")
|
||||
rmin0 = utils::numeric(FLERR,keyval,false,lmp);
|
||||
else if (keywd == "switchflag")
|
||||
switchflag = utils::inumeric(FLERR,keyval,false,lmp);
|
||||
else if (keywd == "bzeroflag")
|
||||
bzeroflag = utils::inumeric(FLERR,keyval,false,lmp);
|
||||
else if (keywd == "quadraticflag")
|
||||
quadraticflag = utils::inumeric(FLERR,keyval,false,lmp);
|
||||
else if (keywd == "chemflag")
|
||||
chemflag = utils::inumeric(FLERR,keyval,false,lmp);
|
||||
else if (keywd == "bnormflag")
|
||||
bnormflag = utils::inumeric(FLERR,keyval,false,lmp);
|
||||
else if (keywd == "wselfallflag")
|
||||
wselfallflag = utils::inumeric(FLERR,keyval,false,lmp);
|
||||
else if (keywd == "switchinnerflag")
|
||||
switchinnerflag = utils::inumeric(FLERR,keyval,false,lmp);
|
||||
else if (keywd == "chunksize")
|
||||
chunksize = utils::inumeric(FLERR,keyval,false,lmp);
|
||||
else if (keywd == "parallelthresh")
|
||||
parallel_thresh = utils::inumeric(FLERR,keyval,false,lmp);
|
||||
else
|
||||
error->all(FLERR,"Unknown parameter '{}' in SNAP parameter file", keywd);
|
||||
}
|
||||
}
|
||||
|
||||
if (rcutfacflag == 0 || twojmaxflag == 0)
|
||||
error->all(FLERR,"Incorrect SNAP parameter file");
|
||||
|
||||
if (chemflag && nelemtmp != nelements)
|
||||
error->all(FLERR,"Incorrect SNAP parameter file");
|
||||
|
||||
if (switchinnerflag && !(sinnerflag && dinnerflag))
|
||||
error->all(FLERR,"Incorrect SNAP parameter file");
|
||||
|
||||
if (!switchinnerflag && (sinnerflag || dinnerflag))
|
||||
error->all(FLERR,"Incorrect SNAP parameter file");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
memory usage
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
double PairSNAPIntel::memory_usage()
|
||||
{
|
||||
double bytes = Pair::memory_usage();
|
||||
|
||||
int n = atom->ntypes+1;
|
||||
bytes += (double)n*n*sizeof(int); // setflag
|
||||
bytes += (double)n*n*sizeof(double); // cutsq
|
||||
bytes += (double)n*n*sizeof(double); // scale
|
||||
bytes += (double)n*sizeof(int); // map
|
||||
bytes += (double)ncoeff*sizeof(SNA_DVEC); // bispectrum
|
||||
bytes += (double)ncoeff*sizeof(SNA_DVEC); // beta
|
||||
|
||||
bytes += snaptr->memory_usage(); // SNA object
|
||||
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void *PairSNAPIntel::extract(const char *str, int &dim)
|
||||
{
|
||||
dim = 2;
|
||||
if (strcmp(str,"scale") == 0) return (void *) scale;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
83
src/INTEL/pair_snap_intel.h
Normal file
83
src/INTEL/pair_snap_intel.h
Normal file
@ -0,0 +1,83 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#if defined(__AVX512F__)
|
||||
#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
// clang-format off
|
||||
PairStyle(snap/intel,PairSNAPIntel);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_SNAP_INTEL_H
|
||||
#define LMP_PAIR_SNAP_INTEL_H
|
||||
|
||||
#include "fix_intel.h"
|
||||
#include "pair.h"
|
||||
|
||||
namespace ip_simd { class SIMD_double; class SIMD_int; };
|
||||
#define SNA_DVEC ip_simd::SIMD_double
|
||||
#define SNA_IVEC ip_simd::SIMD256_int
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairSNAPIntel : public Pair {
|
||||
public:
|
||||
PairSNAPIntel(class LAMMPS *);
|
||||
~PairSNAPIntel() override;
|
||||
void compute(int, int) override;
|
||||
void settings(int, char **) override;
|
||||
void coeff(int, char **) override;
|
||||
void init_style() override;
|
||||
double init_one(int, int) override;
|
||||
double memory_usage() override;
|
||||
void *extract(const char *, int &) override;
|
||||
|
||||
double rcutfac, quadraticflag; // declared public to workaround gcc 4.9
|
||||
int ncoeff; // compiler bug, manifest in KOKKOS package
|
||||
|
||||
protected:
|
||||
FixIntel *fix;
|
||||
|
||||
int ncoeffq, ncoeffall;
|
||||
class SNAIntel *snaptr;
|
||||
virtual void allocate();
|
||||
void read_files(char *, char *);
|
||||
inline int equal(double *x, double *y);
|
||||
inline double dist2(double *x, double *y);
|
||||
|
||||
double rcutmax; // max cutoff for all elements
|
||||
double *radelem; // element radii
|
||||
double *wjelem; // elements weights
|
||||
double **coeffelem; // element bispectrum coefficients
|
||||
SNA_DVEC *beta; // betas for all atoms in list
|
||||
SNA_DVEC *bispectrum; // bispectrum components for all atoms in list
|
||||
double **scale; // for thermodynamic integration
|
||||
int twojmax, switchflag, bzeroflag, bnormflag;
|
||||
int chemflag, wselfallflag;
|
||||
int switchinnerflag; // inner cutoff switch
|
||||
double *sinnerelem; // element inner cutoff midpoint
|
||||
double *dinnerelem; // element inner cutoff half-width
|
||||
int chunksize, parallel_thresh;
|
||||
double rfac0, rmin0, wj1, wj2;
|
||||
int rcutfacflag, twojmaxflag; // flags for required parameters
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
1505
src/INTEL/sna_intel.cpp
Normal file
1505
src/INTEL/sna_intel.cpp
Normal file
File diff suppressed because it is too large
Load Diff
187
src/INTEL/sna_intel.h
Normal file
187
src/INTEL/sna_intel.h
Normal file
@ -0,0 +1,187 @@
|
||||
/* -*- c++ -*- -------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: W. Michael Brown, Intel
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef LMP_SNA_INTEL_H
|
||||
#define LMP_SNA_INTEL_H
|
||||
|
||||
#if defined(__AVX512F__)
|
||||
#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
|
||||
|
||||
#include "pointers.h"
|
||||
#include "intel_buffers.h"
|
||||
#include "intel_simd.h"
|
||||
|
||||
#define SVW 8
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#if defined(USE_OMP_SIMD)
|
||||
#define SV_for _Pragma("omp simd") _Pragma("vector aligned") for
|
||||
#else
|
||||
#define SV_for _Pragma("simd assert") _Pragma("vector aligned") for
|
||||
#endif
|
||||
#else
|
||||
#define SV_for for
|
||||
#endif
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
struct SNA_ZINDICES {
|
||||
int j1, j2, j, ma1min, ma2max, mb1min;
|
||||
int mb2max, na, nb, jju;
|
||||
};
|
||||
|
||||
struct SNA_BINDICES {
|
||||
int j1, j2, j;
|
||||
};
|
||||
|
||||
#define SNA_DVEC ip_simd::SIMD_double
|
||||
#define SNA_IVEC ip_simd::SIMD256_int
|
||||
|
||||
class SNAIntel : protected Pointers {
|
||||
|
||||
public:
|
||||
SNAIntel(LAMMPS *, double, int, double, int, int, int, int, int, int, int);
|
||||
|
||||
SNAIntel(LAMMPS *lmp) : Pointers(lmp){};
|
||||
~SNAIntel() override;
|
||||
void build_indexlist();
|
||||
void init();
|
||||
double memory_usage();
|
||||
|
||||
int ncoeff;
|
||||
|
||||
inline int vector_width() const { return SVW; }
|
||||
|
||||
// functions for bispectrum coefficients
|
||||
|
||||
void compute_ui(const SNA_IVEC &, const SNA_IVEC &, const int max_jnum);
|
||||
template <int> void compute_zi_or_yi(const SNA_DVEC *);
|
||||
void compute_yi_from_zi(const SNA_DVEC *);
|
||||
void compute_yterm(int, int, int, const double *);
|
||||
void compute_bi(const SNA_IVEC &);
|
||||
|
||||
// functions for derivatives
|
||||
|
||||
void compute_duidrj(const int, const SNA_IVEC &);
|
||||
void compute_deidrj_e(const int, const SNA_IVEC &, SNA_DVEC *);
|
||||
void compute_deidrj(const int, const SNA_IVEC &, SNA_DVEC *);
|
||||
double compute_sfac(double, double, double, double);
|
||||
SNA_DVEC compute_sfac(const SNA_DVEC &, const SNA_DVEC &, const SNA_DVEC &,
|
||||
const SNA_DVEC &);
|
||||
inline SNA_DVEC compute_sfac_dsfac(const SNA_DVEC &, const SNA_DVEC &,
|
||||
const SNA_DVEC &, const SNA_DVEC &,
|
||||
SNA_DVEC &);
|
||||
|
||||
// public bispectrum data
|
||||
|
||||
int twojmax;
|
||||
SNA_DVEC *blist;
|
||||
double **dblist;
|
||||
|
||||
// short neighbor list data
|
||||
|
||||
void grow_rij(int);
|
||||
int nmax; // allocated size of short lists
|
||||
|
||||
SNA_DVEC **rij; // short rij list
|
||||
SNA_IVEC *inside; // short neighbor list
|
||||
SNA_DVEC *wj; // short weight list
|
||||
SNA_DVEC *rcutij; // short cutoff list
|
||||
|
||||
// only allocated for switch_inner_flag=1
|
||||
|
||||
SNA_DVEC *sinnerij; // short inner cutoff midpoint list
|
||||
SNA_DVEC *dinnerij; // short inner half-width list
|
||||
|
||||
// only allocated for chem_flag=1
|
||||
|
||||
SNA_IVEC *element; // short element list [0,nelements)
|
||||
|
||||
private:
|
||||
double rmin0, rfac0;
|
||||
|
||||
// data for bispectrum coefficients
|
||||
|
||||
SNA_ZINDICES *idxz;
|
||||
SNA_BINDICES *idxb;
|
||||
|
||||
double **rootpqarray;
|
||||
double *cglist;
|
||||
int ***idxcg_block;
|
||||
|
||||
SNA_DVEC *ulisttot_r, *ulisttot_i;
|
||||
SNA_DVEC **ulist_r_ij, **ulist_i_ij; // short u list
|
||||
int *idxu_block;
|
||||
|
||||
SNA_DVEC *zlist_r, *zlist_i;
|
||||
int ***idxz_block;
|
||||
|
||||
int ***idxb_block;
|
||||
|
||||
SNA_DVEC **dulist_r, **dulist_i;
|
||||
|
||||
SNA_DVEC *ylist_r, *ylist_i;
|
||||
int idxcg_max, idxu_max, idxz_max, idxb_max;
|
||||
|
||||
void create_twojmax_arrays();
|
||||
void destroy_twojmax_arrays();
|
||||
void init_clebsch_gordan();
|
||||
void print_clebsch_gordan();
|
||||
void init_rootpqarray();
|
||||
void zero_uarraytot(const SNA_IVEC &);
|
||||
void add_uarraytot(const SNA_DVEC &, const int, const SNA_IVEC &);
|
||||
void compute_uarray(const SNA_DVEC &, const SNA_DVEC &, const SNA_DVEC &,
|
||||
const SNA_DVEC &, const SNA_DVEC &, const int,
|
||||
const SNA_IVEC &);
|
||||
double deltacg(int, int, int);
|
||||
void compute_ncoeff();
|
||||
void compute_duarray(const SNA_DVEC &, const SNA_DVEC &, const SNA_DVEC &,
|
||||
const SNA_DVEC &, const SNA_DVEC &, const SNA_DVEC &,
|
||||
const SNA_DVEC &, const SNA_DVEC &, int,
|
||||
const SNA_IVEC &);
|
||||
inline double choose_beta(const int, const int, const int,
|
||||
const int, const int, const int, int &);
|
||||
|
||||
// Sets the style for the switching function
|
||||
// 0 = none
|
||||
// 1 = cosine
|
||||
int switch_flag;
|
||||
|
||||
// Sets the style for the inner switching function
|
||||
// 0 = none
|
||||
// 1 = cosine
|
||||
int switch_inner_flag;
|
||||
|
||||
// Self-weight
|
||||
double wself;
|
||||
|
||||
int bzero_flag; // 1 if bzero subtracted from barray
|
||||
double *bzero; // array of B values for isolated atoms
|
||||
int bnorm_flag; // 1 if barray divided by j+1
|
||||
int chem_flag; // 1 for multi-element bispectrum components
|
||||
int wselfall_flag; // 1 for adding wself to all element labelings
|
||||
int nelements; // number of elements
|
||||
int ndoubles; // number of multi-element pairs
|
||||
int ntriples; // number of multi-element triplets
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user