Merge branch 'master' of github.com:lammps/lammps into kk_verlet

Stan Gerald Moore
2021-07-27 14:28:54 -06:00
75 changed files with 1346 additions and 528 deletions

View File

@ -244,15 +244,16 @@ if(PKG_ADIOS)
endif()
if(NOT CMAKE_CROSSCOMPILING)
set(MPI_CXX_SKIP_MPICXX TRUE)
find_package(MPI QUIET)
option(BUILD_MPI "Build MPI version" ${MPI_FOUND})
else()
set(MPI_CXX_SKIP_MPICXX TRUE)
option(BUILD_MPI "Build MPI version" OFF)
endif()
if(BUILD_MPI)
# do not include the (obsolete) MPI C++ bindings which makes
# for leaner object files and avoids namespace conflicts
set(MPI_CXX_SKIP_MPICXX TRUE)
# We use a non-standard procedure to cross-compile with MPI on Windows
if((CMAKE_SYSTEM_NAME STREQUAL "Windows") AND CMAKE_CROSSCOMPILING)
include(MPI4WIN)
@ -368,6 +369,8 @@ if(PKG_MSCG OR PKG_ATC OR PKG_AWPMD OR PKG_ML-QUIP OR PKG_LATTE)
endif()
endif()
# tweak jpeg library names to avoid linker errors with MinGW cross-compilation
set(JPEG_NAMES libjpeg libjpeg-62)
find_package(JPEG QUIET)
option(WITH_JPEG "Enable JPEG support" ${JPEG_FOUND})
if(WITH_JPEG)

View File

@ -54,8 +54,8 @@ if(DOWNLOAD_PLUMED)
set(PLUMED_BUILD_BYPRODUCTS "<INSTALL_DIR>/lib/libplumedWrapper.a")
endif()
set(PLUMED_URL "https://github.com/plumed/plumed2/releases/download/v2.7.1/plumed-src-2.7.1.tgz" CACHE STRING "URL for PLUMED tarball")
set(PLUMED_MD5 "4eac6a462ec84dfe0cec96c82421b8e8" CACHE STRING "MD5 checksum of PLUMED tarball")
set(PLUMED_URL "https://github.com/plumed/plumed2/releases/download/v2.7.2/plumed-src-2.7.2.tgz" CACHE STRING "URL for PLUMED tarball")
set(PLUMED_MD5 "cfa0b4dd90a81c25d3302e8d97bfeaea" CACHE STRING "MD5 checksum of PLUMED tarball")
mark_as_advanced(PLUMED_URL)
mark_as_advanced(PLUMED_MD5)
@ -72,7 +72,6 @@ if(DOWNLOAD_PLUMED)
${PLUMED_CONFIG_OMP}
CXX=${PLUMED_CONFIG_CXX}
CC=${PLUMED_CONFIG_CC}
PATCH_COMMAND sed -i "/^#include <algorithm>/a #include <limits>" <SOURCE_DIR>/src/lepton/Operation.h
BUILD_BYPRODUCTS ${PLUMED_BUILD_BYPRODUCTS}
)
ExternalProject_get_property(plumed_build INSTALL_DIR)

View File

@ -25,7 +25,7 @@ RasMol visualization programs. Pizza.py has tools that do interactive
3d OpenGL visualization and one that creates SVG images of dump file
snapshots.
.. _pizza: https://pizza.sandia.gov
.. _pizza: https://lammps.github.io/pizza
.. _ensight: https://www.ansys.com/products/fluids/ansys-ensight

View File

@ -24,11 +24,15 @@ General features
^^^^^^^^^^^^^^^^
* runs on a single processor or in parallel
* distributed-memory message-passing parallelism (MPI)
* spatial-decomposition of simulation domain for parallelism
* open-source distribution
* highly portable C++
* optional libraries used: MPI and single-processor FFT
* distributed memory message-passing parallelism (MPI)
* shared memory multi-threading parallelism (OpenMP)
* spatial decomposition of simulation domain for MPI parallelism
* particle decomposition inside spatial decomposition for OpenMP parallelism
* GPLv2-licensed open-source distribution
* highly portable C++11
* modular code with most functionality in optional packages
* depends only on the MPI library for basic parallel functionality
* other libraries are optional and only required for specific packages
* GPU (CUDA and OpenCL), Intel Xeon Phi, and OpenMP support for many code features
* easy to extend with new features and functionality
* runs from an input script
@ -68,9 +72,9 @@ Interatomic potentials (force fields)
:doc:`improper style <improper_style>`, :doc:`kspace style <kspace_style>`
commands)
* pairwise potentials: Lennard-Jones, Buckingham, Morse, Born-Mayer-Huggins, Yukawa, soft, class 2 (COMPASS), hydrogen bond, tabulated
* charged pairwise potentials: Coulombic, point-dipole
* many-body potentials: EAM, Finnis/Sinclair EAM, modified EAM (MEAM), embedded ion method (EIM), EDIP, ADP, Stillinger-Weber, Tersoff, REBO, AIREBO, ReaxFF, COMB, SNAP, Streitz-Mintmire, 3-body polymorphic
* long-range interactions for charge, point-dipoles, and LJ dispersion: Ewald, Wolf, PPPM (similar to particle-mesh Ewald)
* polarization models: :doc:`QEq <fix_qeq>`, :doc:`core/shell model <Howto_coreshell>`, :doc:`Drude dipole model <Howto_drude>`
* charge equilibration (QEq via dynamic, point, shielded, Slater methods)
@ -170,9 +174,12 @@ Multi-replica models
^^^^^^^^^^^^^^^^^^^^
* :doc:`nudged elastic band <neb>`
* :doc:`hyperdynamics <hyper>`
* :doc:`parallel replica dynamics <prd>`
* :doc:`temperature accelerated dynamics <tad>`
* :doc:`parallel tempering <temper>`
* :doc:`path-integral MD <fix_pimd>`
* multi-walker collective variables with :doc:`Colvars <fix_colvars>` and :doc:`Plumed <fix_plumed>`
.. _prepost:
@ -187,7 +194,7 @@ Pre- and post-processing
plotting, and visualization for LAMMPS simulations. Pizza.py is
written in `Python <python_>`_ and is available for download from `the Pizza.py WWW site <pizza_>`_.
.. _pizza: https://pizza.sandia.gov
.. _pizza: https://lammps.github.io/pizza
.. _python: http://www.python.org

View File

@ -77,7 +77,7 @@ Here are suggestions on how to perform these tasks:
it easier to analyze and plot. See the :doc:`Tools <Tools>` doc page
for more discussion of the various tools.
* **Pizza.py:** Our group has also written a separate toolkit called
`Pizza.py <https://pizza.sandia.gov>`_ which can do certain kinds of
`Pizza.py <https://lammps.github.io/pizza>`_ which can do certain kinds of
setup, analysis, plotting, and visualization (via OpenGL) for LAMMPS
simulations. It thus provides some functionality for several of the
above bullets. Pizza.py is written in `Python <http://www.python.org>`_

View File

@ -18,10 +18,11 @@ supercomputers.
.. _mpi: https://en.wikipedia.org/wiki/Message_Passing_Interface
.. _lws: https://www.lammps.org
LAMMPS is written in C++. Earlier versions were written in F77 and
F90. See the `History page <https://www.lammps.org/history.html>`_ of
the website for details. All versions can be downloaded from the
`LAMMPS website <lws_>`_.
LAMMPS is written in C++ and requires a compiler that is at least
compatible with the C++11 standard.
Earlier versions were written in F77 and F90. See the `History page
<https://www.lammps.org/history.html>`_ of the website for details. All
versions can be downloaded from the `LAMMPS website <lws_>`_.
LAMMPS is designed to be easy to modify or extend with new
capabilities, such as new force fields, atom types, boundary
@ -41,8 +42,9 @@ short distances, so that the local density of particles never becomes
too large. This is in contrast to methods used for modeling plasma
or gravitational bodies (e.g. galaxy formation).
On parallel machines, LAMMPS uses spatial-decomposition techniques to
partition the simulation domain into small sub-domains of equal
computational cost, one of which is assigned to each processor.
Processors communicate and store "ghost" atom information for atoms
that border their sub-domain.
On parallel machines, LAMMPS uses spatial-decomposition techniques with
MPI parallelization to partition the simulation domain into small
sub-domains of equal computational cost, one of which is assigned to
each processor. Processors communicate and store "ghost" atom
information for atoms that border their sub-domain. Multi-threading
parallelization with particle decomposition can be used in addition.
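As a rough illustration of this scheme, here is a minimal, hypothetical sketch (not LAMMPS code; all names are invented) of a 1d spatial decomposition with periodic ghost-atom exchange between neighboring MPI ranks:

// Hypothetical sketch, not LAMMPS code: each rank owns one slab of a
// periodic 1d domain and ships atoms near its boundary to a neighbor
// rank as ghosts.
#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);
  int rank, nprocs;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  const double xlo = 0.0, xhi = 100.0, cutoff = 2.5;
  const double dx = (xhi - xlo) / nprocs;   // equal slabs: equal cost if density is uniform
  const double sublo = xlo + rank * dx, subhi = sublo + dx;

  // a few owned atoms spread over this rank's sub-domain
  std::vector<double> x;
  for (int i = 0; i < 8; i++) x.push_back(sublo + (i + 0.5) * dx / 8.0);

  // atoms within the cutoff of a boundary become ghosts on the neighbor
  std::vector<double> to_left, to_right;
  for (double xi : x) {
    if (xi < sublo + cutoff) to_left.push_back(xi);
    if (xi >= subhi - cutoff) to_right.push_back(xi);
  }

  const int left = (rank - 1 + nprocs) % nprocs;   // periodic boundaries
  const int right = (rank + 1) % nprocs;

  // exchange the count, then the coordinates (send right, receive from left;
  // a full halo exchange repeats this in the opposite direction)
  int nsend = (int) to_right.size(), nrecv = 0;
  MPI_Sendrecv(&nsend, 1, MPI_INT, right, 0, &nrecv, 1, MPI_INT, left, 0,
               MPI_COMM_WORLD, MPI_STATUS_IGNORE);
  std::vector<double> ghosts(nrecv);
  MPI_Sendrecv(to_right.data(), nsend, MPI_DOUBLE, right, 1,
               ghosts.data(), nrecv, MPI_DOUBLE, left, 1,
               MPI_COMM_WORLD, MPI_STATUS_IGNORE);

  std::printf("rank %d: %zu owned atoms, %d ghosts\n", rank, x.size(), nrecv);
  MPI_Finalize();
  return 0;
}

LAMMPS itself decomposes in all three dimensions and supports load balancing; the sketch only shows the owned/ghost split the paragraph describes.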

View File

@ -35,9 +35,9 @@ visualization package you have installed.
Note that for GL, you need to be able to run the Pizza.py GL tool,
which is included in the pizza sub-directory. See the Pizza.py doc pages for more info:
* `https://pizza.sandia.gov <pizza_>`_
* `https://lammps.github.io/pizza <pizza_>`_
.. _pizza: https://pizza.sandia.gov
.. _pizza: https://lammps.github.io/pizza
Note that for AtomEye, you need version 3, and there is a line in the
scripts that specifies the path and name of the executable. See the

View File

@ -15,7 +15,7 @@ Sandia which provides tools for doing setup, analysis, plotting, and
visualization for LAMMPS simulations.
.. _lws: https://www.lammps.org
.. _pizza: https://pizza.sandia.gov
.. _pizza: https://lammps.github.io/pizza
.. _python: https://www.python.org
Additional tools included in the LAMMPS distribution are described on

View File

@ -558,7 +558,7 @@ Related commands
:doc:`group <group>`, :doc:`processors <processors>`,
:doc:`fix balance <fix_balance>`, :doc:`comm_style <comm_style>`
.. _pizza: https://pizza.sandia.gov
.. _pizza: https://lammps.github.io/pizza
Default
"""""""

View File

@ -119,8 +119,7 @@ The per-atom vector values will be an ID > 0, as explained above.
Restrictions
""""""""""""
These computes are part of the EXTRA-COMPUTE package. They are only enabled if
LAMMPS was built with that package. See the :doc:`Build package <Build_package>` page for more info.
none
Related commands
""""""""""""""""

View File

@ -182,8 +182,7 @@ page for an overview of LAMMPS output options.
Restrictions
""""""""""""
This compute is part of the EXTRA-COMPUTE package. It is only enabled if
LAMMPS was built with that package. See the :doc:`Build package <Build_package>` page for more info.
none
Related commands
""""""""""""""""

View File

@ -230,7 +230,7 @@ individual values and the file itself.
The *atom*, *local*, and *custom* styles create files in a simple text
format that is self-explanatory when viewing a dump file. Some of the
LAMMPS post-processing tools described on the :doc:`Tools <Tools>` doc
page, including `Pizza.py <https://pizza.sandia.gov>`_,
page, including `Pizza.py <https://lammps.github.io/pizza>`_,
work with this format, as does the :doc:`rerun <rerun>` command.
For post-processing purposes the *atom*, *local*, and *custom* text

View File

@ -590,8 +590,8 @@ Play the movie:
% mplayer foo.mpg
% ffplay bar.avi
* c) Use the `Pizza.py <https://pizza.sandia.gov>`_
`animate tool <https://pizza.sandia.gov/doc/animate.html>`_,
* c) Use the `Pizza.py <https://lammps.github.io/pizza>`_
`animate tool <https://lammps.github.io/pizza/doc/animate.html>`_,
which works directly on a series of image files.
.. code-block:: python

View File

@ -403,7 +403,7 @@ Related commands
:doc:`group <group>`, :doc:`processors <processors>`, :doc:`balance <balance>`,
:doc:`comm_style <comm_style>`
.. _pizza: https://pizza.sandia.gov
.. _pizza: https://lammps.github.io/pizza
Default
"""""""

View File

@ -89,7 +89,7 @@ first stage) is changed to:
.. parsed-literal::
Fi = -Grad(V) + 2 (Grad(V) dot T') T'
Fi = -Grad(V) + 2 (Grad(V) dot T') T' + Fnudge_perp
and the relaxation procedure is continued to a new converged MEP.
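For concreteness, here is a small sketch of evaluating this modified force from a potential gradient and the unit tangent T' along the path. This is generic 3-vector math, not the LAMMPS implementation, and the new Fnudge_perp term from this change would be added on top:

// Generic 3-vector sketch, not the LAMMPS implementation.
#include <array>
using Vec3 = std::array<double,3>;

static double dot(const Vec3 &a, const Vec3 &b) {
  return a[0]*b[0] + a[1]*b[1] + a[2]*b[2];
}

// gradV: Grad(V) at this replica; tangent: unit tangent T' along the path.
// Returns Fi = -Grad(V) + 2 (Grad(V) dot T') T', i.e. the force component
// parallel to the path is inverted so relaxation climbs toward the saddle
// point; this patch adds Fnudge_perp on top of this.
Vec3 neb_modified_force(const Vec3 &gradV, const Vec3 &tangent) {
  const double g_dot_t = dot(gradV, tangent);
  Vec3 f;
  for (int k = 0; k < 3; k++)
    f[k] = -gradV[k] + 2.0 * g_dot_t * tangent[k];
  return f;
}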

View File

@ -53,6 +53,7 @@ checksums = { \
'2.6.3' : 'a9f8028fd74528c2024781ea1fdefeee', \
'2.7.0' : '95f29dd0c067577f11972ff90dfc7d12', \
'2.7.1' : '4eac6a462ec84dfe0cec96c82421b8e8', \
'2.7.2' : 'cfa0b4dd90a81c25d3302e8d97bfeaea', \
}
# parse and process arguments

View File

@ -1823,7 +1823,6 @@ class lammps(object):
with ExceptionCheck(self):
return self.lib.lammps_fix_external_get_force(self.lmp, fix_id.encode())
return None
# -------------------------------------------------------------------------

View File

@ -647,7 +647,6 @@ void PPPMDispDielectric::fieldforce_c_ad()
// convert E-field to force and subtract self forces
const double qfactor = qqrd2e * scale;
double qtmp = eps[i]*q[i];
s1 = x[i][0]*hx_inv;
s2 = x[i][1]*hy_inv;
@ -751,7 +750,7 @@ void PPPMDispDielectric::fieldforce_c_peratom()
extended to non-neutral systems (J. Chem. Phys. 131, 094107).
------------------------------------------------------------------------- */
void PPPMDispDielectric::slabcorr(int eflag)
void PPPMDispDielectric::slabcorr(int /*eflag*/)
{
// compute local contribution to global dipole moment
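The signature change above uses the standard C++ idiom for an intentionally unused parameter: the name is commented out so that callers and overrides are unaffected while the compiler stops issuing unused-parameter warnings. A minimal sketch:

// Unchanged signature, silenced warning: the parameter stays in the
// declaration, but its name is commented out because the body never
// reads it.
void slabcorr_sketch(int /*eflag*/) {
  // ... body that does not use the flag ...
}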

View File

@ -116,7 +116,7 @@ ComputeHMA::ComputeHMA(LAMMPS *lmp, int narg, char **arg) :
computeU = computeP = computeCv = -1;
returnAnharmonic = 0;
size_vector = 0;
memory->create(extlist, 3, "hma:extlist");
extlist = new int[3];
for (int iarg=4; iarg<narg; iarg++) {
if (!strcmp(arg[iarg], "u")) {
if (computeU>-1) continue;
@ -145,20 +145,11 @@ ComputeHMA::ComputeHMA(LAMMPS *lmp, int narg, char **arg) :
}
}
if (size_vector == 0) {
error->all(FLERR,"Illegal compute hma command");
}
if (size_vector<3) {
memory->grow(extlist, size_vector, "hma:extlist");
}
memory->create(vector, size_vector, "hma:vector");
if (size_vector == 0) error->all(FLERR,"Illegal compute hma command");
vector = new double[size_vector];
if (computeU>-1 || computeCv>-1) {
peflag = 1;
}
if (computeP>-1) {
pressflag = 1;
}
if (computeU>-1 || computeCv>-1) peflag = 1;
if (computeP>-1) pressflag = 1;
nmax = 0;
}
@ -170,10 +161,11 @@ ComputeHMA::~ComputeHMA()
// check nfix in case all fixes have already been deleted
if (modify->nfix) modify->delete_fix(id_fix);
delete [] id_fix;
delete [] id_temp;
memory->destroy(extlist);
memory->destroy(vector);
delete[] id_fix;
delete[] id_temp;
delete[] extlist;
delete[] vector;
memory->destroy(deltaR);
}
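The allocation changes above replace the LAMMPS memory->create()/memory->destroy() pair with plain new[]/delete[] for these small fixed-size arrays. The invariant being restored is that every array is released by the deallocator matching its allocator; a minimal sketch of the corrected pairing:

// Sketch of the invariant behind this change: pair each allocator with
// its own deallocator. memory->create() pairs with memory->destroy();
// new[] pairs with delete[]. Mixing the two is undefined behavior.
void hma_alloc_sketch(int size_vector) {
  int *extlist = new int[3];             // was memory->create(extlist, 3, ...)
  double *vector = new double[size_vector];
  // ... use the arrays ...
  delete[] extlist;                      // was memory->destroy(extlist)
  delete[] vector;
}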

View File

@ -162,7 +162,11 @@ void AngleCharmmIntel::eval(const int vflag,
if (VFLAG && vflag) {
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
}
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:seangle, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:seangle, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int n = nfrom; n < nto; n ++) {
#else
for (int n = nfrom; n < nto; n += npl) {
@ -246,7 +250,11 @@ void AngleCharmmIntel::eval(const int vflag,
// apply force to each of 3 atoms
#ifdef LMP_INTEL_USE_SIMDOFF
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i1 < nlocal) {
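This #if/#else block is the pattern applied throughout the INTEL package in this commit: the LLVM-based Intel compiler no longer accepts the legacy "#pragma simd", so the OpenMP SIMD form is selected when USE_OMP_SIMD is defined. A stand-alone sketch of the idiom with a hypothetical loop (in the real code it is additionally guarded by LMP_SIMD_COMPILER checks):

// Stand-alone sketch of the pragma dispatch (hypothetical function).
double dot_reduce(const double *a, const double *b, int n) {
  double sum = 0.0;
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:sum)   // oneAPI icpx and other OpenMP compilers
#else
#pragma simd reduction(+:sum)       // classic Intel icc only
#endif
  for (int i = 0; i < n; i++) sum += a[i] * b[i];
  return sum;
}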

View File

@ -162,7 +162,11 @@ void AngleHarmonicIntel::eval(const int vflag,
if (VFLAG && vflag) {
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
}
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:seangle, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:seangle, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int n = nfrom; n < nto; n ++) {
#else
for (int n = nfrom; n < nto; n += npl) {
@ -228,7 +232,11 @@ void AngleHarmonicIntel::eval(const int vflag,
// apply force to each of 3 atoms
#ifdef LMP_INTEL_USE_SIMDOFF
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i1 < nlocal) {

View File

@ -158,7 +158,11 @@ void BondFENEIntel::eval(const int vflag,
if (VFLAG && vflag) {
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
}
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:sebond, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:sebond, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int n = nfrom; n < nto; n ++) {
#else
for (int n = nfrom; n < nto; n += npl) {
@ -215,7 +219,11 @@ void BondFENEIntel::eval(const int vflag,
// apply force to each of 2 atoms
#ifdef LMP_INTEL_USE_SIMDOFF
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i1 < nlocal) {

View File

@ -155,7 +155,11 @@ void BondHarmonicIntel::eval(const int vflag,
if (VFLAG && vflag) {
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
}
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:sebond, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:sebond, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int n = nfrom; n < nto; n ++) {
#else
for (int n = nfrom; n < nto; n += npl) {
@ -184,7 +188,11 @@ void BondHarmonicIntel::eval(const int vflag,
// apply force to each of 2 atoms
#ifdef LMP_INTEL_USE_SIMDOFF
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i1 < nlocal) {

View File

@ -181,9 +181,16 @@ void DihedralCharmmIntel::eval(const int vflag,
}
#if defined(LMP_SIMD_COMPILER_TEST)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:sedihedral, sevdwl, secoul, sv0, sv1, sv2, \
sv3, sv4, sv5, spv0, spv1, spv2, spv3, spv4, \
spv5)
#else
#pragma simd reduction(+:sedihedral, sevdwl, secoul, sv0, sv1, sv2, \
sv3, sv4, sv5, spv0, spv1, spv2, spv3, spv4, spv5)
sv3, sv4, sv5, spv0, spv1, spv2, spv3, spv4, \
spv5)
#endif
#pragma vector aligned
for (int n = nfrom; n < nto; n++) {
#endif
for (int n = nfrom; n < nto; n += npl) {
@ -329,7 +336,11 @@ void DihedralCharmmIntel::eval(const int vflag,
#if defined(LMP_SIMD_COMPILER_TEST)
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i2 < nlocal) {
@ -408,7 +419,11 @@ void DihedralCharmmIntel::eval(const int vflag,
// apply force to each of 4 atoms
#if defined(LMP_SIMD_COMPILER_TEST)
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i1 < nlocal) {

View File

@ -154,7 +154,11 @@ void DihedralFourierIntel::eval(const int vflag,
if (VFLAG && vflag) {
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
}
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:sedihedral, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:sedihedral, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int n = nfrom; n < nto; n ++) {
#else
for (int n = nfrom; n < nto; n += npl) {
@ -304,7 +308,11 @@ void DihedralFourierIntel::eval(const int vflag,
}
#ifdef LMP_INTEL_USE_SIMDOFF
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i1 < nlocal) {

View File

@ -154,7 +154,11 @@ void DihedralHarmonicIntel::eval(const int vflag,
if (VFLAG && vflag) {
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
}
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:sedihedral, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:sedihedral, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int n = nfrom; n < nto; n ++) {
#else
for (int n = nfrom; n < nto; n += npl) {
@ -299,7 +303,11 @@ void DihedralHarmonicIntel::eval(const int vflag,
}
#ifdef LMP_INTEL_USE_SIMDOFF
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i1 < nlocal) {

View File

@ -158,7 +158,11 @@ void DihedralOPLSIntel::eval(const int vflag,
if (VFLAG && vflag) {
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
}
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:sedihedral, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:sedihedral, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int n = nfrom; n < nto; n ++) {
#else
for (int n = nfrom; n < nto; n += npl) {
@ -319,7 +323,11 @@ void DihedralOPLSIntel::eval(const int vflag,
}
#ifdef LMP_INTEL_USE_SIMDOFF
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i1 < nlocal) {

View File

@ -635,19 +635,31 @@ void FixIntel::reduce_results(acc_t * _noalias const f_scalar)
if (_nthreads == 4) {
acc_t *f_scalar3 = f_scalar2 + f_stride4;
acc_t *f_scalar4 = f_scalar3 + f_stride4;
_use_simd_pragma("vector aligned")
_use_simd_pragma("simd")
#if defined(USE_OMP_SIMD)
#pragma omp simd aligned(f_scalar,f_scalar2,f_scalar3,f_scalar4:64)
#elif defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd
#endif
for (int n = 0; n < o_range; n++)
f_scalar[n] += f_scalar2[n] + f_scalar3[n] + f_scalar4[n];
} else if (_nthreads == 2) {
_use_simd_pragma("vector aligned")
_use_simd_pragma("simd")
#if defined(USE_OMP_SIMD)
#pragma omp simd aligned(f_scalar,f_scalar2:64)
#elif defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd
#endif
for (int n = 0; n < o_range; n++)
f_scalar[n] += f_scalar2[n];
} else {
acc_t *f_scalar3 = f_scalar2 + f_stride4;
_use_simd_pragma("vector aligned")
_use_simd_pragma("simd")
#if defined(USE_OMP_SIMD)
#pragma omp simd aligned(f_scalar,f_scalar2,f_scalar3:64)
#elif defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd
#endif
for (int n = 0; n < o_range; n++)
f_scalar[n] += f_scalar2[n] + f_scalar3[n];
}
@ -662,8 +674,12 @@ void FixIntel::reduce_results(acc_t * _noalias const f_scalar)
acc_t *f_scalar2 = f_scalar + f_stride4;
for (int t = 1; t < _nthreads; t++) {
_use_simd_pragma("vector aligned")
_use_simd_pragma("simd")
#if defined(USE_OMP_SIMD)
#pragma omp simd aligned(f_scalar,f_scalar2:64)
#elif defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd
#endif
for (int n = iifrom; n < iito; n++)
f_scalar[n] += f_scalar2[n];
f_scalar2 += f_stride4;
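The loops above sum per-thread force accumulators into the first thread's copy. A scalar sketch of the same reduction pattern, with illustrative names (each thread owns a block of length f_stride4 inside one large aligned buffer):

// Scalar sketch with illustrative names: thread t's forces live at
// f + t * f_stride4; all blocks are summed into thread 0's block
// after the force computation.
void reduce_thread_forces(double *f, int f_stride4, int nthreads, int range) {
  for (int t = 1; t < nthreads; t++) {
    const double *ft = f + (long) t * f_stride4;
#if defined(USE_OMP_SIMD)
#pragma omp simd
#endif
    for (int n = 0; n < range; n++)
      f[n] += ft[n];
  }
}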

View File

@ -99,8 +99,12 @@ void FixNHIntel::remap()
if (allremap) {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < nlocal; i++) {
const double d0 = x[i].x - b0;
@ -112,8 +116,12 @@ void FixNHIntel::remap()
}
} else {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < nlocal; i++) {
if (mask[i] & dilate_group_bit) {
@ -278,8 +286,12 @@ void FixNHIntel::remap()
if (allremap) {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < nlocal; i++) {
x[i].x = h0*x[i].x + h5*x[i].y + h4*x[i].z + nb0;
@ -288,8 +300,12 @@ void FixNHIntel::remap()
}
} else {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < nlocal; i++) {
if (mask[i] & dilate_group_bit) {
@ -415,8 +431,12 @@ void FixNHIntel::nh_v_press()
if (igroup == 0) {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < nlocal; i++) {
v[i].x *= f0;
@ -425,8 +445,12 @@ void FixNHIntel::nh_v_press()
}
} else {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < nlocal; i++) {
if (mask[i] & groupbit) {
@ -448,8 +472,12 @@ void FixNHIntel::nve_v()
double * _noalias const v = atom->v[0];
const double * _noalias const f = atom->f[0];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++)
v[i] += _dtfm[i] * f[i];
@ -468,15 +496,23 @@ void FixNHIntel::nve_x()
if (igroup == 0) {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++)
x[i] += dtv * v[i];
} else {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++) {
if (_dtfm[i] != 0.0)
@ -500,15 +536,23 @@ void FixNHIntel::nh_v_temp()
if (igroup == 0) {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++)
v[i] *= factor_eta;
} else {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++) {
if (_dtfm[i] != 0.0)

View File

@ -97,8 +97,12 @@ void FixNVEAsphereIntel::initial_integrate(int /*vflag*/)
dtq = 0.5 * dtv;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++) {
v[i] += _dtfm[i] * f[i];
@ -108,8 +112,12 @@ void FixNVEAsphereIntel::initial_integrate(int /*vflag*/)
// update angular momentum by 1/2 step
if (igroup == 0) {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < nlocal; i++) {
double *quat = bonus[ellipsoid[i]].quat;
@ -118,8 +126,12 @@ void FixNVEAsphereIntel::initial_integrate(int /*vflag*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < nlocal; i++) {
if (mask[i] & groupbit) {
@ -143,8 +155,12 @@ void FixNVEAsphereIntel::final_integrate()
const double * _noalias const torque = atom->torque[0];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++) {
v[i] += _dtfm[i] * f[i];

View File

@ -68,8 +68,12 @@ void FixNVEIntel::initial_integrate(int /*vflag*/)
if (igroup == 0 && atom->ntypes == 1 && !atom->rmass) {
const double dtfm = dtf / atom->mass[1];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++) {
v[i] += dtfm * f[i];
@ -78,8 +82,12 @@ void FixNVEIntel::initial_integrate(int /*vflag*/)
} else if (igroup == 0) {
if (neighbor->ago == 0) reset_dt();
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++) {
v[i] += _dtfm[i] * f[i];
@ -88,8 +96,12 @@ void FixNVEIntel::initial_integrate(int /*vflag*/)
} else {
if (neighbor->ago == 0) reset_dt();
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++) {
if (_dtfm[i] != 0.0) {
@ -112,16 +124,24 @@ void FixNVEIntel::final_integrate()
_nlocal3 = 3 * atom->nlocal;
const double dtfm = dtf / atom->mass[1];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++)
v[i] += dtfm * f[i];
} else if (igroup == 0) {
if (neighbor->ago == 0) reset_dt();
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++) {
v[i] += _dtfm[i] * f[i];
@ -129,8 +149,12 @@ void FixNVEIntel::final_integrate()
} else {
if (neighbor->ago == 0) reset_dt();
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++)
v[i] += _dtfm[i] * f[i];

View File

@ -165,7 +165,11 @@ void ImproperCvffIntel::eval(const int vflag,
if (VFLAG && vflag) {
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
}
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:seimproper, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:seimproper, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int n = nfrom; n < nto; n++) {
#else
for (int n = nfrom; n < nto; n += npl) {
@ -247,7 +251,11 @@ void ImproperCvffIntel::eval(const int vflag,
flt_t p, pd;
#ifdef LMP_INTEL_USE_SIMDOFF_FIX
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (m == 2) {
@ -319,7 +327,11 @@ void ImproperCvffIntel::eval(const int vflag,
// apply force to each of 4 atoms
#ifdef LMP_INTEL_USE_SIMDOFF_FIX
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i1 < nlocal) {

View File

@ -167,7 +167,11 @@ void ImproperHarmonicIntel::eval(const int vflag,
if (VFLAG && vflag) {
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
}
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:seimproper, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:seimproper, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int n = nfrom; n < nto; n++) {
#else
for (int n = nfrom; n < nto; n += npl) {
@ -276,7 +280,11 @@ void ImproperHarmonicIntel::eval(const int vflag,
// apply force to each of 4 atoms
#ifdef LMP_INTEL_USE_SIMDOFF
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i1 < nlocal) {

View File

@ -127,7 +127,8 @@ struct vector_ops<double, KNC> {
}
template<int scale>
static fvec gather(const fvec &from, bvec mask, const ivec &idx, const void *base) {
return _mm512_mask_i32logather_pd(from, mask, idx, base, scale);
return _mm512_mask_i32gather_pd(from, mask, _mm512_castsi512_si256(idx),
base, scale);
}
static fvec blend(const bvec &mask, const fvec &a, const fvec &b) {
return _mm512_mask_blend_pd(mask, a, b);
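This is the substitution made throughout the patch: the removed _mm512_mask_i32logather_pd variant took its 32-bit indices in the low half of a __m512i, while the standard AVX-512F gather takes a __m256i, so the index vector is narrowed with a zero-cost cast. A minimal sketch (requires AVX-512F):

// The cast keeps the low 256 bits (eight 32-bit indices), which is
// exactly what an 8-lane double gather consumes.
#include <immintrin.h>

__m512d masked_gather8(__m512d src, __mmask8 m, __m512i idx32,
                       const double *base) {
  return _mm512_mask_i32gather_pd(src, m, _mm512_castsi512_si256(idx32),
                                  base, 8 /* scale: sizeof(double) */);
}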

View File

@ -511,7 +511,8 @@ public:
const int scale) {
assert(scale == sizeof(FVEC_SCAL_T));
# if FVEC_LEN==8
return FVEC_SUFFIX(_mm512_i32logather_)(idx.val_, mem, sizeof(FVEC_SCAL_T));
return FVEC_SUFFIX(_mm512_i32gather_)(_mm512_castsi512_si256(idx.val_),
mem, sizeof(FVEC_SCAL_T));
# else
return FVEC_SUFFIX(_mm512_i32gather_)(idx.val_, mem, sizeof(FVEC_SCAL_T));
# endif
@ -522,8 +523,8 @@ public:
) {
assert(scale == sizeof(FVEC_SCAL_T));
# if FVEC_LEN==8
return FVEC_SUFFIX(_mm512_mask_i32logather_)(src.val_, mask.val_, idx.val_,
mem, sizeof(FVEC_SCAL_T));
return FVEC_SUFFIX(_mm512_mask_i32gather_)(src.val_, mask.val_,
_mm512_castsi512_si256(idx.val_), mem, sizeof(FVEC_SCAL_T));
# else
return FVEC_SUFFIX(_mm512_mask_i32gather_)(src.val_, mask.val_, idx.val_,
mem, sizeof(FVEC_SCAL_T));
@ -609,8 +610,8 @@ public:
) {
assert(scale == sizeof(FVEC_SCAL_T));
# if FVEC_LEN==8
return FVEC_SUFFIX(_mm512_mask_i32logather_)(src.val_, mask.val_, idx.val_,
mem, sizeof(FVEC_SCAL_T));
return FVEC_SUFFIX(_mm512_mask_i32gather_)(src.val_, mask.val_,
_mm512_castsi512_si256(idx.val_), mem, sizeof(FVEC_SCAL_T));
# else
return FVEC_SUFFIX(_mm512_mask_i32gather_)(src.val_, mask.val_, idx.val_,
mem, sizeof(FVEC_SCAL_T));
@ -622,8 +623,9 @@ public:
) {
assert(scale == sizeof(FVEC_SCAL_T));
# if FVEC_LEN==8
FVEC_SUFFIX(_mm512_mask_i32loscatter_)(mem, mask.val_, idx.val_, a.val_,
sizeof(FVEC_SCAL_T));
FVEC_SUFFIX(_mm512_mask_i32scatter_)(mem, mask.val_,
_mm512_castsi512_si256(idx.val_),
a.val_, sizeof(FVEC_SCAL_T));
# else
FVEC_SUFFIX(_mm512_mask_i32scatter_)(mem, mask.val_, idx.val_, a.val_,
sizeof(FVEC_SCAL_T));
@ -666,11 +668,11 @@ public:
const double * mem, const int scale
) {
assert(scale == sizeof(double));
__m512d lo = _mm512_mask_i32logather_pd(src.lo_, mask.val_, idx.val_, mem,
sizeof(double));
__m512d hi = _mm512_mask_i32logather_pd(src.hi_, get_bvec_hi(mask.val_),
get_ivec_hi(idx.val_), mem,
sizeof(double));
__m512d lo = _mm512_mask_i32gather_pd(src.lo_, mask.val_,
_mm512_castsi512_si256(idx.val_),
mem, sizeof(double));
__m512d hi = _mm512_mask_i32gather_pd(src.hi_, get_bvec_hi(mask.val_),
_mm512_castsi512_si256(get_ivec_hi(idx.val_)), mem, sizeof(double));
return avec16pd(lo, hi);
}
VEC_INLINE static void mask_i32loscatter(
@ -678,10 +680,12 @@ public:
const avec16pd &a, const int scale
) {
assert(scale == sizeof(double));
_mm512_mask_i32loscatter_pd(mem, mask.val_, idx.val_, a.lo_,
sizeof(double));
_mm512_mask_i32loscatter_pd(mem, get_bvec_hi(mask.val_),
get_ivec_hi(idx.val_), a.hi_, sizeof(double));
_mm512_mask_i32scatter_pd(mem, mask.val_,
_mm512_castsi512_si256(idx.val_), a.lo_,
sizeof(double));
_mm512_mask_i32scatter_pd(mem, get_bvec_hi(mask.val_),
_mm512_castsi512_si256(get_ivec_hi(idx.val_)),
a.hi_, sizeof(double));
}
#define AVEC2_BINOP(the_sym, the_name) \

View File

@ -17,8 +17,13 @@
------------------------------------------------------------------------- */
#ifdef __INTEL_LLVM_COMPILER
#define USE_OMP_SIMD
#define __INTEL_COMPILER __INTEL_LLVM_COMPILER
#define __INTEL_COMPILER_BUILD_DATE __INTEL_LLVM_COMPILER
#define _MM_SCALE_1 1
#define _MM_SCALE_2 2
#define _MM_SCALE_4 4
#define _MM_SCALE_8 8
#endif
#ifdef __INTEL_COMPILER
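These additions let the package build with icpx, the LLVM-based Intel compiler, which defines __INTEL_LLVM_COMPILER but not the classic __INTEL_COMPILER macro or the _MM_SCALE_* constants; the header therefore maps one onto the other so the existing #ifdef __INTEL_COMPILER blocks stay active. A sketch of the resulting compile-time dispatch:

// Sketch of the dispatch after this header has been processed:
#if defined(__INTEL_LLVM_COMPILER)
  // oneAPI icpx: use "#pragma omp simd"; _MM_SCALE_* supplied by hand above
#elif defined(__INTEL_COMPILER)
  // classic icc: legacy "#pragma simd" and _MM_SCALE_* are available
#else
  // other compilers: the package's vectorization hints become no-ops
#endif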
@ -332,6 +337,9 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
#endif
// TO BE DEPRECATED
#ifndef USE_OMP_SIMD
#define IP_PRE_fdotr_acc_force_l5(lf, lt, minlocal, nthreads, f_start, \
f_stride, pos, ov0, ov1, ov2, \
ov3, ov4, ov5) \
@ -526,6 +534,198 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
} \
}
#else
#define IP_PRE_fdotr_acc_force_l5(lf, lt, minlocal, nthreads, f_start, \
f_stride, pos, ov0, ov1, ov2, \
ov3, ov4, ov5) \
{ \
acc_t *f_scalar = &f_start[0].x; \
flt_t *x_scalar = &pos[minlocal].x; \
int f_stride4 = f_stride * 4; \
_alignvar(acc_t ovv[16],64); \
int vwidth; \
if (sizeof(acc_t) == sizeof(double)) \
vwidth = INTEL_COMPILE_WIDTH/2; \
else \
vwidth = INTEL_COMPILE_WIDTH; \
if (vwidth < 4) vwidth = 4; \
_use_simd_pragma("omp simd aligned(ovv:64)") \
for (int v = 0; v < vwidth; v++) ovv[v] = (acc_t)0.0; \
int remainder = lt % vwidth; \
if (lf > lt) remainder = 0; \
const int v_range = lt - remainder; \
if (nthreads == 2) { \
acc_t *f_scalar2 = f_scalar + f_stride4; \
for (int n = lf; n < v_range; n += vwidth) { \
_use_simd_pragma("omp simd aligned(f_scalar,f_scalar2,ovv,x_scalar:64)")\
for (int v = 0; v < vwidth; v++) { \
f_scalar[n+v] += f_scalar2[n+v]; \
ovv[v] += f_scalar[n+v] * x_scalar[n+v]; \
} \
ov3 += f_scalar[n+1] * x_scalar[n+0]; \
ov4 += f_scalar[n+2] * x_scalar[n+0]; \
ov5 += f_scalar[n+2] * x_scalar[n+1]; \
if (vwidth > 4) { \
ov3 += f_scalar[n+5] * x_scalar[n+4]; \
ov4 += f_scalar[n+6] * x_scalar[n+4]; \
ov5 += f_scalar[n+6] * x_scalar[n+5]; \
} \
if (vwidth > 8) { \
ov3 += f_scalar[n+9] * x_scalar[n+8]; \
ov3 += f_scalar[n+13] * x_scalar[n+12]; \
ov4 += f_scalar[n+10] * x_scalar[n+8]; \
ov4 += f_scalar[n+14] * x_scalar[n+12]; \
ov5 += f_scalar[n+10] * x_scalar[n+9]; \
ov5 += f_scalar[n+14] * x_scalar[n+13]; \
} \
} \
_use_simd_pragma("vector aligned") \
_use_simd_pragma("ivdep") \
_use_simd_pragma("loop_count min(4) max(INTEL_COMPILE_WIDTH)") \
for (int n = v_range; n < lt; n++) \
f_scalar[n] += f_scalar2[n]; \
} else if (nthreads==4) { \
acc_t *f_scalar2 = f_scalar + f_stride4; \
acc_t *f_scalar3 = f_scalar2 + f_stride4; \
acc_t *f_scalar4 = f_scalar3 + f_stride4; \
for (int n = lf; n < v_range; n += vwidth) { \
_use_simd_pragma("omp simd aligned(f_scalar,f_scalar2,f_scalar3,f_scalar4,ovv:64)") \
for (int v = 0; v < vwidth; v++) { \
f_scalar[n+v] += f_scalar2[n+v] + f_scalar3[n+v] + \
f_scalar4[n+v]; \
ovv[v] += f_scalar[n+v] * x_scalar[n+v]; \
} \
ov3 += f_scalar[n+1] * x_scalar[n+0]; \
ov4 += f_scalar[n+2] * x_scalar[n+0]; \
ov5 += f_scalar[n+2] * x_scalar[n+1]; \
if (vwidth > 4) { \
ov3 += f_scalar[n+5] * x_scalar[n+4]; \
ov4 += f_scalar[n+6] * x_scalar[n+4]; \
ov5 += f_scalar[n+6] * x_scalar[n+5]; \
} \
if (vwidth > 8) { \
ov3 += f_scalar[n+9] * x_scalar[n+8]; \
ov3 += f_scalar[n+13] * x_scalar[n+12]; \
ov4 += f_scalar[n+10] * x_scalar[n+8]; \
ov4 += f_scalar[n+14] * x_scalar[n+12]; \
ov5 += f_scalar[n+10] * x_scalar[n+9]; \
ov5 += f_scalar[n+14] * x_scalar[n+13]; \
} \
} \
_use_simd_pragma("vector aligned") \
_use_simd_pragma("ivdep") \
_use_simd_pragma("loop_count min(4) max(INTEL_COMPILE_WIDTH)") \
for (int n = v_range; n < lt; n++) \
f_scalar[n] += f_scalar2[n] + f_scalar3[n] + f_scalar4[n]; \
} else if (nthreads==1) { \
for (int n = lf; n < v_range; n += vwidth) { \
_use_simd_pragma("omp simd aligned(ovv,f_scalar,x_scalar:64)") \
for (int v = 0; v < vwidth; v++) \
ovv[v] += f_scalar[n+v] * x_scalar[n+v]; \
ov3 += f_scalar[n+1] * x_scalar[n+0]; \
ov4 += f_scalar[n+2] * x_scalar[n+0]; \
ov5 += f_scalar[n+2] * x_scalar[n+1]; \
if (vwidth > 4) { \
ov3 += f_scalar[n+5] * x_scalar[n+4]; \
ov4 += f_scalar[n+6] * x_scalar[n+4]; \
ov5 += f_scalar[n+6] * x_scalar[n+5]; \
} \
if (vwidth > 8) { \
ov3 += f_scalar[n+9] * x_scalar[n+8]; \
ov3 += f_scalar[n+13] * x_scalar[n+12]; \
ov4 += f_scalar[n+10] * x_scalar[n+8]; \
ov4 += f_scalar[n+14] * x_scalar[n+12]; \
ov5 += f_scalar[n+10] * x_scalar[n+9]; \
ov5 += f_scalar[n+14] * x_scalar[n+13]; \
} \
} \
} else if (nthreads==3) { \
acc_t *f_scalar2 = f_scalar + f_stride4; \
acc_t *f_scalar3 = f_scalar2 + f_stride4; \
for (int n = lf; n < v_range; n += vwidth) { \
_use_simd_pragma("omp simd aligned(f_scalar,f_scalar2,f_scalar3,ovv,x_scalar:64)") \
for (int v = 0; v < vwidth; v++) { \
f_scalar[n+v] += f_scalar2[n+v] + f_scalar3[n+v]; \
ovv[v] += f_scalar[n+v] * x_scalar[n+v]; \
} \
ov3 += f_scalar[n+1] * x_scalar[n+0]; \
ov4 += f_scalar[n+2] * x_scalar[n+0]; \
ov5 += f_scalar[n+2] * x_scalar[n+1]; \
if (vwidth > 4) { \
ov3 += f_scalar[n+5] * x_scalar[n+4]; \
ov4 += f_scalar[n+6] * x_scalar[n+4]; \
ov5 += f_scalar[n+6] * x_scalar[n+5]; \
} \
if (vwidth > 8) { \
ov3 += f_scalar[n+9] * x_scalar[n+8]; \
ov3 += f_scalar[n+13] * x_scalar[n+12]; \
ov4 += f_scalar[n+10] * x_scalar[n+8]; \
ov4 += f_scalar[n+14] * x_scalar[n+12]; \
ov5 += f_scalar[n+10] * x_scalar[n+9]; \
ov5 += f_scalar[n+14] * x_scalar[n+13]; \
} \
} \
_use_simd_pragma("vector aligned") \
_use_simd_pragma("ivdep") \
_use_simd_pragma("loop_count min(4) max(INTEL_COMPILE_WIDTH)") \
for (int n = v_range; n < lt; n++) \
f_scalar[n] += f_scalar2[n] + f_scalar3[n]; \
} \
for (int n = v_range; n < lt; n += 4) { \
_use_simd_pragma("vector aligned") \
_use_simd_pragma("ivdep") \
for (int v = 0; v < 4; v++) \
ovv[v] += f_scalar[n+v] * x_scalar[n+v]; \
ov3 += f_scalar[n+1] * x_scalar[n+0]; \
ov4 += f_scalar[n+2] * x_scalar[n+0]; \
ov5 += f_scalar[n+2] * x_scalar[n+1]; \
} \
ov0 += ovv[0]; \
ov1 += ovv[1]; \
ov2 += ovv[2]; \
if (vwidth > 4) { \
ov0 += ovv[4]; \
ov1 += ovv[5]; \
ov2 += ovv[6]; \
} \
if (vwidth > 8) { \
ov0 += ovv[8] + ovv[12]; \
ov1 += ovv[9] + ovv[13]; \
ov2 += ovv[10] + ovv[14]; \
} \
}
#define IP_PRE_fdotr_acc_force(nall, minlocal, nthreads, f_start, \
f_stride, pos, offload, vflag, ov0, ov1, \
ov2, ov3, ov4, ov5) \
{ \
int o_range = (nall - minlocal) * 4; \
IP_PRE_omp_range_id_align(iifrom, iito, tid, o_range, nthreads, \
sizeof(acc_t)); \
\
acc_t *f_scalar = &f_start[0].x; \
int f_stride4 = f_stride * 4; \
int t; \
if (vflag == VIRIAL_FDOTR) t = 4; else t = 1; \
acc_t *f_scalar2 = f_scalar + f_stride4 * t; \
for ( ; t < nthreads; t++) { \
_use_simd_pragma("omp simd aligned(f_scalar,f_scalar2:64)") \
for (int n = iifrom; n < iito; n++) \
f_scalar[n] += f_scalar2[n]; \
f_scalar2 += f_stride4; \
} \
\
if (vflag == VIRIAL_FDOTR) { \
int nt_min = MIN(4,nthreads); \
IP_PRE_fdotr_acc_force_l5(iifrom, iito, minlocal, nt_min, f_start, \
f_stride, pos, ov0, ov1, ov2, ov3, ov4, \
ov5); \
} \
}
#endif
#ifdef _LMP_INTEL_OFFLOAD
#include <sys/time.h>
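The long macro added above fuses the per-thread force reduction with accumulation of the six virial components from the f dot r sum. Stripped of the vectorization and unrolling, what it computes per owned atom is (scalar sketch, illustrative names):

// Scalar sketch: forces and positions are packed four values per atom
// (x,y,z,w), and ov0..ov5 collect the virial in (xx, yy, zz, xy, xz, yz)
// order, matching the products taken inside the macro.
void fdotr_virial(const double (*x)[4], const double (*f)[4], int nlocal,
                  double ov[6]) {
  for (int i = 0; i < nlocal; i++) {
    ov[0] += f[i][0] * x[i][0];  // xx
    ov[1] += f[i][1] * x[i][1];  // yy
    ov[2] += f[i][2] * x[i][2];  // zz
    ov[3] += f[i][1] * x[i][0];  // xy
    ov[4] += f[i][2] * x[i][0];  // xz
    ov[5] += f[i][2] * x[i][1];  // yz
  }
}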

View File

@ -173,7 +173,7 @@ namespace ip_simd {
}
inline SIMD_double SIMD_gather(const double *p, const SIMD_int &i) {
return _mm512_i32logather_pd(i, p, _MM_SCALE_8);
return _mm512_i32gather_pd(_mm512_castsi512_si256(i), p, _MM_SCALE_8);
}
inline SIMD_int SIMD_gather(const SIMD_mask &m, const int *p,
@ -190,8 +190,8 @@ namespace ip_simd {
inline SIMD_double SIMD_gather(const SIMD_mask &m, const double *p,
const SIMD_int &i) {
return _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, p,
_MM_SCALE_8);
return _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), p, _MM_SCALE_8);
}
template <typename T>
@ -227,8 +227,8 @@ namespace ip_simd {
inline SIMD_double SIMD_gatherz(const SIMD_mask &m, const double *p,
const SIMD_int &i) {
return _mm512_mask_i32logather_pd( _mm512_set1_pd(0.0), m, i, p,
_MM_SCALE_8);
return _mm512_mask_i32gather_pd( _mm512_set1_pd(0.0), m,
_mm512_castsi512_si256(i),p, _MM_SCALE_8);
}
// ------- Store Operations
@ -257,7 +257,8 @@ namespace ip_simd {
inline void SIMD_scatter(const SIMD_mask &m, double *p,
const SIMD_int &i, const SIMD_double &vec) {
_mm512_mask_i32loscatter_pd(p, m, i, vec, _MM_SCALE_8);
_mm512_mask_i32scatter_pd(p, m, _mm512_castsi512_si256(i), vec,
_MM_SCALE_8);
}
// ------- Arithmetic Operations
@ -834,23 +835,29 @@ namespace ip_simd {
inline void SIMD_atom_gather(const SIMD_mask &m, const double *atom,
const SIMD_int &i, SIMD_double &x,
SIMD_double &y, SIMD_double &z) {
x = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, atom,
_MM_SCALE_2);
y = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, atom+1,
_MM_SCALE_2);
z = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, atom+2,
_MM_SCALE_2);
x = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), atom,
_MM_SCALE_2);
y = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), atom+1,
_MM_SCALE_2);
z = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), atom+2,
_MM_SCALE_2);
}
inline void SIMD_atom_gather(const SIMD_mask &m, const double *atom,
const SIMD_int &i, SIMD_double &x,
SIMD_double &y, SIMD_double &z, SIMD_int &type) {
x = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, atom,
_MM_SCALE_2);
y = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, atom+1,
_MM_SCALE_2);
z = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, atom+2,
_MM_SCALE_2);
x = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), atom,
_MM_SCALE_2);
y = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), atom+1,
_MM_SCALE_2);
z = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), atom+2,
_MM_SCALE_2);
type = _mm512_mask_i32gather_epi32(_mm512_undefined_epi32(), m, i, atom+3,
_MM_SCALE_2);
}
@ -888,10 +895,12 @@ namespace ip_simd {
const SIMD_int &joffset, SIMD_double &eng) {
SIMD_double jeng;
SIMD_conflict_pi_reduce1(rmask, joffset, eng);
jeng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset,
force, _MM_SCALE_2);
jeng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
_mm512_castsi512_si256(joffset),
force, _MM_SCALE_2);
jeng = jeng + eng;
_mm512_mask_i32loscatter_pd(force, rmask, joffset, jeng, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force, rmask, _mm512_castsi512_si256(joffset),
jeng, _MM_SCALE_2);
}
inline void SIMD_jeng_update(const SIMD_mask &rmask, double *force,
@ -899,20 +908,24 @@ namespace ip_simd {
SIMD_double engd, jeng;
engd = _mm512_cvtps_pd(_mm512_castps512_ps256(eng));
SIMD_conflict_pi_reduce1(rmask, joffset, engd);
jeng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset,
force, _MM_SCALE_2);
jeng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
_mm512_castsi512_si256(joffset),
force, _MM_SCALE_2);
jeng = jeng + engd;
_mm512_mask_i32loscatter_pd(force, rmask, joffset, jeng, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force, rmask, _mm512_castsi512_si256(joffset),
jeng, _MM_SCALE_2);
SIMD_mask rmask2 = rmask >> 8;
engd = _mm512_cvtps_pd(_mm512_castps512_ps256(
_mm512_shuffle_f32x4(eng,eng,238)));
SIMD_int joffset2 = _mm512_shuffle_i32x4(joffset, joffset, 238);
SIMD_conflict_pi_reduce1(rmask2, joffset2, engd);
jeng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask2, joffset2,
force, _MM_SCALE_2);
jeng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask2,
_mm512_castsi512_si256(joffset2),
force, _MM_SCALE_2);
jeng = jeng + engd;
_mm512_mask_i32loscatter_pd(force, rmask2, joffset2, jeng, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force, rmask2, _mm512_castsi512_si256(joffset2),
jeng, _MM_SCALE_2);
}
inline void SIMD_jeng_update_hi(const SIMD_mask &mask, float *force,
@ -926,10 +939,12 @@ namespace ip_simd {
SIMD_double jeng;
SIMD_conflict_pi_reduce1(rmask, joffset, eng);
jeng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset,
force, _MM_SCALE_2);
jeng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
_mm512_castsi512_si256(joffset),
force, _MM_SCALE_2);
jeng = jeng + eng;
_mm512_mask_i32loscatter_pd(force, rmask, joffset, jeng, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force, rmask, _mm512_castsi512_si256(joffset),
jeng, _MM_SCALE_2);
}
inline void SIMD_safe_jforce(const SIMD_mask &m, float *force,
@ -956,18 +971,24 @@ namespace ip_simd {
SIMD_double &fy, SIMD_double &fz) {
SIMD_conflict_pi_reduce3(m, i, fx, fy, fz);
SIMD_double jfrc;
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force,
_MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), force,
_MM_SCALE_2);
jfrc = jfrc + fx;
_mm512_mask_i32loscatter_pd(force, m, i, jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force + 1,
_MM_SCALE_2);
_mm512_mask_i32scatter_pd(force, m, _mm512_castsi512_si256(i), jfrc,
_MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), force + 1,
_MM_SCALE_2);
jfrc = jfrc + fy;
_mm512_mask_i32loscatter_pd(force+1, m, i, jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force + 2,
_MM_SCALE_2);
_mm512_mask_i32scatter_pd(force+1, m, _mm512_castsi512_si256(i), jfrc,
_MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), force + 2,
_MM_SCALE_2);
jfrc = jfrc + fz;
_mm512_mask_i32loscatter_pd(force+2, m, i, jfrc, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force+2, m, _mm512_castsi512_si256(i), jfrc,
_MM_SCALE_2);
}
inline void SIMD_safe_jforce(const SIMD_mask &rmask, double *force,
@ -979,40 +1000,54 @@ namespace ip_simd {
amzd = _mm512_cvtps_pd(_mm512_castps512_ps256(amz));
SIMD_conflict_pi_reduce3(rmask, joffset, amxd, amyd, amzd);
SIMD_double jfrc;
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset,
force, _MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
_mm512_castsi512_si256(joffset),
force, _MM_SCALE_2);
jfrc = jfrc + amxd;
_mm512_mask_i32loscatter_pd(force, rmask, joffset, jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset,
force + 1, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force, rmask, _mm512_castsi512_si256(joffset),
jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
_mm512_castsi512_si256(joffset),
force + 1, _MM_SCALE_2);
jfrc = jfrc + amyd;
_mm512_mask_i32loscatter_pd(force+1, rmask, joffset, jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset,
force + 2, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force+1, rmask, _mm512_castsi512_si256(joffset),
jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
_mm512_castsi512_si256(joffset),
force + 2, _MM_SCALE_2);
jfrc = jfrc + amzd;
_mm512_mask_i32loscatter_pd(force+2, rmask, joffset, jfrc, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force+2, rmask, _mm512_castsi512_si256(joffset),
jfrc, _MM_SCALE_2);
SIMD_mask rmask2 = rmask >> 8;
amxd = _mm512_cvtps_pd(_mm512_castps512_ps256(
_mm512_shuffle_f32x4(amx,amx,238)));
amyd = _mm512_cvtps_pd(_mm512_castps512_ps256(
_mm512_shuffle_f32x4(amy,amy,238)));
amzd = _mm512_cvtps_pd(_mm512_castps512_ps256(
_mm512_shuffle_f32x4(amz,amz,238)));
SIMD_int joffset2 = _mm512_shuffle_i32x4(joffset, joffset, 238);
SIMD_conflict_pi_reduce3(rmask2, joffset2, amxd, amyd, amzd);
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask2, joffset2,
force, _MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask2,
_mm512_castsi512_si256(joffset2),
force, _MM_SCALE_2);
jfrc = jfrc + amxd;
_mm512_mask_i32loscatter_pd(force, rmask2, joffset2, jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask2, joffset2,
force + 1, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force, rmask2, _mm512_castsi512_si256(joffset2),
jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask2,
_mm512_castsi512_si256(joffset2),
force + 1, _MM_SCALE_2);
jfrc = jfrc + amyd;
_mm512_mask_i32loscatter_pd(force+1, rmask2, joffset2, jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask2, joffset2,
force + 2, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force+1, rmask2,
_mm512_castsi512_si256(joffset2), jfrc,
_MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask2,
_mm512_castsi512_si256(joffset2),
force + 2, _MM_SCALE_2);
jfrc = jfrc + amzd;
_mm512_mask_i32loscatter_pd(force+2, rmask2, joffset2, jfrc, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force+2, rmask2,
_mm512_castsi512_si256(joffset2), jfrc,
_MM_SCALE_2);
}
inline void SIMD_jforce_update(const SIMD_mask &m, float *force,
@ -1064,18 +1099,24 @@ namespace ip_simd {
const SIMD_int &i, const SIMD_double &fx,
const SIMD_double &fy, const SIMD_double &fz) {
SIMD_double jfrc;
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force,
_MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), force,
_MM_SCALE_2);
jfrc = jfrc - fx;
_mm512_mask_i32loscatter_pd(force, m, i, jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force + 1,
_MM_SCALE_2);
_mm512_mask_i32scatter_pd(force, m, _mm512_castsi512_si256(i), jfrc,
_MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), force + 1,
_MM_SCALE_2);
jfrc = jfrc - fy;
_mm512_mask_i32loscatter_pd(force+1, m, i, jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force + 2,
_MM_SCALE_2);
_mm512_mask_i32scatter_pd(force+1, m, _mm512_castsi512_si256(i), jfrc,
_MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), force + 2,
_MM_SCALE_2);
jfrc = jfrc - fz;
_mm512_mask_i32loscatter_pd(force+2, m, i, jfrc, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force+2, m, _mm512_castsi512_si256(i), jfrc,
_MM_SCALE_2);
}
inline void SIMD_jforce_update(const SIMD_mask &rmask,
@ -1502,11 +1543,12 @@ namespace ip_simd {
fwtmp = SIMD_add(fwtmp, hmask, fwtmp, hevdwl);
fjtmp = SIMD_add(fjtmp, hmask, fjtmp, hevdwl);
SIMD_conflict_pi_reduce1(hmask, k, hevdwl);
SIMD_double keng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(),
hmask, k, force + 3,
_MM_SCALE_2);
SIMD_double keng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), hmask,
_mm512_castsi512_si256(k),
force + 3, _MM_SCALE_2);
keng = keng + hevdwl;
_mm512_mask_i32loscatter_pd(force + 3, hmask, k, keng, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force + 3, hmask, _mm512_castsi512_si256(k),
keng, _MM_SCALE_2);
}
}
@ -1523,11 +1565,12 @@ namespace ip_simd {
fwtmp = SIMD_add(fwtmp, hmask, fwtmp, hevdwl);
fjtmp = SIMD_add(fjtmp, hmask, fjtmp, hevdwl);
SIMD_conflict_pi_reduce1(hmask, k, hevdwl);
SIMD_double keng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(),
hmask, k, force + 3,
_MM_SCALE_2);
SIMD_double keng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), hmask,
_mm512_castsi512_si256(k),
force + 3, _MM_SCALE_2);
keng = keng + hevdwl;
_mm512_mask_i32loscatter_pd(force + 3, hmask, k, keng, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force + 3, hmask, _mm512_castsi512_si256(k),
keng, _MM_SCALE_2);
}
SIMD_mask hmask2 = hmask >> 8;
facradd = _mm512_cvtps_pd(_mm512_castps512_ps256(
@ -1539,11 +1582,13 @@ namespace ip_simd {
fjtmp2 = SIMD_add(fjtmp2, hmask2, fjtmp2, hevdwl);
SIMD_int k2 = _mm512_shuffle_i32x4(k, k, 238);
SIMD_conflict_pi_reduce1(hmask2, k2, hevdwl);
SIMD_double keng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(),
hmask2, k2, force + 3,
_MM_SCALE_2);
SIMD_double keng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(),
hmask2,
_mm512_castsi512_si256(k2),
force + 3, _MM_SCALE_2);
keng = keng + hevdwl;
_mm512_mask_i32loscatter_pd(force + 3, hmask2, k2, keng, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force + 3, hmask2, _mm512_castsi512_si256(k2),
keng, _MM_SCALE_2);
}
}
@ -1815,24 +1860,32 @@ namespace ip_simd {
const int EFLAG, const int eatom,
const SIMD_double &fwtmp) {
SIMD_double jfrc;
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force,
_MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), force,
_MM_SCALE_2);
jfrc = jfrc + fx;
_mm512_mask_i32loscatter_pd(force, m, i, jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force + 1,
_MM_SCALE_2);
_mm512_mask_i32scatter_pd(force, m, _mm512_castsi512_si256(i), jfrc,
_MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), force + 1,
_MM_SCALE_2);
jfrc = jfrc + fy;
_mm512_mask_i32loscatter_pd(force+1, m, i, jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force + 2,
_MM_SCALE_2);
_mm512_mask_i32scatter_pd(force+1, m, _mm512_castsi512_si256(i), jfrc,
_MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), force + 2,
_MM_SCALE_2);
jfrc = jfrc + fz;
_mm512_mask_i32loscatter_pd(force+2, m, i, jfrc, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force+2, m, _mm512_castsi512_si256(i), jfrc,
_MM_SCALE_2);
if (EFLAG) {
if (eatom) {
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i,
force + 3, _MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i),
force + 3, _MM_SCALE_2);
jfrc = jfrc + fwtmp;
_mm512_mask_i32loscatter_pd(force+3, m, i, jfrc, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force+3, m, _mm512_castsi512_si256(i), jfrc,
_MM_SCALE_2);
}
}
}

View File

@ -324,7 +324,11 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
const int bstart = binhead[ibin + binstart[k]];
const int bend = binhead[ibin + binend[k]];
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int jj = bstart; jj < bend; jj++)
tj[ncount++] = binpacked[jj];
@ -345,15 +349,23 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
const int bstart = binhead[ibin + stencil[k]];
const int bend = binhead[ibin + stencil[k] + 1];
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int jj = bstart; jj < bend; jj++)
tj[ncount++] = binpacked[jj];
}
} // if i < nlocal
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int u = 0; u < ncount; u++) {
const int j = tj[u];
@ -425,12 +437,16 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
int alln = n;
n = 0;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#ifdef LMP_INTEL_NBOR_COMPAT
#pragma ivdep
#else
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
#pragma vector aligned
#endif
for (int u = 0; u < alln; u++) {
int which;
@ -454,12 +470,16 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
alln = n2;
n2 = maxnbors * 2;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#ifdef LMP_INTEL_NBOR_COMPAT
#pragma ivdep
#else
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
#pragma vector aligned
#endif
for (int u = n2; u < alln; u++) {
int which;

View File

@ -344,14 +344,22 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
const int bstart = binhead[ibin + binstart[k]];
const int bend = binhead[ibin + binend[k]];
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int jj = bstart; jj < bend; jj++)
tj[ncount++] = binpacked[jj];
}
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int u = 0; u < ncount; u++) {
const int j = tj[u];
@ -375,7 +383,11 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
const int bstart = binhead[ibin];
const int bend = binhead[ibin + 1];
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int jj = bstart; jj < bend; jj++) {
const int j = binpacked[jj];
@ -533,12 +545,16 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
n = pack_offset;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#ifdef LMP_INTEL_NBOR_COMPAT
#pragma ivdep
#else
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
#pragma vector aligned
#endif
for (int u = n; u < alln; u++) {
int which;
@ -566,12 +582,16 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
n2 = pack_offset + maxnbors;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#ifdef LMP_INTEL_NBOR_COMPAT
#pragma ivdep
#else
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
#pragma vector aligned
#endif
for (int u = n2; u < alln; u++) {
int which;
@ -737,8 +757,14 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
int jnum = numneigh[i];
if (!THREE) IP_PRE_neighbor_pad(jnum, offload);
#if __INTEL_COMPILER+0 > 1499
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(max:vlmax,vgmax) \
reduction(min:vlmin, vgmin)
#else
#pragma simd reduction(max:vlmax,vgmax) \
reduction(min:vlmin, vgmin)
#endif
#pragma vector aligned
#pragma simd reduction(max:vlmax,vgmax) reduction(min:vlmin, vgmin)
#endif
for (int jj = 0; jj < jnum; jj++) {
const int j = jlist[jj] & NEIGHMASK;
@ -782,8 +808,12 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
int jnum = numneigh[i];
if (!THREE) IP_PRE_neighbor_pad(jnum, offload);
int jj = 0;
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
for (jj = 0; jj < jnum; jj++) {
const int which = jlist[jj] >> SBBITS & 3;
const int j = jlist[jj] & NEIGHMASK;
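The bound-tracking loop above gets the same treatment with a combined min/max reduction; reduction(max:...) and reduction(min:...) on omp simd require OpenMP 4.0 or newer, which is effectively what USE_OMP_SIMD asserts. A small stand-in sketch (names are hypothetical):

static void simd_bounds(const int *v, int n, int &vmin, int &vmax)
{
  // reduction variables must not be references, so work on locals
  int lo = vmin, hi = vmax;
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(min:lo) reduction(max:hi)
#endif
  for (int i = 0; i < n; i++) {
    if (v[i] < lo) lo = v[i];
    if (v[i] > hi) hi = v[i];
  }
  vmin = lo;
  vmax = hi;
}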

View File

@ -292,8 +292,9 @@ void PairAIREBOIntel::compute(
ev_init(eflag,vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");
pvector[0] = pvector[1] = pvector[2] = 0.0;
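This guard change recurs in every pair style below: a virial request under pair_modify nofdotr used to be rejected unconditionally by the INTEL styles, and is now rejected only together with newton on, apparently because the vectorized kernels can tally the per-pair virial themselves only when Newton's third law communication is off. A stand-in sketch of the relaxed condition (flags passed as arguments instead of read from class members):

static bool nofdotr_allowed(bool vflag, bool vflag_fdotr, bool newton_pair)
{
  // error only when a virial is requested, fdotr accumulation is off,
  // and newton_pair is on; every other combination is now accepted
  return !(vflag && !vflag_fdotr && newton_pair);
}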

View File

@ -77,8 +77,9 @@ void PairBuckCoulCutIntel::compute(int eflag, int vflag,
ev_init(eflag,vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");
const int inum = list->inum;
const int nthreads = comm->nthreads;
@ -248,12 +249,18 @@ void PairBuckCoulCutIntel::eval(const int offload, const int vflag,
fxtmp = fytmp = fztmp = (acc_t)0;
if (EFLAG) fwtmp = sevdwl = secoul = (acc_t)0;
if (NEWTON_PAIR == 0)
if (vflag == VIRIAL_PAIR) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
if (vflag == VIRIAL_PAIR)
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif
#pragma vector aligned
#endif
for (int jj = 0; jj < jnum; jj++) {
flt_t forcecoul, forcebuck, evdwl, ecoul;
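In the force loops every accumulator, the three force components, the energy terms, and the six virial terms, appears in a single reduction(+:...) clause so that each SIMD lane carries private copies that are merged after the loop. A reduced, compilable sketch with one force and one virial accumulator (stand-in names and body):

static void force_virial_row(const double *dx, const double *w, int jnum,
                             double &fx, double &sv0)
{
  double fxtmp = 0.0, sv0tmp = 0.0;
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp, sv0tmp)
#endif
  for (int jj = 0; jj < jnum; jj++) {
    const double f = w[jj] * dx[jj];  // stand-in for the pair force along x
    fxtmp += f;                       // force accumulator
    sv0tmp += dx[jj] * f;             // xx virial accumulator
  }
  fx += fxtmp;
  sv0 += sv0tmp;
}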

View File

@ -77,8 +77,9 @@ void PairBuckCoulLongIntel::compute(int eflag, int vflag,
ev_init(eflag,vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");
const int inum = list->inum;
const int nthreads = comm->nthreads;
@ -309,9 +310,14 @@ void PairBuckCoulLongIntel::eval(const int offload, const int vflag,
}
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
secoul, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
secoul, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
#pragma vector aligned
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int jj = 0; jj < ej; jj++) {
flt_t forcecoul, forcebuck, evdwl, ecoul;

View File

@ -70,8 +70,9 @@ void PairBuckIntel::compute(int eflag, int vflag,
ev_init(eflag,vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");
const int inum = list->inum;
const int nthreads = comm->nthreads;
@ -230,12 +231,18 @@ void PairBuckIntel::eval(const int offload, const int vflag,
fxtmp = fytmp = fztmp = (acc_t)0;
if (EFLAG) fwtmp = sevdwl = (acc_t)0;
if (NEWTON_PAIR == 0)
if (vflag == VIRIAL_PAIR) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
if (vflag == VIRIAL_PAIR)
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif
#pragma vector aligned
#endif
for (int jj = 0; jj < jnum; jj++) {

View File

@ -89,8 +89,9 @@ void PairDPDIntel::compute(int eflag, int vflag,
ev_init(eflag, vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");
const int inum = list->inum;
const int nthreads = comm->nthreads;
@ -289,9 +290,14 @@ void PairDPDIntel::eval(const int offload, const int vflag,
}
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif
#pragma vector aligned
#endif
for (int jj = 0; jj < jnum; jj++) {
flt_t forcelj, evdwl;

View File

@ -82,8 +82,9 @@ void PairEAMIntel::compute(int eflag, int vflag,
ev_init(eflag, vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");
const int inum = list->inum;
const int nthreads = comm->nthreads;
@ -327,8 +328,12 @@ void PairEAMIntel::eval(const int offload, const int vflag,
}
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:rhoi)
#else
#pragma simd reduction(+:rhoi)
#endif
#pragma vector aligned
#endif
for (int jj = 0; jj < ej; jj++) {
int jtype;
@ -369,23 +374,35 @@ void PairEAMIntel::eval(const int offload, const int vflag,
const int rcount = nall;
if (nthreads == 2) {
double *trho2 = rho + nmax;
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
for (int n = 0; n < rcount; n++)
rho[n] += trho2[n];
} else if (nthreads == 4) {
double *trho2 = rho + nmax;
double *trho3 = trho2 + nmax;
double *trho4 = trho3 + nmax;
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
for (int n = 0; n < rcount; n++)
rho[n] += trho2[n] + trho3[n] + trho4[n];
} else {
double *trhon = rho + nmax;
for (int t = 1; t < nthreads; t++) {
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
for (int n = 0; n < rcount; n++)
rho[n] += trhon[n];
trhon += nmax;
@ -414,8 +431,12 @@ void PairEAMIntel::eval(const int offload, const int vflag,
if (EFLAG) tevdwl = (acc_t)0.0;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:tevdwl)
#else
#pragma simd reduction(+:tevdwl)
#endif
#pragma vector aligned
#endif
for (int ii = iifrom; ii < iito; ++ii) {
const int i = ilist[ii];
@ -510,9 +531,14 @@ void PairEAMIntel::eval(const int offload, const int vflag,
}
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif
#pragma vector aligned
#endif
for (int jj = 0; jj < ej; jj++) {
int jtype;

View File

@ -76,8 +76,9 @@ void PairGayBerneIntel::compute(int eflag, int vflag,
ev_init(eflag, vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");
const int inum = list->inum;
const int nall = atom->nlocal + atom->nghost;
@ -449,9 +450,14 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
__assume(packed_j % INTEL_MIC_VECTOR_WIDTH == 0);
#endif
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp,fytmp,fztmp,fwtmp,t1tmp,t2tmp, \
t3tmp,sevdwl,sv0,sv1,sv2,sv3,sv4,sv5)
#else
#pragma simd reduction(+:fxtmp,fytmp,fztmp,fwtmp,t1tmp,t2tmp, \
t3tmp,sevdwl,sv0,sv1,sv2,sv3,sv4,sv5)
#endif
#pragma vector aligned
#pragma simd reduction(+:fxtmp,fytmp,fztmp,fwtmp,t1tmp,t2tmp,t3tmp, \
sevdwl,sv0,sv1,sv2,sv3,sv4,sv5)
#endif
for (int jj = 0; jj < packed_j; jj++) {
flt_t a2_0, a2_1, a2_2, a2_3, a2_4, a2_5, a2_6, a2_7, a2_8;
@ -806,8 +812,12 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
acc_t *f_scalar2 = f_scalar + fst4;
for (int t = 1; t < nthreads; t++) {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int n = iifrom * 8; n < sto; n++)
f_scalar[n] += f_scalar2[n];

View File

@ -73,8 +73,9 @@ void PairLJCharmmCoulCharmmIntel::compute(int eflag, int vflag,
ev_init(eflag,vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");
const int inum = list->inum;
const int nthreads = comm->nthreads;
@ -294,9 +295,14 @@ void PairLJCharmmCoulCharmmIntel::eval(const int offload, const int vflag,
}
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
secoul, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
secoul, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
#pragma vector aligned
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int jj = 0; jj < ej; jj++) {
flt_t forcecoul, forcelj, evdwl;

View File

@ -77,8 +77,9 @@ void PairLJCharmmCoulLongIntel::compute(int eflag, int vflag,
ev_init(eflag,vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");
const int inum = list->inum;
const int nthreads = comm->nthreads;
@ -314,9 +315,14 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
}
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
secoul, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
secoul, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
#pragma vector aligned
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int jj = 0; jj < ej; jj++) {
flt_t forcecoul, forcelj, evdwl, ecoul;

View File

@ -76,8 +76,9 @@ void PairLJCutCoulLongIntel::compute(int eflag, int vflag,
ev_init(eflag,vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");
const int inum = list->inum;
const int nthreads = comm->nthreads;
@ -305,9 +306,14 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
}
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
secoul, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
secoul, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
#pragma vector aligned
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int jj = 0; jj < ej; jj++) {
flt_t forcecoul, forcelj, evdwl, ecoul;

View File

@ -68,8 +68,9 @@ void PairLJCutIntel::compute(int eflag, int vflag,
ev_init(eflag, vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");
const int inum = list->inum;
const int nthreads = comm->nthreads;
@ -241,9 +242,15 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
if (vflag == VIRIAL_PAIR) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5) \
aligned(jlist,x,ljc12oi,special_lj,f,lj34i:64)
#else
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#pragma vector aligned
#endif
#endif
for (int jj = 0; jj < jnum; jj++) {
flt_t forcelj, evdwl;
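The lj/cut kernel additionally gains an OpenMP aligned() clause that asserts 64-byte alignment for the hot arrays, conveying on the portable path what #pragma vector aligned asserted on the Intel-only one. A stand-in sketch (the caller must genuinely supply 64-byte aligned arrays for the assertion to hold):

static void axpy_aligned(double *f, const double *x, int n)
{
#if defined(USE_OMP_SIMD)
#pragma omp simd aligned(f, x : 64)
#endif
  for (int i = 0; i < n; i++) f[i] += 2.0 * x[i];
}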

View File

@ -97,8 +97,9 @@ void PairSWIntel::compute(int eflag, int vflag,
ev_init(eflag, vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");
const int inum = list->inum;
const int nthreads = comm->nthreads;
@ -371,8 +372,12 @@ void PairSWIntel::eval(const int offload, const int vflag,
}
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl)
#else
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl)
#endif
#pragma vector aligned
#endif
for (int jj = 0; jj < ejnum_pad; jj++) {
acc_t fjxtmp, fjytmp, fjztmp, fjtmp;

View File

@ -91,8 +91,9 @@ void PairTersoffIntel::compute(int eflag, int vflag,
ev_init(eflag,vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");
const int inum = list->inum;
const int nthreads = comm->nthreads;

View File

@ -770,8 +770,12 @@ void PPPMDispIntel::particle_map(double delx, double dely, double delz,
IP_PRE_omp_range_id_align(iifrom, iito, tid, nlocal, nthr, sizeof(ATOM_T));
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:flag)
#else
#pragma simd reduction(+:flag)
#endif
#pragma vector aligned
#endif
for (int i = iifrom; i < iito; i++) {
@ -876,7 +880,11 @@ void PPPMDispIntel::make_rho_c(IntelBuffers<flt_t,acc_t> * /*buffers*/)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho_lookup[idx][k];
@ -885,7 +893,11 @@ void PPPMDispIntel::make_rho_c(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower; k <= nupper; k++) {
FFT_SCALAR r1,r2,r3;
@ -917,8 +929,12 @@ void PPPMDispIntel::make_rho_c(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int mzy = m*nix + mz;
FFT_SCALAR x0 = y0*rho[1][m];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mzyx = l + mzy;
@ -939,7 +955,11 @@ void PPPMDispIntel::make_rho_c(IntelBuffers<flt_t,acc_t> * /*buffers*/)
IP_PRE_omp_range_id(ifrom, ito, tid, ngrid, nthr);
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int i = ifrom; i < ito; i++) {
for (int j = 1; j < nthr; j++) {
@ -1025,7 +1045,11 @@ void PPPMDispIntel::make_rho_g(IntelBuffers<flt_t,acc_t> * /*buffers*/)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho6_lookup[idx][k];
@ -1034,7 +1058,11 @@ void PPPMDispIntel::make_rho_g(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower_6; k <= nupper_6; k++) {
FFT_SCALAR r1,r2,r3;
@ -1067,8 +1095,12 @@ void PPPMDispIntel::make_rho_g(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int mzy = m*nix + mz;
FFT_SCALAR x0 = y0*rho[1][m];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mzyx = l + mzy;
@ -1089,7 +1121,11 @@ void PPPMDispIntel::make_rho_g(IntelBuffers<flt_t,acc_t> * /*buffers*/)
IP_PRE_omp_range_id(ifrom, ito, tid, ngrid_6, nthr);
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int i = ifrom; i < ito; i++) {
for (int j = 1; j < nthr; j++) {
@ -1173,7 +1209,11 @@ void PPPMDispIntel::make_rho_a(IntelBuffers<flt_t,acc_t> * /*buffers*/)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho6_lookup[idx][k];
@ -1182,7 +1222,11 @@ void PPPMDispIntel::make_rho_a(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower_6; k <= nupper_6; k++) {
FFT_SCALAR r1,r2,r3;
@ -1215,8 +1259,12 @@ void PPPMDispIntel::make_rho_a(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int my = m + nysum;
FFT_SCALAR x0 = y0*rho[1][m];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mx = l + nxsum;
@ -1307,7 +1355,11 @@ void PPPMDispIntel::make_rho_none(IntelBuffers<flt_t,acc_t> * /*buffers*/)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho6_lookup[idx][k];
@ -1316,7 +1368,11 @@ void PPPMDispIntel::make_rho_none(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower_6; k <= nupper_6; k++) {
FFT_SCALAR r1,r2,r3;
@ -1349,8 +1405,12 @@ void PPPMDispIntel::make_rho_none(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int mzy = m*nix + mz;
FFT_SCALAR x0 = y0*rho[1][m];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mzyx = l + mzy;
@ -1373,7 +1433,11 @@ void PPPMDispIntel::make_rho_none(IntelBuffers<flt_t,acc_t> * /*buffers*/)
IP_PRE_omp_range_id(ifrom, ito, tid, ngrid_6*nsplit, nthr);
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int i = ifrom; i < ito; i++) {
for (int j = 1; j < nthr; j++) {
@ -1454,7 +1518,11 @@ void PPPMDispIntel::fieldforce_c_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho0[k] = rho_lookup[idx][k];
@ -1463,7 +1531,11 @@ void PPPMDispIntel::fieldforce_c_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower; k <= nupper; k++) {
FFT_SCALAR r1 = rho_coeff[order-1][k];
@ -1498,8 +1570,12 @@ void PPPMDispIntel::fieldforce_c_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int my = m+nysum;
FFT_SCALAR y0 = z0*rho1[m];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mx = l+nxsum;
@ -1624,7 +1700,11 @@ void PPPMDispIntel::fieldforce_c_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho_lookup[idx][k];
@ -1636,7 +1716,11 @@ void PPPMDispIntel::fieldforce_c_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower; k <= nupper; k++) {
FFT_SCALAR r1,r2,r3,dr1,dr2,dr3;
@ -1680,8 +1764,12 @@ void PPPMDispIntel::fieldforce_c_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
FFT_SCALAR eky_p = drho[1][m] * rho[2][n];
FFT_SCALAR ekz_p = rho[1][m] * drho[2][n];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mx = l + nxsum;
@ -1702,7 +1790,11 @@ void PPPMDispIntel::fieldforce_c_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
}
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int i = ifrom; i < ito; i++) {
particle_ekx[i] *= hx_inv;
@ -1802,7 +1894,11 @@ void PPPMDispIntel::fieldforce_g_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho0[k] = rho6_lookup[idx][k];
@ -1811,7 +1907,11 @@ void PPPMDispIntel::fieldforce_g_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower_6; k <= nupper_6; k++) {
FFT_SCALAR r1 = rho_coeff_6[order_6-1][k];
@ -1846,8 +1946,12 @@ void PPPMDispIntel::fieldforce_g_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int my = m+nysum;
FFT_SCALAR y0 = z0*rho1[m];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mx = l+nxsum;
@ -1967,7 +2071,11 @@ void PPPMDispIntel::fieldforce_g_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho6_lookup[idx][k];
@ -1979,7 +2087,11 @@ void PPPMDispIntel::fieldforce_g_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower_6; k <= nupper_6; k++) {
FFT_SCALAR r1,r2,r3,dr1,dr2,dr3;
@ -2023,8 +2135,12 @@ void PPPMDispIntel::fieldforce_g_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
FFT_SCALAR eky_p = drho[1][m] * rho[2][n];
FFT_SCALAR ekz_p = rho[1][m] * drho[2][n];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mx = l + nxsum;
@ -2045,7 +2161,11 @@ void PPPMDispIntel::fieldforce_g_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
}
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int i = ifrom; i < ito; i++) {
particle_ekx[i] *= hx_inv;
@ -2143,7 +2263,11 @@ void PPPMDispIntel::fieldforce_a_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho0[k] = rho6_lookup[idx][k];
@ -2152,7 +2276,11 @@ void PPPMDispIntel::fieldforce_a_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower_6; k <= nupper_6; k++) {
FFT_SCALAR r1 = rho_coeff_6[order_6-1][k];
@ -2206,8 +2334,12 @@ void PPPMDispIntel::fieldforce_a_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int my = m+nysum;
FFT_SCALAR y0 = z0*rho1[m];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mx = l+nxsum;
@ -2398,7 +2530,11 @@ void PPPMDispIntel::fieldforce_a_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho6_lookup[idx][k];
@ -2410,7 +2546,11 @@ void PPPMDispIntel::fieldforce_a_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower_6; k <= nupper_6; k++) {
FFT_SCALAR r1,r2,r3,dr1,dr2,dr3;
@ -2479,8 +2619,12 @@ void PPPMDispIntel::fieldforce_a_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
FFT_SCALAR eky_p = drho[1][m] * rho[2][n];
FFT_SCALAR ekz_p = rho[1][m] * drho[2][n];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mx = l + nxsum;
@ -2541,7 +2685,11 @@ void PPPMDispIntel::fieldforce_a_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
}
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int i = ifrom; i < ito; i++) {
particle_ekx0[i] *= hx_inv;
@ -2671,7 +2819,11 @@ void PPPMDispIntel::fieldforce_none_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho0[k] = rho6_lookup[idx][k];
@ -2680,7 +2832,11 @@ void PPPMDispIntel::fieldforce_none_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower_6; k <= nupper_6; k++) {
FFT_SCALAR r1 = rho_coeff_6[order_6-1][k];
@ -2721,8 +2877,12 @@ void PPPMDispIntel::fieldforce_none_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int my = m+nysum;
FFT_SCALAR y0 = z0*rho1[m];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mx = l+nxsum;
@ -2848,7 +3008,11 @@ void PPPMDispIntel::fieldforce_none_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho6_lookup[idx][k];
@ -2860,7 +3024,11 @@ void PPPMDispIntel::fieldforce_none_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower_6; k <= nupper_6; k++) {
FFT_SCALAR r1,r2,r3,dr1,dr2,dr3;
@ -2909,8 +3077,12 @@ void PPPMDispIntel::fieldforce_none_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
FFT_SCALAR eky_p = drho[1][m] * rho[2][n];
FFT_SCALAR ekz_p = rho[1][m] * drho[2][n];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mx = l + nxsum;
@ -2992,7 +3164,11 @@ void PPPMDispIntel::precompute_rho()
for (int i = 0; i < rho_points; i++) {
FFT_SCALAR dx = -1. + 1./half_rho_scale * (FFT_SCALAR)i;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k=nlower; k<=nupper;k++) {
FFT_SCALAR r1 = ZEROF;
@ -3006,7 +3182,11 @@ void PPPMDispIntel::precompute_rho()
}
if (differentiation_flag == 1) {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k=nlower; k<=nupper;k++) {
FFT_SCALAR r1 = ZEROF;
@ -3026,7 +3206,11 @@ void PPPMDispIntel::precompute_rho()
for (int i = 0; i < rho_points; i++) {
FFT_SCALAR dx = -1. + 1./half_rho_scale * (FFT_SCALAR)i;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k=nlower_6; k<=nupper_6;k++) {
FFT_SCALAR r1 = ZEROF;
@ -3040,7 +3224,11 @@ void PPPMDispIntel::precompute_rho()
}
if (differentiation_flag == 1) {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k=nlower_6; k<=nupper_6;k++) {
FFT_SCALAR r1 = ZEROF;
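Throughout these PPPM kernels the Intel-only trip-count hint, #pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7), now follows the SIMD pragma rather than preceding it, keeping the portable pragma immediately in front of the short interpolation loop. A stand-in sketch with a hypothetical bound of 16 in place of the macro:

static void spread_stencil(double *rho, const double *coeff, int order)
{
  // order is the small PPPM interpolation order, hence the trip-count hint;
  // loop_count is Intel-specific and ignored by other compilers
#if defined(USE_OMP_SIMD)
#pragma omp simd
#endif
#pragma loop_count min(2), max(16), avg(7)
  for (int l = 0; l < order; l++) rho[l] += coeff[l];
}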

View File

@ -394,8 +394,12 @@ void PPPMIntel::particle_map(IntelBuffers<flt_t,acc_t> *buffers)
IP_PRE_omp_range_id_align(iifrom, iito, tid, nlocal, nthr, sizeof(ATOM_T));
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:flag)
#else
#pragma simd reduction(+:flag)
#endif
#pragma vector aligned
#endif
for (int i = iifrom; i < iito; i++) {
@ -500,7 +504,11 @@ void PPPMIntel::make_rho(IntelBuffers<flt_t,acc_t> *buffers)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho_lookup[idx][k];
@ -509,7 +517,11 @@ void PPPMIntel::make_rho(IntelBuffers<flt_t,acc_t> *buffers)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower; k <= nupper; k++) {
FFT_SCALAR r1,r2,r3;
@ -541,7 +553,11 @@ void PPPMIntel::make_rho(IntelBuffers<flt_t,acc_t> *buffers)
int mzy = m*nix + mz;
FFT_SCALAR x0 = y0*rho[1][m];
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) {
int mzyx = l + mzy;
@ -563,7 +579,11 @@ void PPPMIntel::make_rho(IntelBuffers<flt_t,acc_t> *buffers)
IP_PRE_omp_range_id(ifrom, ito, tid, ngrid, nthr);
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int i = ifrom; i < ito; i++) {
for (int j = 1; j < nthr; j++) {
@ -645,7 +665,11 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho0[k] = rho_lookup[idx][k];
@ -654,7 +678,11 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower; k <= nupper; k++) {
FFT_SCALAR r1 = rho_coeff[order-1][k];
@ -690,7 +718,11 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers)
int my = m+nysum;
FFT_SCALAR y0 = z0*rho1[m];
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) {
int mx = l+nxsum;
@ -813,7 +845,11 @@ void PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho_lookup[idx][k];
@ -825,7 +861,11 @@ void PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower; k <= nupper; k++) {
FFT_SCALAR r1,r2,r3,dr1,dr2,dr3;
@ -871,7 +911,11 @@ void PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers)
FFT_SCALAR eky_p = drho[1][m] * rho[2][n];
FFT_SCALAR ekz_p = rho[1][m] * drho[2][n];
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) {
int mx = l + nxsum;
@ -893,7 +937,11 @@ void PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers)
}
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int i = ifrom; i < ito; i++) {
particle_ekx[i] *= hx_inv;
@ -942,7 +990,11 @@ void PPPMIntel::precompute_rho()
for (int i = 0; i < rho_points; i++) {
FFT_SCALAR dx = -1. + 1./half_rho_scale * (FFT_SCALAR)i;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k=nlower; k<=nupper;k++) {
FFT_SCALAR r1 = ZEROF;
@ -956,7 +1008,11 @@ void PPPMIntel::precompute_rho()
}
if (differentiation_flag == 1) {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k=nlower; k<=nupper;k++) {
FFT_SCALAR r1 = ZEROF;
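Worth noting next to these hunks is the thread reduction that follows the grid spreading in make_rho: each thread scatters charge into its own copy of the grid, and the copies are then summed into the shared one. A stand-in sketch assuming the per-thread copies are laid out contiguously after the first grid:

static void reduce_thread_grids(double *grid, int ngrid, int nthr)
{
  // grid[0..ngrid) is the shared copy; thread t's copy starts at t*ngrid
  for (int i = 0; i < ngrid; i++)
    for (int j = 1; j < nthr; j++)
      grid[i] += grid[j * ngrid + i];
}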

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -14,28 +13,26 @@
#include "compute_force_tally.h"
#include <cmath>
#include "atom.h"
#include "group.h"
#include "pair.h"
#include "update.h"
#include "memory.h"
#include "comm.h"
#include "error.h"
#include "force.h"
#include "comm.h"
#include "group.h"
#include "memory.h"
#include "pair.h"
#include "update.h"
#include <cmath>
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
ComputeForceTally::ComputeForceTally(LAMMPS *lmp, int narg, char **arg) :
Compute(lmp, narg, arg)
ComputeForceTally::ComputeForceTally(LAMMPS *lmp, int narg, char **arg) : Compute(lmp, narg, arg)
{
if (narg < 4) error->all(FLERR,"Illegal compute force/tally command");
if (narg < 4) error->all(FLERR, "Illegal compute force/tally command");
igroup2 = group->find(arg[3]);
if (igroup2 == -1)
error->all(FLERR,"Could not find compute force/tally second group ID");
if (igroup2 == -1) error->all(FLERR, "Could not find compute force/tally second group ID");
groupbit2 = group->bitmask[igroup2];
scalar_flag = 1;
@ -46,7 +43,7 @@ ComputeForceTally::ComputeForceTally(LAMMPS *lmp, int narg, char **arg) :
comm_reverse = size_peratom_cols = 3;
extscalar = 1;
peflag = 1; // we need Pair::ev_tally() to be run
did_setup = invoked_peratom = invoked_scalar = -1;
nmax = -1;
@ -68,17 +65,16 @@ ComputeForceTally::~ComputeForceTally()
void ComputeForceTally::init()
{
if (force->pair == nullptr)
error->all(FLERR,"Trying to use compute force/tally without pair style");
error->all(FLERR, "Trying to use compute force/tally without pair style");
else
force->pair->add_tally_callback(this);
if (comm->me == 0) {
if (force->pair->single_enable == 0 || force->pair->manybody_flag)
error->warning(FLERR,"Compute force/tally used with incompatible pair style");
error->warning(FLERR, "Compute force/tally used with incompatible pair style");
if (force->bond || force->angle || force->dihedral
|| force->improper || force->kspace)
error->warning(FLERR,"Compute force/tally only called from pair style");
if (force->bond || force->angle || force->dihedral || force->improper || force->kspace)
error->warning(FLERR, "Compute force/tally only called from pair style");
}
did_setup = -1;
}
@ -99,51 +95,48 @@ void ComputeForceTally::pair_setup_callback(int, int)
if (atom->nmax > nmax) {
memory->destroy(fatom);
nmax = atom->nmax;
memory->create(fatom,nmax,size_peratom_cols,"force/tally:fatom");
memory->create(fatom, nmax, size_peratom_cols, "force/tally:fatom");
array_atom = fatom;
}
// clear storage
for (int i=0; i < ntotal; ++i)
for (int j=0; j < size_peratom_cols; ++j)
fatom[i][j] = 0.0;
for (int i = 0; i < ntotal; ++i)
for (int j = 0; j < size_peratom_cols; ++j) fatom[i][j] = 0.0;
for (int i=0; i < size_peratom_cols; ++i)
vector[i] = ftotal[i] = 0.0;
for (int i = 0; i < size_peratom_cols; ++i) vector[i] = ftotal[i] = 0.0;
did_setup = update->ntimestep;
}
/* ---------------------------------------------------------------------- */
void ComputeForceTally::pair_tally_callback(int i, int j, int nlocal, int newton,
double, double, double fpair,
double dx, double dy, double dz)
void ComputeForceTally::pair_tally_callback(int i, int j, int nlocal, int newton, double, double,
double fpair, double dx, double dy, double dz)
{
const int * const mask = atom->mask;
const int *const mask = atom->mask;
if ( ((mask[i] & groupbit) && (mask[j] & groupbit2))
|| ((mask[i] & groupbit2) && (mask[j] & groupbit))) {
if (((mask[i] & groupbit) && (mask[j] & groupbit2)) ||
((mask[i] & groupbit2) && (mask[j] & groupbit))) {
if (newton || i < nlocal) {
if (mask[i] & groupbit) {
ftotal[0] += fpair*dx;
ftotal[1] += fpair*dy;
ftotal[2] += fpair*dz;
ftotal[0] += fpair * dx;
ftotal[1] += fpair * dy;
ftotal[2] += fpair * dz;
}
fatom[i][0] += fpair*dx;
fatom[i][1] += fpair*dy;
fatom[i][2] += fpair*dz;
fatom[i][0] += fpair * dx;
fatom[i][1] += fpair * dy;
fatom[i][2] += fpair * dz;
}
if (newton || j < nlocal) {
if (mask[j] & groupbit) {
ftotal[0] -= fpair*dx;
ftotal[1] -= fpair*dy;
ftotal[2] -= fpair*dz;
ftotal[0] -= fpair * dx;
ftotal[1] -= fpair * dy;
ftotal[2] -= fpair * dz;
}
fatom[j][0] -= fpair*dx;
fatom[j][1] -= fpair*dy;
fatom[j][2] -= fpair*dz;
fatom[j][0] -= fpair * dx;
fatom[j][1] -= fpair * dy;
fatom[j][2] -= fpair * dz;
}
}
}
@ -152,7 +145,7 @@ void ComputeForceTally::pair_tally_callback(int i, int j, int nlocal, int newton
int ComputeForceTally::pack_reverse_comm(int n, int first, double *buf)
{
int i,m,last;
int i, m, last;
m = 0;
last = first + n;
@ -168,7 +161,7 @@ int ComputeForceTally::pack_reverse_comm(int n, int first, double *buf)
void ComputeForceTally::unpack_reverse_comm(int n, int *list, double *buf)
{
int i,j,m;
int i, j, m;
m = 0;
for (i = 0; i < n; i++) {
@ -184,15 +177,14 @@ void ComputeForceTally::unpack_reverse_comm(int n, int *list, double *buf)
double ComputeForceTally::compute_scalar()
{
invoked_scalar = update->ntimestep;
if ((did_setup != invoked_scalar)
|| (update->eflag_global != invoked_scalar))
error->all(FLERR,"Energy was not tallied on needed timestep");
if ((did_setup != invoked_scalar) || (update->eflag_global != invoked_scalar))
error->all(FLERR, "Energy was not tallied on needed timestep");
// sum accumulated forces across procs
MPI_Allreduce(ftotal,vector,size_peratom_cols,MPI_DOUBLE,MPI_SUM,world);
MPI_Allreduce(ftotal, vector, size_peratom_cols, MPI_DOUBLE, MPI_SUM, world);
scalar = sqrt(vector[0]*vector[0]+vector[1]*vector[1]+vector[2]*vector[2]);
scalar = sqrt(vector[0] * vector[0] + vector[1] * vector[1] + vector[2] * vector[2]);
return scalar;
}
@ -201,9 +193,8 @@ double ComputeForceTally::compute_scalar()
void ComputeForceTally::compute_peratom()
{
invoked_peratom = update->ntimestep;
if ((did_setup != invoked_peratom)
|| (update->eflag_global != invoked_peratom))
error->all(FLERR,"Energy was not tallied on needed timestep");
if ((did_setup != invoked_peratom) || (update->eflag_global != invoked_peratom))
error->all(FLERR, "Energy was not tallied on needed timestep");
// collect contributions from ghost atoms
@ -213,8 +204,7 @@ void ComputeForceTally::compute_peratom()
// clear out ghost atom data after it has been collected to local atoms
const int nall = atom->nlocal + atom->nghost;
for (int i = atom->nlocal; i < nall; ++i)
for (int j = 0; j < size_peratom_cols; ++j)
fatom[i][j] = 0.0;
for (int j = 0; j < size_peratom_cols; ++j) fatom[i][j] = 0.0;
}
}
@ -224,7 +214,6 @@ void ComputeForceTally::compute_peratom()
double ComputeForceTally::memory_usage()
{
double bytes = (nmax < 0) ? 0 : nmax*size_peratom_cols * sizeof(double);
double bytes = (nmax < 0) ? 0 : nmax * (double)size_peratom_cols * sizeof(double);
return bytes;
}
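Besides the clang-format reflow, this and the following tally computes fix a latent integer overflow in memory_usage(): nmax*size_peratom_cols multiplies two ints before any widening, so casting one factor to double first keeps the byte count exact for very large per-process atom counts. A stand-in sketch of the corrected form:

static double tally_bytes(int nmax, int ncols)
{
  // promote to double before multiplying so the int*int product
  // cannot overflow when nmax * ncols * 8 exceeds INT_MAX
  return (nmax < 0) ? 0 : nmax * (double) ncols * sizeof(double);
}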

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -15,26 +14,25 @@
#include "compute_heat_flux_tally.h"
#include "atom.h"
#include "group.h"
#include "pair.h"
#include "update.h"
#include "memory.h"
#include "comm.h"
#include "error.h"
#include "force.h"
#include "comm.h"
#include "group.h"
#include "memory.h"
#include "pair.h"
#include "update.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
ComputeHeatFluxTally::ComputeHeatFluxTally(LAMMPS *lmp, int narg, char **arg) :
Compute(lmp, narg, arg)
{
if (narg < 4) error->all(FLERR,"Illegal compute heat/flux/tally command");
if (narg < 4) error->all(FLERR, "Illegal compute heat/flux/tally command");
igroup2 = group->find(arg[3]);
if (igroup2 == -1)
error->all(FLERR,"Could not find compute heat/flux/tally second group ID");
if (igroup2 == -1) error->all(FLERR, "Could not find compute heat/flux/tally second group ID");
groupbit2 = group->bitmask[igroup2];
vector_flag = 1;
@ -44,7 +42,7 @@ ComputeHeatFluxTally::ComputeHeatFluxTally(LAMMPS *lmp, int narg, char **arg) :
comm_reverse = 7;
extvector = 1;
size_vector = 6;
peflag = 1; // we need Pair::ev_tally() to be run
did_setup = 0;
invoked_peratom = invoked_scalar = -1;
@ -71,17 +69,16 @@ ComputeHeatFluxTally::~ComputeHeatFluxTally()
void ComputeHeatFluxTally::init()
{
if (force->pair == nullptr)
error->all(FLERR,"Trying to use compute heat/flux/tally without pair style");
error->all(FLERR, "Trying to use compute heat/flux/tally without pair style");
else
force->pair->add_tally_callback(this);
if (comm->me == 0) {
if (force->pair->single_enable == 0 || force->pair->manybody_flag)
error->warning(FLERR,"Compute heat/flux/tally used with incompatible pair style");
error->warning(FLERR, "Compute heat/flux/tally used with incompatible pair style");
if (force->bond || force->angle || force->dihedral
|| force->improper || force->kspace)
error->warning(FLERR,"Compute heat/flux/tally only called from pair style");
if (force->bond || force->angle || force->dihedral || force->improper || force->kspace)
error->warning(FLERR, "Compute heat/flux/tally only called from pair style");
}
did_setup = -1;
}
@ -102,13 +99,13 @@ void ComputeHeatFluxTally::pair_setup_callback(int, int)
memory->destroy(stress);
memory->destroy(eatom);
nmax = atom->nmax;
memory->create(stress,nmax,6,"heat/flux/tally:stress");
memory->create(eatom,nmax,"heat/flux/tally:eatom");
memory->create(stress, nmax, 6, "heat/flux/tally:stress");
memory->create(eatom, nmax, "heat/flux/tally:eatom");
}
// clear storage
for (int i=0; i < ntotal; ++i) {
for (int i = 0; i < ntotal; ++i) {
eatom[i] = 0.0;
stress[i][0] = 0.0;
stress[i][1] = 0.0;
@ -118,30 +115,29 @@ void ComputeHeatFluxTally::pair_setup_callback(int, int)
stress[i][5] = 0.0;
}
for (int i=0; i < size_vector; ++i)
vector[i] = heatj[i] = 0.0;
for (int i = 0; i < size_vector; ++i) vector[i] = heatj[i] = 0.0;
did_setup = update->ntimestep;
}
/* ---------------------------------------------------------------------- */
void ComputeHeatFluxTally::pair_tally_callback(int i, int j, int nlocal, int newton,
double evdwl, double ecoul, double fpair,
double dx, double dy, double dz)
void ComputeHeatFluxTally::pair_tally_callback(int i, int j, int nlocal, int newton, double evdwl,
double ecoul, double fpair, double dx, double dy,
double dz)
{
const int * const mask = atom->mask;
const int *const mask = atom->mask;
if ( ((mask[i] & groupbit) && (mask[j] & groupbit2))
|| ((mask[i] & groupbit2) && (mask[j] & groupbit))) {
if (((mask[i] & groupbit) && (mask[j] & groupbit2)) ||
((mask[i] & groupbit2) && (mask[j] & groupbit))) {
const double epairhalf = 0.5 * (evdwl + ecoul);
fpair *= 0.5;
const double v0 = dx*dx*fpair; // dx*fpair = Fij_x
const double v1 = dy*dy*fpair;
const double v2 = dz*dz*fpair;
const double v3 = dx*dy*fpair;
const double v4 = dx*dz*fpair;
const double v5 = dy*dz*fpair;
const double v0 = dx * dx * fpair; // dx*fpair = Fij_x
const double v1 = dy * dy * fpair;
const double v2 = dz * dz * fpair;
const double v3 = dx * dy * fpair;
const double v4 = dx * dz * fpair;
const double v5 = dy * dz * fpair;
if (newton || i < nlocal) {
eatom[i] += epairhalf;
@ -168,7 +164,7 @@ void ComputeHeatFluxTally::pair_tally_callback(int i, int j, int nlocal, int new
int ComputeHeatFluxTally::pack_reverse_comm(int n, int first, double *buf)
{
int i,m,last;
int i, m, last;
m = 0;
last = first + n;
@ -188,7 +184,7 @@ int ComputeHeatFluxTally::pack_reverse_comm(int n, int first, double *buf)
void ComputeHeatFluxTally::unpack_reverse_comm(int n, int *list, double *buf)
{
int i,j,m;
int i, j, m;
m = 0;
for (i = 0; i < n; i++) {
@ -209,7 +205,7 @@ void ComputeHeatFluxTally::compute_vector()
{
invoked_vector = update->ntimestep;
if ((did_setup != invoked_vector) || (update->eflag_global != invoked_vector))
error->all(FLERR,"Energy was not tallied on needed timestep");
error->all(FLERR, "Energy was not tallied on needed timestep");
// collect contributions from ghost atoms
@ -244,26 +240,28 @@ void ComputeHeatFluxTally::compute_vector()
double *rmass = atom->rmass;
int *type = atom->type;
double jc[3] = {0.0,0.0,0.0};
double jv[3] = {0.0,0.0,0.0};
double jc[3] = {0.0, 0.0, 0.0};
double jv[3] = {0.0, 0.0, 0.0};
for (int i = 0; i < nlocal; i++) {
if (mask[i] & groupbit) {
const double * const vi = v[i];
const double * const si = stress[i];
const double *const vi = v[i];
const double *const si = stress[i];
double ke_i;
if (rmass) ke_i = pfactor * rmass[i];
else ke_i = pfactor * mass[type[i]];
ke_i *= (vi[0]*vi[0] + vi[1]*vi[1] + vi[2]*vi[2]);
if (rmass)
ke_i = pfactor * rmass[i];
else
ke_i = pfactor * mass[type[i]];
ke_i *= (vi[0] * vi[0] + vi[1] * vi[1] + vi[2] * vi[2]);
ke_i += eatom[i];
jc[0] += ke_i*vi[0];
jc[1] += ke_i*vi[1];
jc[2] += ke_i*vi[2];
jv[0] += si[0]*vi[0] + si[3]*vi[1] + si[4]*vi[2];
jv[1] += si[3]*vi[0] + si[1]*vi[1] + si[5]*vi[2];
jv[2] += si[4]*vi[0] + si[5]*vi[1] + si[2]*vi[2];
jc[0] += ke_i * vi[0];
jc[1] += ke_i * vi[1];
jc[2] += ke_i * vi[2];
jv[0] += si[0] * vi[0] + si[3] * vi[1] + si[4] * vi[2];
jv[1] += si[3] * vi[0] + si[1] * vi[1] + si[5] * vi[2];
jv[2] += si[4] * vi[0] + si[5] * vi[1] + si[2] * vi[2];
}
}
@ -274,7 +272,7 @@ void ComputeHeatFluxTally::compute_vector()
heatj[3] = jc[0];
heatj[4] = jc[1];
heatj[5] = jc[2];
MPI_Allreduce(heatj,vector,size_vector,MPI_DOUBLE,MPI_SUM,world);
MPI_Allreduce(heatj, vector, size_vector, MPI_DOUBLE, MPI_SUM, world);
}
/* ----------------------------------------------------------------------
@ -283,7 +281,6 @@ void ComputeHeatFluxTally::compute_vector()
double ComputeHeatFluxTally::memory_usage()
{
double bytes = (nmax < 0) ? 0 : nmax*comm_reverse * sizeof(double);
double bytes = (nmax < 0) ? 0 : nmax * (double)comm_reverse * sizeof(double);
return bytes;
}
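The reflowed loop above builds the virial part of the heat flux by contracting each atom's stress, stored in Voigt order (xx, yy, zz, xy, xz, yz), with its velocity, while the convective part weights the velocity by kinetic plus tallied potential energy. The contraction written out as a helper, with the same index pattern as the jv updates above:

static void stress_times_v(const double s[6], const double v[3], double jv[3])
{
  jv[0] += s[0] * v[0] + s[3] * v[1] + s[4] * v[2];  // (xx xy xz) . v
  jv[1] += s[3] * v[0] + s[1] * v[1] + s[5] * v[2];  // (xy yy yz) . v
  jv[2] += s[4] * v[0] + s[5] * v[1] + s[2] * v[2];  // (xz yz zz) . v
}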

View File

@ -233,6 +233,6 @@ void ComputeHeatFluxVirialTally::compute_peratom()
double ComputeHeatFluxVirialTally::memory_usage()
{
double bytes = (nmax < 0) ? 0 : nmax * size_peratom_cols * sizeof(double);
double bytes = (nmax < 0) ? 0 : nmax * (double)size_peratom_cols * sizeof(double);
return bytes;
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -15,25 +14,23 @@
#include "compute_pe_mol_tally.h"
#include "atom.h"
#include "comm.h"
#include "error.h"
#include "force.h"
#include "group.h"
#include "pair.h"
#include "update.h"
#include "error.h"
#include "force.h"
#include "comm.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
ComputePEMolTally::ComputePEMolTally(LAMMPS *lmp, int narg, char **arg) :
Compute(lmp, narg, arg)
ComputePEMolTally::ComputePEMolTally(LAMMPS *lmp, int narg, char **arg) : Compute(lmp, narg, arg)
{
if (narg < 4) error->all(FLERR,"Illegal compute pe/mol/tally command");
if (narg < 4) error->all(FLERR, "Illegal compute pe/mol/tally command");
igroup2 = group->find(arg[3]);
if (igroup2 == -1)
error->all(FLERR,"Could not find compute pe/mol/tally second group ID");
if (igroup2 == -1) error->all(FLERR, "Could not find compute pe/mol/tally second group ID");
groupbit2 = group->bitmask[igroup2];
vector_flag = 1;
@ -42,7 +39,7 @@ ComputePEMolTally::ComputePEMolTally(LAMMPS *lmp, int narg, char **arg) :
dynamic_group_allow = 0;
extvector = 1;
peflag = 1; // we need Pair::ev_tally() to be run
did_setup = invoked_vector = -1;
vector = new double[size_vector];
@ -61,20 +58,18 @@ ComputePEMolTally::~ComputePEMolTally()
void ComputePEMolTally::init()
{
if (force->pair == nullptr)
error->all(FLERR,"Trying to use compute pe/mol/tally without pair style");
error->all(FLERR, "Trying to use compute pe/mol/tally without pair style");
else
force->pair->add_tally_callback(this);
if (atom->molecule_flag == 0)
error->all(FLERR,"Compute pe/mol/tally requires molecule IDs");
if (atom->molecule_flag == 0) error->all(FLERR, "Compute pe/mol/tally requires molecule IDs");
if (comm->me == 0) {
if (force->pair->single_enable == 0 || force->pair->manybody_flag)
error->warning(FLERR,"Compute pe/mol/tally used with incompatible pair style");
error->warning(FLERR, "Compute pe/mol/tally used with incompatible pair style");
if (force->bond || force->angle || force->dihedral
|| force->improper || force->kspace)
error->warning(FLERR,"Compute pe/mol/tally only called from pair style");
if (force->bond || force->angle || force->dihedral || force->improper || force->kspace)
error->warning(FLERR, "Compute pe/mol/tally only called from pair style");
}
did_setup = -1;
}
@ -93,29 +88,33 @@ void ComputePEMolTally::pair_setup_callback(int, int)
}
/* ---------------------------------------------------------------------- */
void ComputePEMolTally::pair_tally_callback(int i, int j, int nlocal, int newton,
double evdwl, double ecoul, double,
double, double, double)
void ComputePEMolTally::pair_tally_callback(int i, int j, int nlocal, int newton, double evdwl,
double ecoul, double, double, double, double)
{
const int * const mask = atom->mask;
const tagint * const molid = atom->molecule;
const int *const mask = atom->mask;
const tagint *const molid = atom->molecule;
if ( ((mask[i] & groupbit) && (mask[j] & groupbit2))
|| ((mask[i] & groupbit2) && (mask[j] & groupbit))) {
if (((mask[i] & groupbit) && (mask[j] & groupbit2)) ||
((mask[i] & groupbit2) && (mask[j] & groupbit))) {
evdwl *= 0.5; ecoul *= 0.5;
evdwl *= 0.5;
ecoul *= 0.5;
if (newton || i < nlocal) {
if (molid[i] == molid[j]) {
etotal[0] += evdwl; etotal[1] += ecoul;
etotal[0] += evdwl;
etotal[1] += ecoul;
} else {
etotal[2] += evdwl; etotal[3] += ecoul;
etotal[2] += evdwl;
etotal[3] += ecoul;
}
}
if (newton || j < nlocal) {
if (molid[i] == molid[j]) {
etotal[0] += evdwl; etotal[1] += ecoul;
etotal[0] += evdwl;
etotal[1] += ecoul;
} else {
etotal[2] += evdwl; etotal[3] += ecoul;
etotal[2] += evdwl;
etotal[3] += ecoul;
}
}
}
@ -127,10 +126,9 @@ void ComputePEMolTally::compute_vector()
{
invoked_vector = update->ntimestep;
if ((did_setup != invoked_vector) || (update->eflag_global != invoked_vector))
error->all(FLERR,"Energy was not tallied on needed timestep");
error->all(FLERR, "Energy was not tallied on needed timestep");
// sum accumulated energies across procs
MPI_Allreduce(etotal,vector,size_vector,MPI_DOUBLE,MPI_SUM,world);
MPI_Allreduce(etotal, vector, size_vector, MPI_DOUBLE, MPI_SUM, world);
}
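The tally callbacks above all follow one convention: the pair energy is halved once and then credited separately to each atom under the guard newton || index < nlocal, so the total is counted exactly once whether ghost contributions are reverse-communicated (newton on) or skipped (newton off). A reduced stand-in sketch:

static void tally_pair_energy(double epair, int i, int j, int nlocal,
                              int newton, double *eatom)
{
  const double half = 0.5 * epair;
  if (newton || i < nlocal) eatom[i] += half;  // owned atom, or ghost with newton on
  if (newton || j < nlocal) eatom[j] += half;  // ghost halves are reverse-communicated later
}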

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -15,26 +14,24 @@
#include "compute_pe_tally.h"
#include "atom.h"
#include "group.h"
#include "pair.h"
#include "update.h"
#include "memory.h"
#include "comm.h"
#include "error.h"
#include "force.h"
#include "comm.h"
#include "group.h"
#include "memory.h"
#include "pair.h"
#include "update.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
ComputePETally::ComputePETally(LAMMPS *lmp, int narg, char **arg) :
Compute(lmp, narg, arg)
ComputePETally::ComputePETally(LAMMPS *lmp, int narg, char **arg) : Compute(lmp, narg, arg)
{
if (narg < 4) error->all(FLERR,"Illegal compute pe/tally command");
if (narg < 4) error->all(FLERR, "Illegal compute pe/tally command");
igroup2 = group->find(arg[3]);
if (igroup2 == -1)
error->all(FLERR,"Could not find compute pe/tally second group ID");
if (igroup2 == -1) error->all(FLERR, "Could not find compute pe/tally second group ID");
groupbit2 = group->bitmask[igroup2];
scalar_flag = 1;
@ -45,7 +42,7 @@ ComputePETally::ComputePETally(LAMMPS *lmp, int narg, char **arg) :
comm_reverse = size_peratom_cols = 2;
extscalar = 1;
peflag = 1; // we need Pair::ev_tally() to be run
did_setup = invoked_peratom = invoked_scalar = -1;
nmax = -1;
@ -67,17 +64,16 @@ ComputePETally::~ComputePETally()
void ComputePETally::init()
{
if (force->pair == nullptr)
error->all(FLERR,"Trying to use compute pe/tally without a pair style");
error->all(FLERR, "Trying to use compute pe/tally without a pair style");
else
force->pair->add_tally_callback(this);
if (comm->me == 0) {
if (force->pair->single_enable == 0 || force->pair->manybody_flag)
error->warning(FLERR,"Compute pe/tally used with incompatible pair style");
error->warning(FLERR, "Compute pe/tally used with incompatible pair style");
if (force->bond || force->angle || force->dihedral
|| force->improper || force->kspace)
error->warning(FLERR,"Compute pe/tally only called from pair style");
if (force->bond || force->angle || force->dihedral || force->improper || force->kspace)
error->warning(FLERR, "Compute pe/tally only called from pair style");
}
did_setup = -1;
}
@ -98,14 +94,13 @@ void ComputePETally::pair_setup_callback(int, int)
if (atom->nmax > nmax) {
memory->destroy(eatom);
nmax = atom->nmax;
memory->create(eatom,nmax,size_peratom_cols,"pe/tally:eatom");
memory->create(eatom, nmax, size_peratom_cols, "pe/tally:eatom");
array_atom = eatom;
}
// clear storage
for (int i=0; i < ntotal; ++i)
eatom[i][0] = eatom[i][1] = 0.0;
for (int i = 0; i < ntotal; ++i) eatom[i][0] = eatom[i][1] = 0.0;
vector[0] = etotal[0] = vector[1] = etotal[1] = 0.0;
@ -113,23 +108,27 @@ void ComputePETally::pair_setup_callback(int, int)
}
/* ---------------------------------------------------------------------- */
void ComputePETally::pair_tally_callback(int i, int j, int nlocal, int newton,
double evdwl, double ecoul, double,
double, double, double)
void ComputePETally::pair_tally_callback(int i, int j, int nlocal, int newton, double evdwl,
double ecoul, double, double, double, double)
{
const int * const mask = atom->mask;
const int *const mask = atom->mask;
if ( ((mask[i] & groupbit) && (mask[j] & groupbit2))
|| ((mask[i] & groupbit2) && (mask[j] & groupbit))) {
if (((mask[i] & groupbit) && (mask[j] & groupbit2)) ||
((mask[i] & groupbit2) && (mask[j] & groupbit))) {
evdwl *= 0.5; ecoul *= 0.5;
evdwl *= 0.5;
ecoul *= 0.5;
if (newton || i < nlocal) {
etotal[0] += evdwl; eatom[i][0] += evdwl;
etotal[1] += ecoul; eatom[i][1] += ecoul;
etotal[0] += evdwl;
eatom[i][0] += evdwl;
etotal[1] += ecoul;
eatom[i][1] += ecoul;
}
if (newton || j < nlocal) {
etotal[0] += evdwl; eatom[j][0] += evdwl;
etotal[1] += ecoul; eatom[j][1] += ecoul;
etotal[0] += evdwl;
eatom[j][0] += evdwl;
etotal[1] += ecoul;
eatom[j][1] += ecoul;
}
}
}
@ -138,7 +137,7 @@ void ComputePETally::pair_tally_callback(int i, int j, int nlocal, int newton,
int ComputePETally::pack_reverse_comm(int n, int first, double *buf)
{
int i,m,last;
int i, m, last;
m = 0;
last = first + n;
@ -153,7 +152,7 @@ int ComputePETally::pack_reverse_comm(int n, int first, double *buf)
void ComputePETally::unpack_reverse_comm(int n, int *list, double *buf)
{
int i,j,m;
int i, j, m;
m = 0;
for (i = 0; i < n; i++) {
@ -168,15 +167,14 @@ void ComputePETally::unpack_reverse_comm(int n, int *list, double *buf)
double ComputePETally::compute_scalar()
{
invoked_scalar = update->ntimestep;
if ((did_setup != invoked_scalar)
|| (update->eflag_global != invoked_scalar))
error->all(FLERR,"Energy was not tallied on needed timestep");
if ((did_setup != invoked_scalar) || (update->eflag_global != invoked_scalar))
error->all(FLERR, "Energy was not tallied on needed timestep");
// sum accumulated energies across procs
MPI_Allreduce(etotal,vector,size_peratom_cols,MPI_DOUBLE,MPI_SUM,world);
MPI_Allreduce(etotal, vector, size_peratom_cols, MPI_DOUBLE, MPI_SUM, world);
scalar = vector[0]+vector[1];
scalar = vector[0] + vector[1];
return scalar;
}
@ -185,9 +183,8 @@ double ComputePETally::compute_scalar()
void ComputePETally::compute_peratom()
{
invoked_peratom = update->ntimestep;
if ((did_setup != invoked_peratom)
|| (update->eflag_global != invoked_peratom))
error->all(FLERR,"Energy was not tallied on needed timestep");
if ((did_setup != invoked_peratom) || (update->eflag_global != invoked_peratom))
error->all(FLERR, "Energy was not tallied on needed timestep");
// collect contributions from ghost atoms
@ -196,8 +193,7 @@ void ComputePETally::compute_peratom()
// clear out ghost atom data after it has been collected to local atoms
const int nall = atom->nlocal + atom->nghost;
for (int i = atom->nlocal; i < nall; ++i)
eatom[i][0] = eatom[i][1] = 0.0;
for (int i = atom->nlocal; i < nall; ++i) eatom[i][0] = eatom[i][1] = 0.0;
}
}
@ -207,7 +203,6 @@ void ComputePETally::compute_peratom()
double ComputePETally::memory_usage()
{
double bytes = (nmax < 0) ? 0 : nmax*size_peratom_cols * sizeof(double);
double bytes = (nmax < 0) ? 0 : nmax * (double)size_peratom_cols * sizeof(double);
return bytes;
}
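
The pair_tally_callback() above encodes the double-counting rule for pairwise energies: Pair::ev_tally() reports the full pair energy once per pair, the callback halves it, and each half is credited to one atom of the pair, but only when that atom is local, or when newton_pair is on and the ghost contribution will later be folded back by the reverse communication in pack/unpack_reverse_comm(). A self-contained sketch of just that rule, simplified to a scalar per-atom energy (hypothetical helper, not the LAMMPS API):

#include <cstdio>

// half of the pair energy goes to each atom; ghost-atom halves are
// dropped unless newton_pair is on (then reverse comm collects them)
void tally_pair(double evdwl, int i, int j, int nlocal, bool newton,
                double *eatom, double &etotal)
{
  evdwl *= 0.5;
  if (newton || i < nlocal) { etotal += evdwl; eatom[i] += evdwl; }
  if (newton || j < nlocal) { etotal += evdwl; eatom[j] += evdwl; }
}

int main()
{
  double eatom[4] = {0, 0, 0, 0};
  double etotal = 0.0;
  // one local-local pair (0,1) and one local-ghost pair (1,3), nlocal = 3
  tally_pair(-1.0, 0, 1, 3, false, eatom, etotal);
  tally_pair(-0.5, 1, 3, 3, false, eatom, etotal);   // ghost half dropped
  printf("etotal = %g\n", etotal);                   // prints -1.25
  return 0;
}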

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -15,27 +14,25 @@
#include "compute_stress_tally.h"
#include "atom.h"
#include "group.h"
#include "pair.h"
#include "update.h"
#include "memory.h"
#include "error.h"
#include "force.h"
#include "comm.h"
#include "domain.h"
#include "error.h"
#include "force.h"
#include "group.h"
#include "memory.h"
#include "pair.h"
#include "update.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
ComputeStressTally::ComputeStressTally(LAMMPS *lmp, int narg, char **arg) :
Compute(lmp, narg, arg)
ComputeStressTally::ComputeStressTally(LAMMPS *lmp, int narg, char **arg) : Compute(lmp, narg, arg)
{
if (narg < 4) error->all(FLERR,"Illegal compute stress/tally command");
if (narg < 4) error->all(FLERR, "Illegal compute stress/tally command");
igroup2 = group->find(arg[3]);
if (igroup2 == -1)
error->all(FLERR,"Could not find compute stress/tally second group ID");
if (igroup2 == -1) error->all(FLERR, "Could not find compute stress/tally second group ID");
groupbit2 = group->bitmask[igroup2];
scalar_flag = 1;
@ -46,7 +43,7 @@ ComputeStressTally::ComputeStressTally(LAMMPS *lmp, int narg, char **arg) :
comm_reverse = size_peratom_cols = 6;
extscalar = 0;
peflag = 1; // we need Pair::ev_tally() to be run
peflag = 1; // we need Pair::ev_tally() to be run
did_setup = invoked_peratom = invoked_scalar = -1;
nmax = -1;
@ -70,17 +67,16 @@ ComputeStressTally::~ComputeStressTally()
void ComputeStressTally::init()
{
if (force->pair == nullptr)
error->all(FLERR,"Trying to use compute stress/tally without pair style");
error->all(FLERR, "Trying to use compute stress/tally without pair style");
else
force->pair->add_tally_callback(this);
if (comm->me == 0) {
if (force->pair->single_enable == 0 || force->pair->manybody_flag)
error->warning(FLERR,"Compute stress/tally used with incompatible pair style");
error->warning(FLERR, "Compute stress/tally used with incompatible pair style");
if (force->bond || force->angle || force->dihedral
|| force->improper || force->kspace)
error->warning(FLERR,"Compute stress/tally only called from pair style");
if (force->bond || force->angle || force->dihedral || force->improper || force->kspace)
error->warning(FLERR, "Compute stress/tally only called from pair style");
}
did_setup = -1;
}
@ -101,55 +97,64 @@ void ComputeStressTally::pair_setup_callback(int, int)
if (atom->nmax > nmax) {
memory->destroy(stress);
nmax = atom->nmax;
memory->create(stress,nmax,size_peratom_cols,"stress/tally:stress");
memory->create(stress, nmax, size_peratom_cols, "stress/tally:stress");
array_atom = stress;
}
// clear storage
for (int i=0; i < ntotal; ++i)
for (int j=0; j < size_peratom_cols; ++j)
stress[i][j] = 0.0;
for (int i = 0; i < ntotal; ++i)
for (int j = 0; j < size_peratom_cols; ++j) stress[i][j] = 0.0;
for (int i=0; i < size_peratom_cols; ++i)
vector[i] = virial[i] = 0.0;
for (int i = 0; i < size_peratom_cols; ++i) vector[i] = virial[i] = 0.0;
did_setup = update->ntimestep;
}
/* ---------------------------------------------------------------------- */
void ComputeStressTally::pair_tally_callback(int i, int j, int nlocal, int newton,
double, double, double fpair,
double dx, double dy, double dz)
void ComputeStressTally::pair_tally_callback(int i, int j, int nlocal, int newton, double, double,
double fpair, double dx, double dy, double dz)
{
const int * const mask = atom->mask;
const int *const mask = atom->mask;
if ( ((mask[i] & groupbit) && (mask[j] & groupbit2))
|| ((mask[i] & groupbit2) && (mask[j] & groupbit))) {
if (((mask[i] & groupbit) && (mask[j] & groupbit2)) ||
((mask[i] & groupbit2) && (mask[j] & groupbit))) {
fpair *= 0.5;
const double v0 = dx*dx*fpair;
const double v1 = dy*dy*fpair;
const double v2 = dz*dz*fpair;
const double v3 = dx*dy*fpair;
const double v4 = dx*dz*fpair;
const double v5 = dy*dz*fpair;
const double v0 = dx * dx * fpair;
const double v1 = dy * dy * fpair;
const double v2 = dz * dz * fpair;
const double v3 = dx * dy * fpair;
const double v4 = dx * dz * fpair;
const double v5 = dy * dz * fpair;
if (newton || i < nlocal) {
virial[0] += v0; stress[i][0] += v0;
virial[1] += v1; stress[i][1] += v1;
virial[2] += v2; stress[i][2] += v2;
virial[3] += v3; stress[i][3] += v3;
virial[4] += v4; stress[i][4] += v4;
virial[5] += v5; stress[i][5] += v5;
virial[0] += v0;
stress[i][0] += v0;
virial[1] += v1;
stress[i][1] += v1;
virial[2] += v2;
stress[i][2] += v2;
virial[3] += v3;
stress[i][3] += v3;
virial[4] += v4;
stress[i][4] += v4;
virial[5] += v5;
stress[i][5] += v5;
}
if (newton || j < nlocal) {
virial[0] += v0; stress[j][0] += v0;
virial[1] += v1; stress[j][1] += v1;
virial[2] += v2; stress[j][2] += v2;
virial[3] += v3; stress[j][3] += v3;
virial[4] += v4; stress[j][4] += v4;
virial[5] += v5; stress[j][5] += v5;
virial[0] += v0;
stress[j][0] += v0;
virial[1] += v1;
stress[j][1] += v1;
virial[2] += v2;
stress[j][2] += v2;
virial[3] += v3;
stress[j][3] += v3;
virial[4] += v4;
stress[j][4] += v4;
virial[5] += v5;
stress[j][5] += v5;
}
}
}
@ -158,7 +163,7 @@ void ComputeStressTally::pair_tally_callback(int i, int j, int nlocal, int newto
int ComputeStressTally::pack_reverse_comm(int n, int first, double *buf)
{
int i,m,last;
int i, m, last;
m = 0;
last = first + n;
@ -177,7 +182,7 @@ int ComputeStressTally::pack_reverse_comm(int n, int first, double *buf)
void ComputeStressTally::unpack_reverse_comm(int n, int *list, double *buf)
{
int i,j,m;
int i, j, m;
m = 0;
for (i = 0; i < n; i++) {
@ -196,18 +201,17 @@ void ComputeStressTally::unpack_reverse_comm(int n, int *list, double *buf)
double ComputeStressTally::compute_scalar()
{
invoked_scalar = update->ntimestep;
if ((did_setup != invoked_scalar)
|| (update->eflag_global != invoked_scalar))
error->all(FLERR,"Energy was not tallied on needed timestep");
if ((did_setup != invoked_scalar) || (update->eflag_global != invoked_scalar))
error->all(FLERR, "Energy was not tallied on needed timestep");
// sum accumulated virial contributions across procs
MPI_Allreduce(virial,vector,size_peratom_cols,MPI_DOUBLE,MPI_SUM,world);
MPI_Allreduce(virial, vector, size_peratom_cols, MPI_DOUBLE, MPI_SUM, world);
if (domain->dimension == 3)
scalar = (vector[0]+vector[1]+vector[2])/3.0;
scalar = (vector[0] + vector[1] + vector[2]) / 3.0;
else
scalar = (vector[0]+vector[1])/2.0;
scalar = (vector[0] + vector[1]) / 2.0;
return scalar;
}
@ -217,9 +221,8 @@ double ComputeStressTally::compute_scalar()
void ComputeStressTally::compute_peratom()
{
invoked_peratom = update->ntimestep;
if ((did_setup != invoked_peratom)
|| (update->eflag_global != invoked_peratom))
error->all(FLERR,"Energy was not tallied on needed timestep");
if ((did_setup != invoked_peratom) || (update->eflag_global != invoked_peratom))
error->all(FLERR, "Energy was not tallied on needed timestep");
// collect contributions from ghost atoms
@ -228,8 +231,7 @@ void ComputeStressTally::compute_peratom()
const int nall = atom->nlocal + atom->nghost;
for (int i = atom->nlocal; i < nall; ++i)
for (int j = 0; j < size_peratom_cols; ++j)
stress[i][j] = 0.0;
for (int j = 0; j < size_peratom_cols; ++j) stress[i][j] = 0.0;
}
// convert to stress*volume units = -pressure*volume
@ -251,7 +253,6 @@ void ComputeStressTally::compute_peratom()
double ComputeStressTally::memory_usage()
{
double bytes = (nmax < 0) ? 0 : nmax*size_peratom_cols * sizeof(double);
double bytes = (nmax < 0) ? 0 : nmax * (double)size_peratom_cols * sizeof(double);
return bytes;
}
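
The v0..v5 terms above are the six independent components of the symmetric per-pair virial tensor, the outer product of the separation vector with the pair force f = fpair * (dx, dy, dz), and compute_scalar() reduces them to a pressure-like scalar by averaging the diagonal over the dimension. A standalone sketch of that arithmetic (illustrative helper, not LAMMPS code):

#include <cstdio>

// six independent components of the symmetric virial tensor for a
// central pair force, matching the v0..v5 terms tallied above
void pair_virial(double fpair, double dx, double dy, double dz, double v[6])
{
  v[0] = dx * dx * fpair;   // xx
  v[1] = dy * dy * fpair;   // yy
  v[2] = dz * dz * fpair;   // zz
  v[3] = dx * dy * fpair;   // xy
  v[4] = dx * dz * fpair;   // xz
  v[5] = dy * dz * fpair;   // yz
}

int main()
{
  double v[6];
  pair_virial(2.0, 1.0, 0.5, 0.0, v);
  // the 3d scalar is the mean of the diagonal components
  double scalar3d = (v[0] + v[1] + v[2]) / 3.0;
  printf("scalar = %g\n", scalar3d);   // (2.0 + 0.5 + 0.0) / 3
  return 0;
}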

View File

@ -50,7 +50,7 @@ CUB_URL="https://github.com/NVlabs/cub/archive/1.12.0.tar.gz"
KOKKOS_URL="https://github.com/kokkos/kokkos/archive/3.4.01.tar.gz"
KIM_URL="https://s3.openkim.org/kim-api/kim-api-2.2.1.txz"
MSCG_URL="https://github.com/uchicago-voth/MSCG-release/archive/1.7.3.1.tar.gz"
PLUMED_URL="https://github.com/plumed/plumed2/releases/download/v2.7.1/plumed-src-2.7.1.tgz"
PLUMED_URL="https://github.com/plumed/plumed2/releases/download/v2.7.2/plumed-src-2.7.2.tgz"
PACELIB_URL="https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2021.4.9.tar.gz"
LATTE_URL="https://github.com/lanl/LATTE/archive/v1.2.2.tar.gz"
SCAFACOS_URL="https://github.com/scafacos/scafacos/releases/download/v1.0.1/scafacos-1.0.1.tar.gz"

View File

@ -1,7 +1,7 @@
---
lammps_version: 2 Jul 2021
date_generated: Wed Jul 21 15:49:45 2021
epsilon: 1e-11
epsilon: 2e-11
prerequisites: ! |
pair reaxff
fix qeq/reaxff

View File

@ -1,7 +1,7 @@
---
lammps_version: 2 Jul 2021
date_generated: Wed Jul 21 15:49:47 2021
epsilon: 1e-12
epsilon: 3e-12
prerequisites: ! |
pair reaxff
fix qeq/reaxff
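
Both reference files above only relax the epsilon tolerance used when comparing freshly computed results against the stored reference data; nothing about the tests themselves changes. As a hedged illustration of what such a relative tolerance typically means (the actual comparison in the LAMMPS test harness may differ in detail):

#include <cmath>
#include <cstdio>

// values match when their difference is small relative to their magnitude
bool approx_equal(double a, double b, double eps)
{
  return std::fabs(a - b) <= eps * std::fmax(std::fabs(a), std::fabs(b));
}

int main()
{
  // with eps = 1e-12 this pair fails; the relaxed 3e-12 accepts it
  double ref = 1.0, val = 1.0 + 2.5e-12;
  printf("eps=1e-12: %d, eps=3e-12: %d\n",
         approx_equal(val, ref, 1e-12), approx_equal(val, ref, 3e-12));
  return 0;
}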

View File

@ -281,7 +281,6 @@ TEST_F(FileOperationsTest, error_message_warn)
TEST_F(FileOperationsTest, error_all_one)
{
char buf[64];
BEGIN_HIDE_OUTPUT();
command("echo none");
command("log none");

View File

@ -94,7 +94,8 @@ TEST(Tokenizer, copy_constructor)
TEST(Tokenizer, move_constructor)
{
Tokenizer u = std::move(Tokenizer("test new word ", " "));
Tokenizer t("test new word ", " ");
Tokenizer u = std::move(t);
ASSERT_THAT(u.next(), Eq("test"));
ASSERT_THAT(u.next(), Eq("new"));
ASSERT_THAT(u.next(), Eq("word"));
@ -248,7 +249,8 @@ TEST(ValueTokenizer, copy_constructor)
TEST(ValueTokenizer, move_constructor)
{
ValueTokenizer u = std::move(ValueTokenizer(" test new word ", " "));
ValueTokenizer t(" test new word ", " ");
ValueTokenizer u = std::move(t);
ASSERT_THAT(u.next_string(), Eq("test"));
ASSERT_THAT(u.next_string(), Eq("new"));
ASSERT_THAT(u.next_string(), Eq("word"));
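
The two test fixes above address the same issue: applying std::move to a temporary (a prvalue) is at best a no-op and at worst pessimizing, since it blocks copy elision, and compilers flag it with warnings such as -Wpessimizing-move; it also never move-constructs from a distinct, named object. Creating a named tokenizer first and then moving from it makes the test actually exercise the move constructor. A minimal sketch of the idiom with a stand-in type (illustrative, not the Tokenizer API):

#include <string>
#include <utility>

struct Toy {
  std::string data;
  explicit Toy(std::string s) : data(std::move(s)) {}
};

int main()
{
  // before: Toy u = std::move(Toy("payload"));  // defeats copy elision
  Toy t("payload");        // named source object
  Toy u = std::move(t);    // genuinely invokes the move constructor
  return u.data.empty();   // 'u' now owns the data; 't' is moved-from
}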