Merge branch 'master' of github.com:lammps/lammps into kk_verlet
@@ -244,15 +244,16 @@ if(PKG_ADIOS)
endif()

if(NOT CMAKE_CROSSCOMPILING)
set(MPI_CXX_SKIP_MPICXX TRUE)
find_package(MPI QUIET)
option(BUILD_MPI "Build MPI version" ${MPI_FOUND})
else()
set(MPI_CXX_SKIP_MPICXX TRUE)
option(BUILD_MPI "Build MPI version" OFF)
endif()

if(BUILD_MPI)
# do not include the (obsolete) MPI C++ bindings which makes
# for leaner object files and avoids namespace conflicts
set(MPI_CXX_SKIP_MPICXX TRUE)
# We use a non-standard procedure to cross-compile with MPI on Windows
if((CMAKE_SYSTEM_NAME STREQUAL "Windows") AND CMAKE_CROSSCOMPILING)
include(MPI4WIN)
@@ -368,6 +369,8 @@ if(PKG_MSCG OR PKG_ATC OR PKG_AWPMD OR PKG_ML-QUIP OR PKG_LATTE)
endif()
endif()

# tweak jpeg library names to avoid linker errors with MinGW cross-compilation
set(JPEG_NAMES libjpeg libjpeg-62)
find_package(JPEG QUIET)
option(WITH_JPEG "Enable JPEG support" ${JPEG_FOUND})
if(WITH_JPEG)

@@ -54,8 +54,8 @@ if(DOWNLOAD_PLUMED)
set(PLUMED_BUILD_BYPRODUCTS "<INSTALL_DIR>/lib/libplumedWrapper.a")
endif()

set(PLUMED_URL "https://github.com/plumed/plumed2/releases/download/v2.7.1/plumed-src-2.7.1.tgz" CACHE STRING "URL for PLUMED tarball")
set(PLUMED_MD5 "4eac6a462ec84dfe0cec96c82421b8e8" CACHE STRING "MD5 checksum of PLUMED tarball")
set(PLUMED_URL "https://github.com/plumed/plumed2/releases/download/v2.7.2/plumed-src-2.7.2.tgz" CACHE STRING "URL for PLUMED tarball")
set(PLUMED_MD5 "cfa0b4dd90a81c25d3302e8d97bfeaea" CACHE STRING "MD5 checksum of PLUMED tarball")

mark_as_advanced(PLUMED_URL)
mark_as_advanced(PLUMED_MD5)
@@ -72,7 +72,6 @@ if(DOWNLOAD_PLUMED)
${PLUMED_CONFIG_OMP}
CXX=${PLUMED_CONFIG_CXX}
CC=${PLUMED_CONFIG_CC}
PATCH_COMMAND sed -i "/^#include <algorithm>/a #include <limits>" <SOURCE_DIR>/src/lepton/Operation.h
BUILD_BYPRODUCTS ${PLUMED_BUILD_BYPRODUCTS}
)
ExternalProject_get_property(plumed_build INSTALL_DIR)

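The removed PATCH_COMMAND worked around a missing "#include <limits>" in the lepton library bundled with PLUMED 2.7.1; the 2.7.2 tarball ships the fix, so the sed call is no longer needed. A minimal sketch of the failure mode (illustrative only, not the PLUMED source): code using std::numeric_limits compiled on older GCC only because <limits> arrived transitively through other headers, while GCC 11 and newer require the explicit include.

    // Illustrative only: why the "#include <limits>" insertion was needed.
    #include <algorithm>
    #include <limits>   // without this line, GCC 11+ rejects the code below

    double smallest() {
      return std::numeric_limits<double>::min();
    }
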
@@ -25,7 +25,7 @@ RasMol visualization programs. Pizza.py has tools that do interactive
3d OpenGL visualization and one that creates SVG images of dump file
snapshots.

.. _pizza: https://pizza.sandia.gov
.. _pizza: https://lammps.github.io/pizza

.. _ensight: https://www.ansys.com/products/fluids/ansys-ensight

@@ -24,11 +24,15 @@ General features
^^^^^^^^^^^^^^^^

* runs on a single processor or in parallel
* distributed-memory message-passing parallelism (MPI)
* spatial-decomposition of simulation domain for parallelism
* open-source distribution
* highly portable C++
* optional libraries used: MPI and single-processor FFT
* distributed memory message-passing parallelism (MPI)
* shared memory multi-threading parallelism (OpenMP)
* spatial decomposition of simulation domain for MPI parallelism
* particle decomposition inside of spatial decomposition for OpenMP parallelism
* GPLv2 licensed open-source distribution
* highly portable C++-11
* modular code with most functionality in optional packages
* only depends on MPI library for basic parallel functionality
* other libraries are optional and only required for specific packages
* GPU (CUDA and OpenCL), Intel Xeon Phi, and OpenMP support for many code features
* easy to extend with new features and functionality
* runs from an input script
@@ -68,9 +72,9 @@ Interatomic potentials (force fields)
:doc:`improper style <improper_style>`, :doc:`kspace style <kspace_style>`
commands)

* pairwise potentials: Lennard-Jones, Buckingham, Morse, Born-Mayer-Huggins, Yukawa, soft, class 2 (COMPASS), hydrogen bond, tabulated
* charged pairwise potentials: Coulombic, point-dipole
* many-body potentials: EAM, Finnis/Sinclair EAM, modified EAM (MEAM), embedded ion method (EIM), EDIP, ADP, Stillinger-Weber, Tersoff, REBO, AIREBO, ReaxFF, COMB, SNAP, Streitz-Mintmire, 3-body polymorphic
* long-range interactions for charge, point-dipoles, and LJ dispersion: Ewald, Wolf, PPPM (similar to particle-mesh Ewald)
* polarization models: :doc:`QEq <fix_qeq>`, :doc:`core/shell model <Howto_coreshell>`, :doc:`Drude dipole model <Howto_drude>`
* charge equilibration (QEq via dynamic, point, shielded, Slater methods)
@@ -170,9 +174,12 @@ Multi-replica models
^^^^^^^^^^^^^^^^^^^^

* :doc:`nudged elastic band <neb>`
* :doc:`hyperdynamics <hyper>`
* :doc:`parallel replica dynamics <prd>`
* :doc:`temperature accelerated dynamics <tad>`
* :doc:`parallel tempering <temper>`
* :doc:`path-integral MD <fix_pimd>`
* multi-walker collective variables with :doc:`Colvars <fix_colvars>` and :doc:`Plumed <fix_plumed>`

.. _prepost:

@@ -187,7 +194,7 @@ Pre- and post-processing
plotting, and visualization for LAMMPS simulations. Pizza.py is
written in `Python <python_>`_ and is available for download from `the Pizza.py WWW site <pizza_>`_.

.. _pizza: https://pizza.sandia.gov
.. _pizza: https://lammps.github.io/pizza

.. _python: http://www.python.org

@@ -77,7 +77,7 @@ Here are suggestions on how to perform these tasks:
it easier to analyze and plot. See the :doc:`Tools <Tools>` doc page
for more discussion of the various tools.
* **Pizza.py:** Our group has also written a separate toolkit called
`Pizza.py <https://pizza.sandia.gov>`_ which can do certain kinds of
`Pizza.py <https://lammps.github.io/pizza>`_ which can do certain kinds of
setup, analysis, plotting, and visualization (via OpenGL) for LAMMPS
simulations. It thus provides some functionality for several of the
above bullets. Pizza.py is written in `Python <http://www.python.org>`_

@@ -18,10 +18,11 @@ supercomputers.
.. _mpi: https://en.wikipedia.org/wiki/Message_Passing_Interface
.. _lws: https://www.lammps.org

LAMMPS is written in C++. Earlier versions were written in F77 and
F90. See the `History page <https://www.lammps.org/history.html>`_ of
the website for details. All versions can be downloaded from the
`LAMMPS website <lws_>`_.
LAMMPS is written in C++ and requires a compiler that is at least
compatible with the C++-11 standard.
Earlier versions were written in F77 and F90. See the `History page
<https://www.lammps.org/history.html>`_ of the website for details. All
versions can be downloaded from the `LAMMPS website <lws_>`_.

LAMMPS is designed to be easy to modify or extend with new
capabilities, such as new force fields, atom types, boundary
@@ -41,8 +42,9 @@ short distances, so that the local density of particles never becomes
too large. This is in contrast to methods used for modeling plasma
or gravitational bodies (e.g. galaxy formation).

On parallel machines, LAMMPS uses spatial-decomposition techniques to
partition the simulation domain into small sub-domains of equal
computational cost, one of which is assigned to each processor.
Processors communicate and store "ghost" atom information for atoms
that border their sub-domain.
On parallel machines, LAMMPS uses spatial-decomposition techniques with
MPI parallelization to partition the simulation domain into small
sub-domains of equal computational cost, one of which is assigned to
each processor. Processors communicate and store "ghost" atom
information for atoms that border their sub-domain. Multi-threading
parallelization with particle-decomposition can be used in addition.

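A toy sketch of the ghost-atom idea described in the new paragraph (illustrative C++/MPI, not the LAMMPS communication code): each rank owns a 1-D slab of the domain and mirrors boundary particles from its periodic neighbors as ghosts before computing local forces.

    // Toy 1-D halo exchange; rank r owns sub-domain [r, r+1).
    #include <mpi.h>
    #include <vector>
    #include <cstdio>

    int main(int argc, char **argv) {
      MPI_Init(&argc, &argv);
      int rank, nprocs;
      MPI_Comm_rank(MPI_COMM_WORLD, &rank);
      MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

      // a few particles owned by this rank
      std::vector<double> mine = {rank + 0.05, rank + 0.5, rank + 0.95};

      // periodic neighbors in the 1-D decomposition
      int left  = (rank - 1 + nprocs) % nprocs;
      int right = (rank + 1) % nprocs;

      // exchange edge particles so neighbors can store them as ghosts
      double ghost_from_left, ghost_from_right;
      MPI_Sendrecv(&mine.back(), 1, MPI_DOUBLE, right, 0,
                   &ghost_from_left, 1, MPI_DOUBLE, left, 0,
                   MPI_COMM_WORLD, MPI_STATUS_IGNORE);
      MPI_Sendrecv(&mine.front(), 1, MPI_DOUBLE, left, 1,
                   &ghost_from_right, 1, MPI_DOUBLE, right, 1,
                   MPI_COMM_WORLD, MPI_STATUS_IGNORE);

      std::printf("rank %d ghosts: %.2f %.2f\n",
                  rank, ghost_from_left, ghost_from_right);
      MPI_Finalize();
    }
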
@@ -35,9 +35,9 @@ visualization package you have installed.
Note that for GL, you need to be able to run the Pizza.py GL tool,
which is included in the pizza sub-directory. See the Pizza.py doc pages for more info:

* `https://pizza.sandia.gov <pizza_>`_
* `https://lammps.github.io/pizza <pizza_>`_

.. _pizza: https://pizza.sandia.gov
.. _pizza: https://lammps.github.io/pizza

Note that for AtomEye, you need version 3, and there is a line in the
scripts that specifies the path and name of the executable. See the

@@ -15,7 +15,7 @@ Sandia which provides tools for doing setup, analysis, plotting, and
visualization for LAMMPS simulations.

.. _lws: https://www.lammps.org
.. _pizza: https://pizza.sandia.gov
.. _pizza: https://lammps.github.io/pizza
.. _python: https://www.python.org

Additional tools included in the LAMMPS distribution are described on

@@ -558,7 +558,7 @@ Related commands
:doc:`group <group>`, :doc:`processors <processors>`,
:doc:`fix balance <fix_balance>`, :doc:`comm_style <comm_style>`

.. _pizza: https://pizza.sandia.gov
.. _pizza: https://lammps.github.io/pizza

Default
"""""""

@@ -119,8 +119,7 @@ The per-atom vector values will be an ID > 0, as explained above.
Restrictions
""""""""""""

These computes are part of the EXTRA-COMPUTE package. They are only enabled if
LAMMPS was built with that package. See the :doc:`Build package <Build_package>` page for more info.
none

Related commands
""""""""""""""""

@@ -182,8 +182,7 @@ page for an overview of LAMMPS output options.
Restrictions
""""""""""""

This compute is part of the EXTRA-COMPUTE package. It is only enabled if
LAMMPS was built with that package. See the :doc:`Build package <Build_package>` page for more info.
none

Related commands
""""""""""""""""

@@ -230,7 +230,7 @@ individual values and the file itself.
The *atom*, *local*, and *custom* styles create files in a simple text
format that is self-explanatory when viewing a dump file. Some of the
LAMMPS post-processing tools described on the :doc:`Tools <Tools>` doc
page, including `Pizza.py <https://pizza.sandia.gov>`_,
page, including `Pizza.py <https://lammps.github.io/pizza>`_,
work with this format, as does the :doc:`rerun <rerun>` command.

For post-processing purposes the *atom*, *local*, and *custom* text

@@ -590,8 +590,8 @@ Play the movie:
% mplayer foo.mpg
% ffplay bar.avi

* c) Use the `Pizza.py <https://pizza.sandia.gov>`_
`animate tool <https://pizza.sandia.gov/doc/animate.html>`_,
* c) Use the `Pizza.py <https://lammps.github.io/pizza>`_
`animate tool <https://lammps.github.io/pizza/doc/animate.html>`_,
which works directly on a series of image files.

.. code-block:: python

@@ -403,7 +403,7 @@ Related commands
:doc:`group <group>`, :doc:`processors <processors>`, :doc:`balance <balance>`,
:doc:`comm_style <comm_style>`

.. _pizza: https://pizza.sandia.gov
.. _pizza: https://lammps.github.io/pizza

Default
"""""""

@@ -89,7 +89,7 @@ first stage) is changed to:

.. parsed-literal::

   Fi = -Grad(V) + 2 (Grad(V) dot T') T'
   Fi = -Grad(V) + 2 (Grad(V) dot T') T' + Fnudge_perp

and the relaxation procedure is continued to a new converged MEP.

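Restated in standard notation (with the unit path tangent written T' above denoted tau-hat here), the modified first-stage force is:

    F_i = -\nabla V + 2\,(\nabla V \cdot \hat{\tau})\,\hat{\tau} + F_{\mathrm{nudge}}^{\perp}
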
@@ -53,6 +53,7 @@ checksums = { \
'2.6.3' : 'a9f8028fd74528c2024781ea1fdefeee', \
'2.7.0' : '95f29dd0c067577f11972ff90dfc7d12', \
'2.7.1' : '4eac6a462ec84dfe0cec96c82421b8e8', \
'2.7.2' : 'cfa0b4dd90a81c25d3302e8d97bfeaea', \
}

# parse and process arguments

@@ -1823,7 +1823,6 @@ class lammps(object):

with ExceptionCheck(self):
return self.lib.lammps_fix_external_get_force(self.lmp, fix_id.encode())
return None

# -------------------------------------------------------------------------

@@ -647,7 +647,6 @@ void PPPMDispDielectric::fieldforce_c_ad()

// convert E-field to force and subtract self forces
const double qfactor = qqrd2e * scale;
double qtmp = eps[i]*q[i];

s1 = x[i][0]*hx_inv;
s2 = x[i][1]*hy_inv;
@@ -751,7 +750,7 @@ void PPPMDispDielectric::fieldforce_c_peratom()
extended to non-neutral systems (J. Chem. Phys. 131, 094107).
------------------------------------------------------------------------- */

void PPPMDispDielectric::slabcorr(int eflag)
void PPPMDispDielectric::slabcorr(int /*eflag*/)
{
// compute local contribution to global dipole moment

@@ -116,7 +116,7 @@ ComputeHMA::ComputeHMA(LAMMPS *lmp, int narg, char **arg) :
computeU = computeP = computeCv = -1;
returnAnharmonic = 0;
size_vector = 0;
memory->create(extlist, 3, "hma:extlist");
extlist = new int[3];
for (int iarg=4; iarg<narg; iarg++) {
if (!strcmp(arg[iarg], "u")) {
if (computeU>-1) continue;
@@ -145,20 +145,11 @@ ComputeHMA::ComputeHMA(LAMMPS *lmp, int narg, char **arg) :
}
}

if (size_vector == 0) {
error->all(FLERR,"Illegal compute hma command");
}
if (size_vector<3) {
memory->grow(extlist, size_vector, "hma:extlist");
}
memory->create(vector, size_vector, "hma:vector");
if (size_vector == 0) error->all(FLERR,"Illegal compute hma command");
vector = new double[size_vector];

if (computeU>-1 || computeCv>-1) {
peflag = 1;
}
if (computeP>-1) {
pressflag = 1;
}
if (computeU>-1 || computeCv>-1) peflag = 1;
if (computeP>-1) pressflag = 1;

nmax = 0;
}
@@ -170,10 +161,11 @@ ComputeHMA::~ComputeHMA()
// check nfix in case all fixes have already been deleted
if (modify->nfix) modify->delete_fix(id_fix);

delete [] id_fix;
delete [] id_temp;
memory->destroy(extlist);
memory->destroy(vector);
delete[] id_fix;
delete[] id_temp;
delete[] extlist;
delete[] vector;

memory->destroy(deltaR);
}

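The two ComputeHMA hunks switch extlist and vector from the LAMMPS memory pool to plain new[] in the constructor, and the destructor is updated to match: a buffer must be released by the same facility that allocated it. A minimal sketch of the invariant (illustrative, not LAMMPS code):

    // Whichever allocator creates a buffer must also free it.
    int *extlist = new int[3];   // allocated with new[] ...
    // ... use extlist ...
    delete[] extlist;            // ... so it is released with delete[],
                                 // not free() or a pool-specific destroy()
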
@@ -162,7 +162,11 @@ void AngleCharmmIntel::eval(const int vflag,
if (VFLAG && vflag) {
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
}
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:seangle, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:seangle, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int n = nfrom; n < nto; n ++) {
#else
for (int n = nfrom; n < nto; n += npl) {
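This hunk, and the analogous ones in the bonded-style files below, wraps the legacy Intel "#pragma simd" in a conditional so that compilers which dropped it can use the standard OpenMP form instead. A minimal sketch of the pattern, under the assumption that USE_OMP_SIMD is defined when the LLVM-based Intel compiler (icx) is detected:

    // Dual-pragma pattern used throughout these hunks: icx takes the
    // standard "omp simd" spelling, classic icc keeps "#pragma simd".
    float sum_sq(const float *a, int n) {
      float s = 0.0f;
    #if defined(USE_OMP_SIMD)
    #pragma omp simd reduction(+:s)
    #else
    #pragma simd reduction(+:s)
    #endif
      for (int i = 0; i < n; i++) s += a[i] * a[i];
      return s;
    }
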
@@ -246,7 +250,11 @@ void AngleCharmmIntel::eval(const int vflag,
// apply force to each of 3 atoms

#ifdef LMP_INTEL_USE_SIMDOFF
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i1 < nlocal) {

@@ -162,7 +162,11 @@ void AngleHarmonicIntel::eval(const int vflag,
if (VFLAG && vflag) {
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
}
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:seangle, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:seangle, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int n = nfrom; n < nto; n ++) {
#else
for (int n = nfrom; n < nto; n += npl) {
@@ -228,7 +232,11 @@ void AngleHarmonicIntel::eval(const int vflag,
// apply force to each of 3 atoms

#ifdef LMP_INTEL_USE_SIMDOFF
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i1 < nlocal) {

@@ -158,7 +158,11 @@ void BondFENEIntel::eval(const int vflag,
if (VFLAG && vflag) {
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
}
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:sebond, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:sebond, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int n = nfrom; n < nto; n ++) {
#else
for (int n = nfrom; n < nto; n += npl) {
@@ -215,7 +219,11 @@ void BondFENEIntel::eval(const int vflag,
// apply force to each of 2 atoms

#ifdef LMP_INTEL_USE_SIMDOFF
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i1 < nlocal) {

@@ -155,7 +155,11 @@ void BondHarmonicIntel::eval(const int vflag,
if (VFLAG && vflag) {
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
}
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:sebond, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:sebond, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int n = nfrom; n < nto; n ++) {
#else
for (int n = nfrom; n < nto; n += npl) {
@@ -184,7 +188,11 @@ void BondHarmonicIntel::eval(const int vflag,

// apply force to each of 2 atoms
#ifdef LMP_INTEL_USE_SIMDOFF
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i1 < nlocal) {

@@ -181,9 +181,16 @@ void DihedralCharmmIntel::eval(const int vflag,
}

#if defined(LMP_SIMD_COMPILER_TEST)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:sedihedral, sevdwl, secoul, sv0, sv1, sv2, \
sv3, sv4, sv5, spv0, spv1, spv2, spv3, spv4, \
spv5)
#else
#pragma simd reduction(+:sedihedral, sevdwl, secoul, sv0, sv1, sv2, \
sv3, sv4, sv5, spv0, spv1, spv2, spv3, spv4, spv5)
sv3, sv4, sv5, spv0, spv1, spv2, spv3, spv4, \
spv5)
#endif
#pragma vector aligned
for (int n = nfrom; n < nto; n++) {
#endif
for (int n = nfrom; n < nto; n += npl) {
@@ -329,7 +336,11 @@ void DihedralCharmmIntel::eval(const int vflag,


#if defined(LMP_SIMD_COMPILER_TEST)
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i2 < nlocal) {
@@ -408,7 +419,11 @@ void DihedralCharmmIntel::eval(const int vflag,

// apply force to each of 4 atoms
#if defined(LMP_SIMD_COMPILER_TEST)
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i1 < nlocal) {

@@ -154,7 +154,11 @@ void DihedralFourierIntel::eval(const int vflag,
if (VFLAG && vflag) {
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
}
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:sedihedral, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:sedihedral, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int n = nfrom; n < nto; n ++) {
#else
for (int n = nfrom; n < nto; n += npl) {
@@ -304,7 +308,11 @@ void DihedralFourierIntel::eval(const int vflag,
}

#ifdef LMP_INTEL_USE_SIMDOFF
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i1 < nlocal) {

@@ -154,7 +154,11 @@ void DihedralHarmonicIntel::eval(const int vflag,
if (VFLAG && vflag) {
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
}
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:sedihedral, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:sedihedral, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int n = nfrom; n < nto; n ++) {
#else
for (int n = nfrom; n < nto; n += npl) {
@@ -299,7 +303,11 @@ void DihedralHarmonicIntel::eval(const int vflag,
}

#ifdef LMP_INTEL_USE_SIMDOFF
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i1 < nlocal) {

@@ -158,7 +158,11 @@ void DihedralOPLSIntel::eval(const int vflag,
if (VFLAG && vflag) {
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
}
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:sedihedral, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:sedihedral, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int n = nfrom; n < nto; n ++) {
#else
for (int n = nfrom; n < nto; n += npl) {
@@ -319,7 +323,11 @@ void DihedralOPLSIntel::eval(const int vflag,
}

#ifdef LMP_INTEL_USE_SIMDOFF
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i1 < nlocal) {

@@ -635,19 +635,31 @@ void FixIntel::reduce_results(acc_t * _noalias const f_scalar)
if (_nthreads == 4) {
acc_t *f_scalar3 = f_scalar2 + f_stride4;
acc_t *f_scalar4 = f_scalar3 + f_stride4;
_use_simd_pragma("vector aligned")
_use_simd_pragma("simd")
#if defined(USE_OMP_SIMD)
#pragma omp simd aligned(f_scalar,f_scalar2,f_scalar3,f_scalar4:64)
#elif defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd
#endif
for (int n = 0; n < o_range; n++)
f_scalar[n] += f_scalar2[n] + f_scalar3[n] + f_scalar4[n];
} else if (_nthreads == 2) {
_use_simd_pragma("vector aligned")
_use_simd_pragma("simd")
#if defined(USE_OMP_SIMD)
#pragma omp simd aligned(f_scalar,f_scalar2:64)
#elif defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd
#endif
for (int n = 0; n < o_range; n++)
f_scalar[n] += f_scalar2[n];
} else {
acc_t *f_scalar3 = f_scalar2 + f_stride4;
_use_simd_pragma("vector aligned")
_use_simd_pragma("simd")
#if defined(USE_OMP_SIMD)
#pragma omp simd aligned(f_scalar,f_scalar2,f_scalar3:64)
#elif defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd
#endif
for (int n = 0; n < o_range; n++)
f_scalar[n] += f_scalar2[n] + f_scalar3[n];
}
@@ -662,8 +674,12 @@ void FixIntel::reduce_results(acc_t * _noalias const f_scalar)

acc_t *f_scalar2 = f_scalar + f_stride4;
for (int t = 1; t < _nthreads; t++) {
_use_simd_pragma("vector aligned")
_use_simd_pragma("simd")
#if defined(USE_OMP_SIMD)
#pragma omp simd aligned(f_scalar,f_scalar2:64)
#elif defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd
#endif
for (int n = iifrom; n < iito; n++)
f_scalar[n] += f_scalar2[n];
f_scalar2 += f_stride4;

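In these reduce_results hunks the 64-byte alignment promise that "#pragma vector aligned" gave the classic Intel compiler moves into the OpenMP "aligned" clause. A short sketch of the construct (illustrative; callers are assumed to pass 64-byte aligned buffers, e.g. from std::aligned_alloc):

    // OpenMP 4.x form: the aligned clause carries the alignment promise.
    void reduce_pair(double *a, const double *b, int n) {
    #if defined(USE_OMP_SIMD)
    #pragma omp simd aligned(a, b : 64)
    #endif
      for (int i = 0; i < n; i++) a[i] += b[i];
    }
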
@@ -99,8 +99,12 @@ void FixNHIntel::remap()

if (allremap) {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < nlocal; i++) {
const double d0 = x[i].x - b0;
@@ -112,8 +116,12 @@ void FixNHIntel::remap()
}
} else {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < nlocal; i++) {
if (mask[i] & dilate_group_bit) {
@@ -278,8 +286,12 @@ void FixNHIntel::remap()

if (allremap) {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < nlocal; i++) {
x[i].x = h0*x[i].x + h5*x[i].y + h4*x[i].z + nb0;
@@ -288,8 +300,12 @@ void FixNHIntel::remap()
}
} else {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < nlocal; i++) {
if (mask[i] & dilate_group_bit) {
@@ -415,8 +431,12 @@ void FixNHIntel::nh_v_press()

if (igroup == 0) {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < nlocal; i++) {
v[i].x *= f0;
@@ -425,8 +445,12 @@ void FixNHIntel::nh_v_press()
}
} else {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < nlocal; i++) {
if (mask[i] & groupbit) {
@@ -448,8 +472,12 @@ void FixNHIntel::nve_v()
double * _noalias const v = atom->v[0];
const double * _noalias const f = atom->f[0];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++)
v[i] += _dtfm[i] * f[i];
@@ -468,15 +496,23 @@ void FixNHIntel::nve_x()

if (igroup == 0) {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++)
x[i] += dtv * v[i];
} else {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++) {
if (_dtfm[i] != 0.0)
@@ -500,15 +536,23 @@ void FixNHIntel::nh_v_temp()

if (igroup == 0) {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++)
v[i] *= factor_eta;
} else {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++) {
if (_dtfm[i] != 0.0)

@@ -97,8 +97,12 @@ void FixNVEAsphereIntel::initial_integrate(int /*vflag*/)
dtq = 0.5 * dtv;

#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++) {
v[i] += _dtfm[i] * f[i];
@@ -108,8 +112,12 @@ void FixNVEAsphereIntel::initial_integrate(int /*vflag*/)
// update angular momentum by 1/2 step
if (igroup == 0) {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < nlocal; i++) {
double *quat = bonus[ellipsoid[i]].quat;
@@ -118,8 +126,12 @@ void FixNVEAsphereIntel::initial_integrate(int /*vflag*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < nlocal; i++) {
if (mask[i] & groupbit) {
@@ -143,8 +155,12 @@ void FixNVEAsphereIntel::final_integrate()
const double * _noalias const torque = atom->torque[0];

#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++) {
v[i] += _dtfm[i] * f[i];

@@ -68,8 +68,12 @@ void FixNVEIntel::initial_integrate(int /*vflag*/)
if (igroup == 0 && atom->ntypes == 1 && !atom->rmass) {
const double dtfm = dtf / atom->mass[1];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++) {
v[i] += dtfm * f[i];
@@ -78,8 +82,12 @@ void FixNVEIntel::initial_integrate(int /*vflag*/)
} else if (igroup == 0) {
if (neighbor->ago == 0) reset_dt();
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++) {
v[i] += _dtfm[i] * f[i];
@@ -88,8 +96,12 @@ void FixNVEIntel::initial_integrate(int /*vflag*/)
} else {
if (neighbor->ago == 0) reset_dt();
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++) {
if (_dtfm[i] != 0.0) {
@@ -112,16 +124,24 @@ void FixNVEIntel::final_integrate()
_nlocal3 = 3 * atom->nlocal;
const double dtfm = dtf / atom->mass[1];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++)
v[i] += dtfm * f[i];
} else if (igroup == 0) {
if (neighbor->ago == 0) reset_dt();
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++) {
v[i] += _dtfm[i] * f[i];
@@ -129,8 +149,12 @@ void FixNVEIntel::final_integrate()
} else {
if (neighbor->ago == 0) reset_dt();
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int i = 0; i < _nlocal3; i++)
v[i] += _dtfm[i] * f[i];

@@ -165,7 +165,11 @@ void ImproperCvffIntel::eval(const int vflag,
if (VFLAG && vflag) {
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
}
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:seimproper, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:seimproper, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int n = nfrom; n < nto; n++) {
#else
for (int n = nfrom; n < nto; n += npl) {
@@ -247,7 +251,11 @@ void ImproperCvffIntel::eval(const int vflag,

flt_t p, pd;
#ifdef LMP_INTEL_USE_SIMDOFF_FIX
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (m == 2) {
@@ -319,7 +327,11 @@ void ImproperCvffIntel::eval(const int vflag,
// apply force to each of 4 atoms

#ifdef LMP_INTEL_USE_SIMDOFF_FIX
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i1 < nlocal) {

@@ -167,7 +167,11 @@ void ImproperHarmonicIntel::eval(const int vflag,
if (VFLAG && vflag) {
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
}
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:seimproper, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:seimproper, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int n = nfrom; n < nto; n++) {
#else
for (int n = nfrom; n < nto; n += npl) {
@@ -276,7 +280,11 @@ void ImproperHarmonicIntel::eval(const int vflag,
// apply force to each of 4 atoms

#ifdef LMP_INTEL_USE_SIMDOFF
#if defined(USE_OMP_SIMD)
#pragma omp ordered simd
#else
#pragma simdoff
#endif
#endif
{
if (NEWTON_BOND || i1 < nlocal) {

@@ -127,7 +127,8 @@ struct vector_ops<double, KNC> {
}
template<int scale>
static fvec gather(const fvec &from, bvec mask, const ivec &idx, const void *base) {
return _mm512_mask_i32logather_pd(from, mask, idx, base, scale);
return _mm512_mask_i32gather_pd(from, mask, _mm512_castsi512_si256(idx),
base, scale);
}
static fvec blend(const bvec &mask, const fvec &a, const fvec &b) {
return _mm512_mask_blend_pd(mask, a, b);

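The i32logather/i32loscatter intrinsics replaced in this and the following hunks date from the first-generation Xeon Phi (KNC) instruction set; they took a full 512-bit index register but only used its low 256 bits. The AVX-512F equivalents take an explicit 256-bit index vector, hence the _mm512_castsi512_si256 on the index argument. An illustrative translation (assumes an AVX-512 capable compiler and CPU; not the LAMMPS source):

    #include <immintrin.h>

    __m512d gather8(const double *base, const int *idx32,
                    __mmask8 m, __m512d src) {
      // Load 16 32-bit indices; only the low 8 are meaningful here.
      __m512i idx = _mm512_loadu_si512(idx32);
      // AVX-512F gathers of 8 doubles index with a 256-bit vector,
      // so take the low half of the 512-bit index register.
      __m256i idx_lo = _mm512_castsi512_si256(idx);
      return _mm512_mask_i32gather_pd(src, m, idx_lo, base, 8);  // scale = sizeof(double)
    }
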
@@ -511,7 +511,8 @@ public:
const int scale) {
assert(scale == sizeof(FVEC_SCAL_T));
# if FVEC_LEN==8
return FVEC_SUFFIX(_mm512_i32logather_)(idx.val_, mem, sizeof(FVEC_SCAL_T));
return FVEC_SUFFIX(_mm512_i32gather_)(_mm512_castsi512_si256(idx.val_),
mem, sizeof(FVEC_SCAL_T));
# else
return FVEC_SUFFIX(_mm512_i32gather_)(idx.val_, mem, sizeof(FVEC_SCAL_T));
# endif
@@ -522,8 +523,8 @@ public:
) {
assert(scale == sizeof(FVEC_SCAL_T));
# if FVEC_LEN==8
return FVEC_SUFFIX(_mm512_mask_i32logather_)(src.val_, mask.val_, idx.val_,
mem, sizeof(FVEC_SCAL_T));
return FVEC_SUFFIX(_mm512_mask_i32gather_)(src.val_, mask.val_,
_mm512_castsi512_si256(idx.val_), mem, sizeof(FVEC_SCAL_T));
# else
return FVEC_SUFFIX(_mm512_mask_i32gather_)(src.val_, mask.val_, idx.val_,
mem, sizeof(FVEC_SCAL_T));
@@ -609,8 +610,8 @@ public:
) {
assert(scale == sizeof(FVEC_SCAL_T));
# if FVEC_LEN==8
return FVEC_SUFFIX(_mm512_mask_i32logather_)(src.val_, mask.val_, idx.val_,
mem, sizeof(FVEC_SCAL_T));
return FVEC_SUFFIX(_mm512_mask_i32gather_)(src.val_, mask.val_,
_mm512_castsi512_si256(idx.val_), mem, sizeof(FVEC_SCAL_T));
# else
return FVEC_SUFFIX(_mm512_mask_i32gather_)(src.val_, mask.val_, idx.val_,
mem, sizeof(FVEC_SCAL_T));
@@ -622,8 +623,9 @@ public:
) {
assert(scale == sizeof(FVEC_SCAL_T));
# if FVEC_LEN==8
FVEC_SUFFIX(_mm512_mask_i32loscatter_)(mem, mask.val_, idx.val_, a.val_,
sizeof(FVEC_SCAL_T));
FVEC_SUFFIX(_mm512_mask_i32scatter_)(mem, mask.val_,
_mm512_castsi512_si256(idx.val_),
a.val_, sizeof(FVEC_SCAL_T));
# else
FVEC_SUFFIX(_mm512_mask_i32scatter_)(mem, mask.val_, idx.val_, a.val_,
sizeof(FVEC_SCAL_T));
@@ -666,11 +668,11 @@ public:
const double * mem, const int scale
) {
assert(scale == sizeof(double));
__m512d lo = _mm512_mask_i32logather_pd(src.lo_, mask.val_, idx.val_, mem,
sizeof(double));
__m512d hi = _mm512_mask_i32logather_pd(src.hi_, get_bvec_hi(mask.val_),
get_ivec_hi(idx.val_), mem,
sizeof(double));
__m512d lo = _mm512_mask_i32gather_pd(src.lo_, mask.val_,
_mm512_castsi512_si256(idx.val_),
mem, sizeof(double));
__m512d hi = _mm512_mask_i32gather_pd(src.hi_, get_bvec_hi(mask.val_),
_mm512_castsi512_si256(get_ivec_hi(idx.val_)), mem, sizeof(double));
return avec16pd(lo, hi);
}
VEC_INLINE static void mask_i32loscatter(
@@ -678,10 +680,12 @@ public:
const avec16pd &a, const int scale
) {
assert(scale == sizeof(double));
_mm512_mask_i32loscatter_pd(mem, mask.val_, idx.val_, a.lo_,
sizeof(double));
_mm512_mask_i32loscatter_pd(mem, get_bvec_hi(mask.val_),
get_ivec_hi(idx.val_), a.hi_, sizeof(double));
_mm512_mask_i32scatter_pd(mem, mask.val_,
_mm512_castsi512_si256(idx.val_), a.lo_,
sizeof(double));
_mm512_mask_i32scatter_pd(mem, get_bvec_hi(mask.val_),
_mm512_castsi512_si256(get_ivec_hi(idx.val_)),
a.hi_, sizeof(double));
}

#define AVEC2_BINOP(the_sym, the_name) \

@@ -17,8 +17,13 @@
------------------------------------------------------------------------- */

#ifdef __INTEL_LLVM_COMPILER
#define USE_OMP_SIMD
#define __INTEL_COMPILER __INTEL_LLVM_COMPILER
#define __INTEL_COMPILER_BUILD_DATE __INTEL_LLVM_COMPILER
#define _MM_SCALE_1 1
#define _MM_SCALE_2 2
#define _MM_SCALE_4 4
#define _MM_SCALE_8 8
#endif

#ifdef __INTEL_COMPILER
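These defines shim the LLVM-based Intel compiler (icx) into code written for classic icc: defining __INTEL_COMPILER keeps the existing guards active, and the _MM_SCALE_* constants, which classic icc's headers provided, are supplied by hand. A sketch of the effect (illustrative only):

    // A guard written for classic icc now also matches icx, because the
    // header above defines __INTEL_COMPILER for it; _MM_SCALE_8 remains
    // available for gather/scatter scale arguments.
    #ifdef __INTEL_COMPILER
    // vectorized code path taken by icc ... and now by icx as well
    #endif
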
@ -332,6 +337,9 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
|
||||
|
||||
#endif
|
||||
|
||||
// TO BE DEPRECATED
|
||||
#ifndef USE_OMP_SIMD
|
||||
|
||||
#define IP_PRE_fdotr_acc_force_l5(lf, lt, minlocal, nthreads, f_start, \
|
||||
f_stride, pos, ov0, ov1, ov2, \
|
||||
ov3, ov4, ov5) \
|
||||
@ -526,6 +534,198 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
|
||||
} \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define IP_PRE_fdotr_acc_force_l5(lf, lt, minlocal, nthreads, f_start, \
|
||||
f_stride, pos, ov0, ov1, ov2, \
|
||||
ov3, ov4, ov5) \
|
||||
{ \
|
||||
acc_t *f_scalar = &f_start[0].x; \
|
||||
flt_t *x_scalar = &pos[minlocal].x; \
|
||||
int f_stride4 = f_stride * 4; \
|
||||
_alignvar(acc_t ovv[16],64); \
|
||||
int vwidth; \
|
||||
if (sizeof(acc_t) == sizeof(double)) \
|
||||
vwidth = INTEL_COMPILE_WIDTH/2; \
|
||||
else \
|
||||
vwidth = INTEL_COMPILE_WIDTH; \
|
||||
if (vwidth < 4) vwidth = 4; \
|
||||
_use_simd_pragma("omp simd aligned(ovv:64)") \
|
||||
for (int v = 0; v < vwidth; v++) ovv[v] = (acc_t)0.0; \
|
||||
int remainder = lt % vwidth; \
|
||||
if (lf > lt) remainder = 0; \
|
||||
const int v_range = lt - remainder; \
|
||||
if (nthreads == 2) { \
|
||||
acc_t *f_scalar2 = f_scalar + f_stride4; \
|
||||
for (int n = lf; n < v_range; n += vwidth) { \
|
||||
_use_simd_pragma("omp simd aligned(f_scalar,f_scalar2,ovv,x_scalar:64)")\
|
||||
for (int v = 0; v < vwidth; v++) { \
|
||||
f_scalar[n+v] += f_scalar2[n+v]; \
|
||||
ovv[v] += f_scalar[n+v] * x_scalar[n+v]; \
|
||||
} \
|
||||
ov3 += f_scalar[n+1] * x_scalar[n+0]; \
|
||||
ov4 += f_scalar[n+2] * x_scalar[n+0]; \
|
||||
ov5 += f_scalar[n+2] * x_scalar[n+1]; \
|
||||
if (vwidth > 4) { \
|
||||
ov3 += f_scalar[n+5] * x_scalar[n+4]; \
|
||||
ov4 += f_scalar[n+6] * x_scalar[n+4]; \
|
||||
ov5 += f_scalar[n+6] * x_scalar[n+5]; \
|
||||
} \
|
||||
if (vwidth > 8) { \
|
||||
ov3 += f_scalar[n+9] * x_scalar[n+8]; \
|
||||
ov3 += f_scalar[n+13] * x_scalar[n+12]; \
|
||||
ov4 += f_scalar[n+10] * x_scalar[n+8]; \
|
||||
ov4 += f_scalar[n+14] * x_scalar[n+12]; \
|
||||
ov5 += f_scalar[n+10] * x_scalar[n+9]; \
|
||||
ov5 += f_scalar[n+14] * x_scalar[n+13]; \
|
||||
} \
|
||||
} \
|
||||
_use_simd_pragma("vector aligned") \
|
||||
_use_simd_pragma("ivdep") \
|
||||
_use_simd_pragma("loop_count min(4) max(INTEL_COMPILE_WIDTH)") \
|
||||
for (int n = v_range; n < lt; n++) \
|
||||
f_scalar[n] += f_scalar2[n]; \
|
||||
} else if (nthreads==4) { \
|
||||
acc_t *f_scalar2 = f_scalar + f_stride4; \
|
||||
acc_t *f_scalar3 = f_scalar2 + f_stride4; \
|
||||
acc_t *f_scalar4 = f_scalar3 + f_stride4; \
|
||||
for (int n = lf; n < v_range; n += vwidth) { \
|
||||
_use_simd_pragma("omp simd aligned(f_scalar,f_scalar2,f_scalar3,f_scalar4,ovv:64)") \
|
||||
for (int v = 0; v < vwidth; v++) { \
|
||||
f_scalar[n+v] += f_scalar2[n+v] + f_scalar3[n+v] + \
|
||||
f_scalar4[n+v]; \
|
||||
ovv[v] += f_scalar[n+v] * x_scalar[n+v]; \
|
||||
} \
|
||||
ov3 += f_scalar[n+1] * x_scalar[n+0]; \
|
||||
ov4 += f_scalar[n+2] * x_scalar[n+0]; \
|
||||
ov5 += f_scalar[n+2] * x_scalar[n+1]; \
|
||||
if (vwidth > 4) { \
|
||||
ov3 += f_scalar[n+5] * x_scalar[n+4]; \
|
||||
ov4 += f_scalar[n+6] * x_scalar[n+4]; \
|
||||
ov5 += f_scalar[n+6] * x_scalar[n+5]; \
|
||||
} \
|
||||
if (vwidth > 8) { \
|
||||
ov3 += f_scalar[n+9] * x_scalar[n+8]; \
|
||||
ov3 += f_scalar[n+13] * x_scalar[n+12]; \
|
||||
ov4 += f_scalar[n+10] * x_scalar[n+8]; \
|
||||
ov4 += f_scalar[n+14] * x_scalar[n+12]; \
|
||||
ov5 += f_scalar[n+10] * x_scalar[n+9]; \
|
||||
ov5 += f_scalar[n+14] * x_scalar[n+13]; \
|
||||
} \
|
||||
} \
|
||||
_use_simd_pragma("vector aligned") \
|
||||
_use_simd_pragma("ivdep") \
|
||||
_use_simd_pragma("loop_count min(4) max(INTEL_COMPILE_WIDTH)") \
|
||||
for (int n = v_range; n < lt; n++) \
|
||||
f_scalar[n] += f_scalar2[n] + f_scalar3[n] + f_scalar4[n]; \
|
||||
} else if (nthreads==1) { \
|
||||
for (int n = lf; n < v_range; n += vwidth) { \
|
||||
_use_simd_pragma("omp simd aligned(ovv,f_scalar,x_scalar:64)") \
|
||||
for (int v = 0; v < vwidth; v++) \
|
||||
ovv[v] += f_scalar[n+v] * x_scalar[n+v]; \
|
||||
ov3 += f_scalar[n+1] * x_scalar[n+0]; \
|
||||
ov4 += f_scalar[n+2] * x_scalar[n+0]; \
|
||||
ov5 += f_scalar[n+2] * x_scalar[n+1]; \
|
||||
if (vwidth > 4) { \
|
||||
ov3 += f_scalar[n+5] * x_scalar[n+4]; \
|
||||
ov4 += f_scalar[n+6] * x_scalar[n+4]; \
|
||||
ov5 += f_scalar[n+6] * x_scalar[n+5]; \
|
||||
} \
|
||||
if (vwidth > 8) { \
|
||||
ov3 += f_scalar[n+9] * x_scalar[n+8]; \
|
||||
ov3 += f_scalar[n+13] * x_scalar[n+12]; \
|
||||
ov4 += f_scalar[n+10] * x_scalar[n+8]; \
|
||||
ov4 += f_scalar[n+14] * x_scalar[n+12]; \
|
||||
ov5 += f_scalar[n+10] * x_scalar[n+9]; \
|
||||
ov5 += f_scalar[n+14] * x_scalar[n+13]; \
|
||||
} \
|
||||
} \
|
||||
} else if (nthreads==3) { \
|
||||
acc_t *f_scalar2 = f_scalar + f_stride4; \
|
||||
acc_t *f_scalar3 = f_scalar2 + f_stride4; \
|
||||
for (int n = lf; n < v_range; n += vwidth) { \
|
||||
_use_simd_pragma("omp simd aligned(f_scalar,f_scalar2,f_scalar3,ovv,x_scalar:64)") \
|
||||
for (int v = 0; v < vwidth; v++) { \
|
||||
f_scalar[n+v] += f_scalar2[n+v] + f_scalar3[n+v]; \
|
||||
ovv[v] += f_scalar[n+v] * x_scalar[n+v]; \
|
||||
} \
|
||||
ov3 += f_scalar[n+1] * x_scalar[n+0]; \
|
||||
ov4 += f_scalar[n+2] * x_scalar[n+0]; \
|
||||
ov5 += f_scalar[n+2] * x_scalar[n+1]; \
|
||||
if (vwidth > 4) { \
|
||||
ov3 += f_scalar[n+5] * x_scalar[n+4]; \
|
||||
ov4 += f_scalar[n+6] * x_scalar[n+4]; \
|
||||
ov5 += f_scalar[n+6] * x_scalar[n+5]; \
|
||||
} \
|
||||
if (vwidth > 8) { \
|
||||
ov3 += f_scalar[n+9] * x_scalar[n+8]; \
|
||||
ov3 += f_scalar[n+13] * x_scalar[n+12]; \
|
||||
ov4 += f_scalar[n+10] * x_scalar[n+8]; \
|
||||
ov4 += f_scalar[n+14] * x_scalar[n+12]; \
|
||||
ov5 += f_scalar[n+10] * x_scalar[n+9]; \
|
||||
ov5 += f_scalar[n+14] * x_scalar[n+13]; \
|
||||
} \
|
||||
} \
|
||||
_use_simd_pragma("vector aligned") \
|
||||
_use_simd_pragma("ivdep") \
|
||||
_use_simd_pragma("loop_count min(4) max(INTEL_COMPILE_WIDTH)") \
|
||||
for (int n = v_range; n < lt; n++) \
|
||||
f_scalar[n] += f_scalar2[n] + f_scalar3[n]; \
|
||||
} \
|
||||
for (int n = v_range; n < lt; n += 4) { \
|
||||
_use_simd_pragma("vector aligned") \
|
||||
_use_simd_pragma("ivdep") \
|
||||
for (int v = 0; v < 4; v++) \
|
||||
ovv[v] += f_scalar[n+v] * x_scalar[n+v]; \
|
||||
ov3 += f_scalar[n+1] * x_scalar[n+0]; \
|
||||
ov4 += f_scalar[n+2] * x_scalar[n+0]; \
|
||||
ov5 += f_scalar[n+2] * x_scalar[n+1]; \
|
||||
} \
|
||||
ov0 += ovv[0]; \
|
||||
ov1 += ovv[1]; \
|
||||
ov2 += ovv[2]; \
|
||||
if (vwidth > 4) { \
|
||||
ov0 += ovv[4]; \
|
||||
ov1 += ovv[5]; \
|
||||
ov2 += ovv[6]; \
|
||||
} \
|
||||
if (vwidth > 8) { \
|
||||
ov0 += ovv[8] + ovv[12]; \
|
||||
ov1 += ovv[9] + ovv[13]; \
|
||||
ov2 += ovv[10] + ovv[14]; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define IP_PRE_fdotr_acc_force(nall, minlocal, nthreads, f_start, \
|
||||
f_stride, pos, offload, vflag, ov0, ov1, \
|
||||
ov2, ov3, ov4, ov5) \
|
||||
{ \
|
||||
int o_range = (nall - minlocal) * 4; \
|
||||
IP_PRE_omp_range_id_align(iifrom, iito, tid, o_range, nthreads, \
|
||||
sizeof(acc_t)); \
|
||||
\
|
||||
acc_t *f_scalar = &f_start[0].x; \
|
||||
int f_stride4 = f_stride * 4; \
|
||||
int t; \
|
||||
if (vflag == VIRIAL_FDOTR) t = 4; else t = 1; \
|
||||
acc_t *f_scalar2 = f_scalar + f_stride4 * t; \
|
||||
for ( ; t < nthreads; t++) { \
|
||||
_use_simd_pragma("omp simd aligned(f_scalar,f_scalar2:64)") \
|
||||
for (int n = iifrom; n < iito; n++) \
|
||||
f_scalar[n] += f_scalar2[n]; \
|
||||
f_scalar2 += f_stride4; \
|
||||
} \
|
||||
\
|
||||
if (vflag == VIRIAL_FDOTR) { \
|
||||
int nt_min = MIN(4,nthreads); \
|
||||
IP_PRE_fdotr_acc_force_l5(iifrom, iito, minlocal, nt_min, f_start, \
|
||||
f_stride, pos, ov0, ov1, ov2, ov3, ov4, \
|
||||
ov5); \
|
||||
} \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
#include <sys/time.h>
|
||||
|
||||
|
||||
@ -173,7 +173,7 @@ namespace ip_simd {
|
||||
}
|
||||
|
||||
inline SIMD_double SIMD_gather(const double *p, const SIMD_int &i) {
|
||||
return _mm512_i32logather_pd(i, p, _MM_SCALE_8);
|
||||
return _mm512_i32gather_pd(_mm512_castsi512_si256(i), p, _MM_SCALE_8);
|
||||
}
|
||||
|
||||
inline SIMD_int SIMD_gather(const SIMD_mask &m, const int *p,
|
||||
@ -190,8 +190,8 @@ namespace ip_simd {
|
||||
|
||||
inline SIMD_double SIMD_gather(const SIMD_mask &m, const double *p,
|
||||
const SIMD_int &i) {
|
||||
return _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, p,
|
||||
_MM_SCALE_8);
|
||||
return _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
|
||||
_mm512_castsi512_si256(i), p, _MM_SCALE_8);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@ -227,8 +227,8 @@ namespace ip_simd {
|
||||
|
||||
inline SIMD_double SIMD_gatherz(const SIMD_mask &m, const double *p,
|
||||
const SIMD_int &i) {
|
||||
return _mm512_mask_i32logather_pd( _mm512_set1_pd(0.0), m, i, p,
|
||||
_MM_SCALE_8);
|
||||
return _mm512_mask_i32gather_pd( _mm512_set1_pd(0.0), m,
|
||||
_mm512_castsi512_si256(i),p, _MM_SCALE_8);
|
||||
}
|
||||
|
||||
// ------- Store Operations
|
||||
@ -257,7 +257,8 @@ namespace ip_simd {
|
||||
|
||||
inline void SIMD_scatter(const SIMD_mask &m, double *p,
|
||||
const SIMD_int &i, const SIMD_double &vec) {
|
||||
_mm512_mask_i32loscatter_pd(p, m, i, vec, _MM_SCALE_8);
|
||||
_mm512_mask_i32scatter_pd(p, m, _mm512_castsi512_si256(i), vec,
|
||||
_MM_SCALE_8);
|
||||
}
|
||||
|
||||
// ------- Arithmetic Operations
|
||||
@ -834,23 +835,29 @@ namespace ip_simd {
|
||||
inline void SIMD_atom_gather(const SIMD_mask &m, const double *atom,
|
||||
const SIMD_int &i, SIMD_double &x,
|
||||
SIMD_double &y, SIMD_double &z) {
|
||||
x = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, atom,
|
||||
_MM_SCALE_2);
|
||||
y = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, atom+1,
|
||||
_MM_SCALE_2);
|
||||
z = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, atom+2,
|
||||
_MM_SCALE_2);
|
||||
x = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
|
||||
_mm512_castsi512_si256(i), atom,
|
||||
_MM_SCALE_2);
|
||||
y = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
|
||||
_mm512_castsi512_si256(i), atom+1,
|
||||
_MM_SCALE_2);
|
||||
z = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
|
||||
_mm512_castsi512_si256(i), atom+2,
|
||||
_MM_SCALE_2);
|
||||
}
|
||||
|
||||
inline void SIMD_atom_gather(const SIMD_mask &m, const double *atom,
|
||||
const SIMD_int &i, SIMD_double &x,
|
||||
SIMD_double &y, SIMD_double &z, SIMD_int &type) {
|
||||
x = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, atom,
|
||||
_MM_SCALE_2);
|
||||
y = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, atom+1,
|
||||
_MM_SCALE_2);
|
||||
z = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, atom+2,
|
||||
_MM_SCALE_2);
|
||||
x = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
|
||||
_mm512_castsi512_si256(i), atom,
|
||||
_MM_SCALE_2);
|
||||
y = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
|
||||
_mm512_castsi512_si256(i), atom+1,
|
||||
_MM_SCALE_2);
|
||||
z = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
|
||||
_mm512_castsi512_si256(i), atom+2,
|
||||
_MM_SCALE_2);
|
||||
type = _mm512_mask_i32gather_epi32(_mm512_undefined_epi32(), m, i, atom+3,
|
||||
_MM_SCALE_2);
|
||||
}
|
||||
@ -888,10 +895,12 @@ namespace ip_simd {
|
||||
const SIMD_int &joffset, SIMD_double &eng) {
|
||||
SIMD_double jeng;
|
||||
SIMD_conflict_pi_reduce1(rmask, joffset, eng);
|
||||
jeng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset,
|
||||
force, _MM_SCALE_2);
|
||||
jeng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
|
||||
_mm512_castsi512_si256(joffset),
|
||||
force, _MM_SCALE_2);
|
||||
jeng = jeng + eng;
|
||||
_mm512_mask_i32loscatter_pd(force, rmask, joffset, jeng, _MM_SCALE_2);
|
||||
_mm512_mask_i32scatter_pd(force, rmask, _mm512_castsi512_si256(joffset),
|
||||
jeng, _MM_SCALE_2);
|
||||
}
|
||||
|
||||
inline void SIMD_jeng_update(const SIMD_mask &rmask, double *force,
|
||||
@ -899,20 +908,24 @@ namespace ip_simd {
|
||||
SIMD_double engd, jeng;
|
||||
engd = _mm512_cvtps_pd(_mm512_castps512_ps256(eng));
|
||||
SIMD_conflict_pi_reduce1(rmask, joffset, engd);
|
||||
jeng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset,
|
||||
force, _MM_SCALE_2);
|
||||
jeng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
|
||||
_mm512_castsi512_si256(joffset),
|
||||
force, _MM_SCALE_2);
|
||||
jeng = jeng + engd;
|
||||
_mm512_mask_i32loscatter_pd(force, rmask, joffset, jeng, _MM_SCALE_2);
|
||||
_mm512_mask_i32scatter_pd(force, rmask, _mm512_castsi512_si256(joffset),
|
||||
jeng, _MM_SCALE_2);
|
||||
|
||||
SIMD_mask rmask2 = rmask >> 8;
|
||||
engd = _mm512_cvtps_pd(_mm512_castps512_ps256(
|
||||
_mm512_shuffle_f32x4(eng,eng,238)));
|
||||
SIMD_int joffset2 = _mm512_shuffle_i32x4(joffset, joffset, 238);
|
||||
SIMD_conflict_pi_reduce1(rmask2, joffset2, engd);
|
||||
jeng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask2, joffset2,
|
||||
force, _MM_SCALE_2);
|
||||
jeng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask2,
|
||||
_mm512_castsi512_si256(joffset2),
|
||||
force, _MM_SCALE_2);
|
||||
jeng = jeng + engd;
|
||||
_mm512_mask_i32loscatter_pd(force, rmask2, joffset2, jeng, _MM_SCALE_2);
|
||||
_mm512_mask_i32scatter_pd(force, rmask2, _mm512_castsi512_si256(joffset2),
|
||||
jeng, _MM_SCALE_2);
|
||||
}
|
||||
|
||||
inline void SIMD_jeng_update_hi(const SIMD_mask &mask, float *force,
|
||||
@ -926,10 +939,12 @@ namespace ip_simd {
|
||||
|
||||
SIMD_double jeng;
|
||||
SIMD_conflict_pi_reduce1(rmask, joffset, eng);
|
||||
jeng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset,
|
||||
force, _MM_SCALE_2);
|
||||
jeng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
|
||||
_mm512_castsi512_si256(joffset),
|
||||
force, _MM_SCALE_2);
|
||||
jeng = jeng + eng;
|
||||
_mm512_mask_i32loscatter_pd(force, rmask, joffset, jeng, _MM_SCALE_2);
|
||||
_mm512_mask_i32scatter_pd(force, rmask, _mm512_castsi512_si256(joffset),
|
||||
jeng, _MM_SCALE_2);
|
||||
}
|
||||
|
||||
inline void SIMD_safe_jforce(const SIMD_mask &m, float *force,
|
||||
@ -956,18 +971,24 @@ namespace ip_simd {
|
||||
SIMD_double &fy, SIMD_double &fz) {
|
||||
SIMD_conflict_pi_reduce3(m, i, fx, fy, fz);
|
||||
SIMD_double jfrc;
|
||||
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force,
|
||||
_MM_SCALE_2);
|
||||
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
|
||||
_mm512_castsi512_si256(i), force,
|
||||
_MM_SCALE_2);
|
||||
jfrc = jfrc + fx;
|
||||
_mm512_mask_i32loscatter_pd(force, m, i, jfrc, _MM_SCALE_2);
|
||||
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force + 1,
|
||||
_MM_SCALE_2);
|
||||
_mm512_mask_i32scatter_pd(force, m, _mm512_castsi512_si256(i), jfrc,
|
||||
_MM_SCALE_2);
|
||||
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
|
||||
_mm512_castsi512_si256(i), force + 1,
|
||||
_MM_SCALE_2);
|
||||
jfrc = jfrc + fy;
|
||||
_mm512_mask_i32loscatter_pd(force+1, m, i, jfrc, _MM_SCALE_2);
|
||||
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force + 2,
|
||||
_MM_SCALE_2);
|
||||
_mm512_mask_i32scatter_pd(force+1, m, _mm512_castsi512_si256(i), jfrc,
|
||||
_MM_SCALE_2);
|
||||
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
|
||||
_mm512_castsi512_si256(i), force + 2,
|
||||
_MM_SCALE_2);
|
||||
jfrc = jfrc + fz;
|
||||
_mm512_mask_i32loscatter_pd(force+2, m, i, jfrc, _MM_SCALE_2);
|
||||
_mm512_mask_i32scatter_pd(force+2, m, _mm512_castsi512_si256(i), jfrc,
|
||||
_MM_SCALE_2);
|
||||
}
|
||||
|
||||
inline void SIMD_safe_jforce(const SIMD_mask &rmask, double *force,
|
||||
@ -979,40 +1000,54 @@ namespace ip_simd {
|
||||
amzd = _mm512_cvtps_pd(_mm512_castps512_ps256(amz));
|
||||
SIMD_conflict_pi_reduce3(rmask, joffset, amxd, amyd, amzd);
|
||||
SIMD_double jfrc;
|
||||
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset,
|
||||
force, _MM_SCALE_2);
|
||||
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
|
||||
_mm512_castsi512_si256(joffset),
|
||||
force, _MM_SCALE_2);
|
||||
jfrc = jfrc + amxd;
|
||||
_mm512_mask_i32loscatter_pd(force, rmask, joffset, jfrc, _MM_SCALE_2);
|
||||
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset,
|
||||
force + 1, _MM_SCALE_2);
|
||||
_mm512_mask_i32scatter_pd(force, rmask, _mm512_castsi512_si256(joffset),
|
||||
jfrc, _MM_SCALE_2);
|
||||
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
|
||||
_mm512_castsi512_si256(joffset),
|
||||
force + 1, _MM_SCALE_2);
|
||||
jfrc = jfrc + amyd;
|
||||
_mm512_mask_i32loscatter_pd(force+1, rmask, joffset, jfrc, _MM_SCALE_2);
|
||||
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset,
|
||||
force + 2, _MM_SCALE_2);
|
||||
_mm512_mask_i32scatter_pd(force+1, rmask, _mm512_castsi512_si256(joffset),
|
||||
jfrc, _MM_SCALE_2);
|
||||
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask,
|
||||
_mm512_castsi512_si256(joffset),
|
||||
force + 2, _MM_SCALE_2);
|
||||
jfrc = jfrc + amzd;
|
||||
_mm512_mask_i32loscatter_pd(force+2, rmask, joffset, jfrc, _MM_SCALE_2);
|
||||
_mm512_mask_i32scatter_pd(force+2, rmask, _mm512_castsi512_si256(joffset),
|
||||
jfrc, _MM_SCALE_2);
|
||||
|
||||
SIMD_mask rmask2 = rmask >> 8;
amxd = _mm512_cvtps_pd(_mm512_castps512_ps256(
_mm512_shuffle_f32x4(amx,amx,238)));
amyd = _mm512_cvtps_pd(_mm512_castps512_ps256(
_mm512_shuffle_f32x4(amy,amy,238)));
amzd = _mm512_cvtps_pd(_mm512_castps512_ps256(
_mm512_shuffle_f32x4(amz,amz,238)));
SIMD_int joffset2 = _mm512_shuffle_i32x4(joffset, joffset, 238);
SIMD_conflict_pi_reduce3(rmask2, joffset2, amxd, amyd, amzd);
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask2, joffset2,
force, _MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask2,
_mm512_castsi512_si256(joffset2),
force, _MM_SCALE_2);
jfrc = jfrc + amxd;
_mm512_mask_i32loscatter_pd(force, rmask2, joffset2, jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask2, joffset2,
force + 1, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force, rmask2, _mm512_castsi512_si256(joffset2),
jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask2,
_mm512_castsi512_si256(joffset2),
force + 1, _MM_SCALE_2);
jfrc = jfrc + amyd;
_mm512_mask_i32loscatter_pd(force+1, rmask2, joffset2, jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask2, joffset2,
force + 2, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force+1, rmask2,
_mm512_castsi512_si256(joffset2), jfrc,
_MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), rmask2,
_mm512_castsi512_si256(joffset2),
force + 2, _MM_SCALE_2);
jfrc = jfrc + amzd;
_mm512_mask_i32loscatter_pd(force+2, rmask2, joffset2, jfrc, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force+2, rmask2,
_mm512_castsi512_si256(joffset2), jfrc,
_MM_SCALE_2);
}

inline void SIMD_jforce_update(const SIMD_mask &m, float *force,
@ -1064,18 +1099,24 @@ namespace ip_simd {
const SIMD_int &i, const SIMD_double &fx,
const SIMD_double &fy, const SIMD_double &fz) {
SIMD_double jfrc;
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force,
_MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), force,
_MM_SCALE_2);
jfrc = jfrc - fx;
_mm512_mask_i32loscatter_pd(force, m, i, jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force + 1,
_MM_SCALE_2);
_mm512_mask_i32scatter_pd(force, m, _mm512_castsi512_si256(i), jfrc,
_MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), force + 1,
_MM_SCALE_2);
jfrc = jfrc - fy;
_mm512_mask_i32loscatter_pd(force+1, m, i, jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force + 2,
_MM_SCALE_2);
_mm512_mask_i32scatter_pd(force+1, m, _mm512_castsi512_si256(i), jfrc,
_MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), force + 2,
_MM_SCALE_2);
jfrc = jfrc - fz;
_mm512_mask_i32loscatter_pd(force+2, m, i, jfrc, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force+2, m, _mm512_castsi512_si256(i), jfrc,
_MM_SCALE_2);
}

inline void SIMD_jforce_update(const SIMD_mask &rmask,
@ -1502,11 +1543,12 @@ namespace ip_simd {
fwtmp = SIMD_add(fwtmp, hmask, fwtmp, hevdwl);
fjtmp = SIMD_add(fjtmp, hmask, fjtmp, hevdwl);
SIMD_conflict_pi_reduce1(hmask, k, hevdwl);
SIMD_double keng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(),
hmask, k, force + 3,
_MM_SCALE_2);
SIMD_double keng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), hmask,
_mm512_castsi512_si256(k),
force + 3, _MM_SCALE_2);
keng = keng + hevdwl;
_mm512_mask_i32loscatter_pd(force + 3, hmask, k, keng, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force + 3, hmask, _mm512_castsi512_si256(k),
keng, _MM_SCALE_2);
}
}

@ -1523,11 +1565,12 @@ namespace ip_simd {
fwtmp = SIMD_add(fwtmp, hmask, fwtmp, hevdwl);
fjtmp = SIMD_add(fjtmp, hmask, fjtmp, hevdwl);
SIMD_conflict_pi_reduce1(hmask, k, hevdwl);
SIMD_double keng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(),
hmask, k, force + 3,
_MM_SCALE_2);
SIMD_double keng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), hmask,
_mm512_castsi512_si256(k),
force + 3, _MM_SCALE_2);
keng = keng + hevdwl;
_mm512_mask_i32loscatter_pd(force + 3, hmask, k, keng, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force + 3, hmask, _mm512_castsi512_si256(k),
keng, _MM_SCALE_2);
}
SIMD_mask hmask2 = hmask >> 8;
facradd = _mm512_cvtps_pd(_mm512_castps512_ps256(
@ -1539,11 +1582,13 @@ namespace ip_simd {
fjtmp2 = SIMD_add(fjtmp2, hmask2, fjtmp2, hevdwl);
SIMD_int k2 = _mm512_shuffle_i32x4(k, k, 238);
SIMD_conflict_pi_reduce1(hmask2, k2, hevdwl);
SIMD_double keng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(),
hmask2, k2, force + 3,
_MM_SCALE_2);
SIMD_double keng = _mm512_mask_i32gather_pd(_mm512_undefined_pd(),
hmask2,
_mm512_castsi512_si256(k2),
force + 3, _MM_SCALE_2);
keng = keng + hevdwl;
_mm512_mask_i32loscatter_pd(force + 3, hmask2, k2, keng, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force + 3, hmask2, _mm512_castsi512_si256(k2),
keng, _MM_SCALE_2);
}
}

@ -1815,24 +1860,32 @@ namespace ip_simd {
const int EFLAG, const int eatom,
const SIMD_double &fwtmp) {
SIMD_double jfrc;
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force,
_MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), force,
_MM_SCALE_2);
jfrc = jfrc + fx;
_mm512_mask_i32loscatter_pd(force, m, i, jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force + 1,
_MM_SCALE_2);
_mm512_mask_i32scatter_pd(force, m, _mm512_castsi512_si256(i), jfrc,
_MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), force + 1,
_MM_SCALE_2);
jfrc = jfrc + fy;
_mm512_mask_i32loscatter_pd(force+1, m, i, jfrc, _MM_SCALE_2);
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force + 2,
_MM_SCALE_2);
_mm512_mask_i32scatter_pd(force+1, m, _mm512_castsi512_si256(i), jfrc,
_MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i), force + 2,
_MM_SCALE_2);
jfrc = jfrc + fz;
_mm512_mask_i32loscatter_pd(force+2, m, i, jfrc, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force+2, m, _mm512_castsi512_si256(i), jfrc,
_MM_SCALE_2);
if (EFLAG) {
if (eatom) {
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i,
force + 3, _MM_SCALE_2);
jfrc = _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
_mm512_castsi512_si256(i),
force + 3, _MM_SCALE_2);
jfrc = jfrc + fwtmp;
_mm512_mask_i32loscatter_pd(force+3, m, i, jfrc, _MM_SCALE_2);
_mm512_mask_i32scatter_pd(force+3, m, _mm512_castsi512_si256(i), jfrc,
_MM_SCALE_2);
}
}
}

@ -324,7 +324,11 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
const int bstart = binhead[ibin + binstart[k]];
const int bend = binhead[ibin + binend[k]];
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int jj = bstart; jj < bend; jj++)
tj[ncount++] = binpacked[jj];
@ -345,15 +349,23 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
const int bstart = binhead[ibin + stencil[k]];
const int bend = binhead[ibin + stencil[k] + 1];
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int jj = bstart; jj < bend; jj++)
tj[ncount++] = binpacked[jj];
}
} // if i < nlocal
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int u = 0; u < ncount; u++) {
const int j = tj[u];
@ -425,12 +437,16 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
int alln = n;
n = 0;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#ifdef LMP_INTEL_NBOR_COMPAT
#pragma ivdep
#else
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
#pragma vector aligned
#endif
for (int u = 0; u < alln; u++) {
int which;
@ -454,12 +470,16 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
alln = n2;
n2 = maxnbors * 2;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#ifdef LMP_INTEL_NBOR_COMPAT
#pragma ivdep
#else
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
#pragma vector aligned
#endif
for (int u = n2; u < alln; u++) {
int which;

@ -344,14 +344,22 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
const int bstart = binhead[ibin + binstart[k]];
const int bend = binhead[ibin + binend[k]];
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int jj = bstart; jj < bend; jj++)
tj[ncount++] = binpacked[jj];
}
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int u = 0; u < ncount; u++) {
const int j = tj[u];
@ -375,7 +383,11 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
const int bstart = binhead[ibin];
const int bend = binhead[ibin + 1];
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int jj = bstart; jj < bend; jj++) {
const int j = binpacked[jj];
@ -533,12 +545,16 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,

n = pack_offset;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#ifdef LMP_INTEL_NBOR_COMPAT
#pragma ivdep
#else
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
#pragma vector aligned
#endif
for (int u = n; u < alln; u++) {
int which;
@ -566,12 +582,16 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
n2 = pack_offset + maxnbors;

#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#ifdef LMP_INTEL_NBOR_COMPAT
#pragma ivdep
#else
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
#pragma vector aligned
#endif
for (int u = n2; u < alln; u++) {
int which;
@ -737,8 +757,14 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
int jnum = numneigh[i];
if (!THREE) IP_PRE_neighbor_pad(jnum, offload);
#if __INTEL_COMPILER+0 > 1499
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(max:vlmax,vgmax) \
reduction(min:vlmin, vgmin)
#else
#pragma simd reduction(max:vlmax,vgmax) \
reduction(min:vlmin, vgmin)
#endif
#pragma vector aligned
#pragma simd reduction(max:vlmax,vgmax) reduction(min:vlmin, vgmin)
#endif
for (int jj = 0; jj < jnum; jj++) {
const int j = jlist[jj] & NEIGHMASK;
@ -782,8 +808,12 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
int jnum = numneigh[i];
if (!THREE) IP_PRE_neighbor_pad(jnum, offload);
int jj = 0;
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
for (jj = 0; jj < jnum; jj++) {
const int which = jlist[jj] >> SBBITS & 3;
const int j = jlist[jj] & NEIGHMASK;

@ -292,8 +292,9 @@ void PairAIREBOIntel::compute(
ev_init(eflag,vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");

pvector[0] = pvector[1] = pvector[2] = 0.0;

@ -77,8 +77,9 @@ void PairBuckCoulCutIntel::compute(int eflag, int vflag,
ev_init(eflag,vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");

const int inum = list->inum;
const int nthreads = comm->nthreads;
@ -248,12 +249,18 @@ void PairBuckCoulCutIntel::eval(const int offload, const int vflag,
fxtmp = fytmp = fztmp = (acc_t)0;
if (EFLAG) fwtmp = sevdwl = secoul = (acc_t)0;
if (NEWTON_PAIR == 0)
if (vflag == VIRIAL_PAIR) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
if (vflag == VIRIAL_PAIR)
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;

#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif
#pragma vector aligned
#endif
for (int jj = 0; jj < jnum; jj++) {
flt_t forcecoul, forcebuck, evdwl, ecoul;

@ -77,8 +77,9 @@ void PairBuckCoulLongIntel::compute(int eflag, int vflag,
ev_init(eflag,vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");

const int inum = list->inum;
const int nthreads = comm->nthreads;
@ -309,9 +310,14 @@ void PairBuckCoulLongIntel::eval(const int offload, const int vflag,
}

#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
secoul, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
secoul, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
#pragma vector aligned
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int jj = 0; jj < ej; jj++) {
flt_t forcecoul, forcebuck, evdwl, ecoul;

@ -70,8 +70,9 @@ void PairBuckIntel::compute(int eflag, int vflag,
ev_init(eflag,vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");

const int inum = list->inum;
const int nthreads = comm->nthreads;
@ -230,12 +231,18 @@ void PairBuckIntel::eval(const int offload, const int vflag,
fxtmp = fytmp = fztmp = (acc_t)0;
if (EFLAG) fwtmp = sevdwl = (acc_t)0;
if (NEWTON_PAIR == 0)
if (vflag == VIRIAL_PAIR) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
if (vflag == VIRIAL_PAIR)
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;

#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif
#pragma vector aligned
#endif
for (int jj = 0; jj < jnum; jj++) {

@ -89,8 +89,9 @@ void PairDPDIntel::compute(int eflag, int vflag,
ev_init(eflag, vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");

const int inum = list->inum;
const int nthreads = comm->nthreads;
@ -289,9 +290,14 @@ void PairDPDIntel::eval(const int offload, const int vflag,
}

#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif
#pragma vector aligned
#endif
for (int jj = 0; jj < jnum; jj++) {
flt_t forcelj, evdwl;

@ -82,8 +82,9 @@ void PairEAMIntel::compute(int eflag, int vflag,
ev_init(eflag, vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");

const int inum = list->inum;
const int nthreads = comm->nthreads;
@ -327,8 +328,12 @@ void PairEAMIntel::eval(const int offload, const int vflag,
}

#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:rhoi)
#else
#pragma simd reduction(+:rhoi)
#endif
#pragma vector aligned
#endif
for (int jj = 0; jj < ej; jj++) {
int jtype;
@ -369,23 +374,35 @@ void PairEAMIntel::eval(const int offload, const int vflag,
const int rcount = nall;
if (nthreads == 2) {
double *trho2 = rho + nmax;
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
for (int n = 0; n < rcount; n++)
rho[n] += trho2[n];
} else if (nthreads == 4) {
double *trho2 = rho + nmax;
double *trho3 = trho2 + nmax;
double *trho4 = trho3 + nmax;
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
for (int n = 0; n < rcount; n++)
rho[n] += trho2[n] + trho3[n] + trho4[n];
} else {
double *trhon = rho + nmax;
for (int t = 1; t < nthreads; t++) {
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
for (int n = 0; n < rcount; n++)
rho[n] += trhon[n];
trhon += nmax;
@ -414,8 +431,12 @@ void PairEAMIntel::eval(const int offload, const int vflag,
if (EFLAG) tevdwl = (acc_t)0.0;

#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:tevdwl)
#else
#pragma simd reduction(+:tevdwl)
#endif
#pragma vector aligned
#endif
for (int ii = iifrom; ii < iito; ++ii) {
const int i = ilist[ii];
@ -510,9 +531,14 @@ void PairEAMIntel::eval(const int offload, const int vflag,
}

#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif
#pragma vector aligned
#endif
for (int jj = 0; jj < ej; jj++) {
int jtype;

@ -76,8 +76,9 @@ void PairGayBerneIntel::compute(int eflag, int vflag,
ev_init(eflag, vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");

const int inum = list->inum;
const int nall = atom->nlocal + atom->nghost;
@ -449,9 +450,14 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
__assume(packed_j % INTEL_MIC_VECTOR_WIDTH == 0);
#endif
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp,fytmp,fztmp,fwtmp,t1tmp,t2tmp, \
t3tmp,sevdwl,sv0,sv1,sv2,sv3,sv4,sv5)
#else
#pragma simd reduction(+:fxtmp,fytmp,fztmp,fwtmp,t1tmp,t2tmp, \
t3tmp,sevdwl,sv0,sv1,sv2,sv3,sv4,sv5)
#endif
#pragma vector aligned
#pragma simd reduction(+:fxtmp,fytmp,fztmp,fwtmp,t1tmp,t2tmp,t3tmp, \
sevdwl,sv0,sv1,sv2,sv3,sv4,sv5)
#endif
for (int jj = 0; jj < packed_j; jj++) {
flt_t a2_0, a2_1, a2_2, a2_3, a2_4, a2_5, a2_6, a2_7, a2_8;
@ -806,8 +812,12 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
acc_t *f_scalar2 = f_scalar + fst4;
for (int t = 1; t < nthreads; t++) {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma vector aligned
#endif
for (int n = iifrom * 8; n < sto; n++)
f_scalar[n] += f_scalar2[n];

@ -73,8 +73,9 @@ void PairLJCharmmCoulCharmmIntel::compute(int eflag, int vflag,
ev_init(eflag,vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");

const int inum = list->inum;
const int nthreads = comm->nthreads;
@ -294,9 +295,14 @@ void PairLJCharmmCoulCharmmIntel::eval(const int offload, const int vflag,
}

#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
secoul, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
secoul, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
#pragma vector aligned
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int jj = 0; jj < ej; jj++) {
flt_t forcecoul, forcelj, evdwl;

@ -77,8 +77,9 @@ void PairLJCharmmCoulLongIntel::compute(int eflag, int vflag,
ev_init(eflag,vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");

const int inum = list->inum;
const int nthreads = comm->nthreads;
@ -314,9 +315,14 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
}

#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
secoul, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
secoul, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
#pragma vector aligned
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int jj = 0; jj < ej; jj++) {
flt_t forcecoul, forcelj, evdwl, ecoul;

@ -76,8 +76,9 @@ void PairLJCutCoulLongIntel::compute(int eflag, int vflag,
ev_init(eflag,vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");

const int inum = list->inum;
const int nthreads = comm->nthreads;
@ -305,9 +306,14 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
}

#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
secoul, sv0, sv1, sv2, sv3, sv4, sv5)
#else
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
secoul, sv0, sv1, sv2, sv3, sv4, sv5)
#endif
#pragma vector aligned
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int jj = 0; jj < ej; jj++) {
flt_t forcecoul, forcelj, evdwl, ecoul;

@ -68,8 +68,9 @@ void PairLJCutIntel::compute(int eflag, int vflag,
ev_init(eflag, vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");

const int inum = list->inum;
const int nthreads = comm->nthreads;
@ -241,9 +242,15 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
if (vflag == VIRIAL_PAIR) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;

#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5) \
aligned(jlist,x,ljc12oi,special_lj,f,lj34i:64)
#else
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#pragma vector aligned
#endif
#endif
for (int jj = 0; jj < jnum; jj++) {
flt_t forcelj, evdwl;

@ -97,8 +97,9 @@ void PairSWIntel::compute(int eflag, int vflag,
ev_init(eflag, vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");

const int inum = list->inum;
const int nthreads = comm->nthreads;
@ -371,8 +372,12 @@ void PairSWIntel::eval(const int offload, const int vflag,
}

#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl)
#else
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl)
#endif
#pragma vector aligned
#endif
for (int jj = 0; jj < ejnum_pad; jj++) {
acc_t fjxtmp, fjytmp, fjztmp, fjtmp;

@ -91,8 +91,9 @@ void PairTersoffIntel::compute(int eflag, int vflag,
ev_init(eflag,vflag);
if (vflag_atom)
error->all(FLERR,"INTEL package does not support per-atom stress");
if (vflag && !vflag_fdotr)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr");
if (vflag && !vflag_fdotr && force->newton_pair)
error->all(FLERR,"INTEL package does not support pair_modify nofdotr "
"with newton on");

const int inum = list->inum;
const int nthreads = comm->nthreads;

@ -770,8 +770,12 @@ void PPPMDispIntel::particle_map(double delx, double dely, double delz,
IP_PRE_omp_range_id_align(iifrom, iito, tid, nlocal, nthr, sizeof(ATOM_T));

#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:flag)
#else
#pragma simd reduction(+:flag)
#endif
#pragma vector aligned
#endif
for (int i = iifrom; i < iito; i++) {

@ -876,7 +880,11 @@ void PPPMDispIntel::make_rho_c(IntelBuffers<flt_t,acc_t> * /*buffers*/)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho_lookup[idx][k];
@ -885,7 +893,11 @@ void PPPMDispIntel::make_rho_c(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower; k <= nupper; k++) {
FFT_SCALAR r1,r2,r3;
@ -917,8 +929,12 @@ void PPPMDispIntel::make_rho_c(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int mzy = m*nix + mz;
FFT_SCALAR x0 = y0*rho[1][m];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mzyx = l + mzy;
@ -939,7 +955,11 @@ void PPPMDispIntel::make_rho_c(IntelBuffers<flt_t,acc_t> * /*buffers*/)
IP_PRE_omp_range_id(ifrom, ito, tid, ngrid, nthr);

#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int i = ifrom; i < ito; i++) {
for (int j = 1; j < nthr; j++) {
@ -1025,7 +1045,11 @@ void PPPMDispIntel::make_rho_g(IntelBuffers<flt_t,acc_t> * /*buffers*/)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho6_lookup[idx][k];
@ -1034,7 +1058,11 @@ void PPPMDispIntel::make_rho_g(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower_6; k <= nupper_6; k++) {
FFT_SCALAR r1,r2,r3;
@ -1067,8 +1095,12 @@ void PPPMDispIntel::make_rho_g(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int mzy = m*nix + mz;
FFT_SCALAR x0 = y0*rho[1][m];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mzyx = l + mzy;
@ -1089,7 +1121,11 @@ void PPPMDispIntel::make_rho_g(IntelBuffers<flt_t,acc_t> * /*buffers*/)
IP_PRE_omp_range_id(ifrom, ito, tid, ngrid_6, nthr);

#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int i = ifrom; i < ito; i++) {
for (int j = 1; j < nthr; j++) {
@ -1173,7 +1209,11 @@ void PPPMDispIntel::make_rho_a(IntelBuffers<flt_t,acc_t> * /*buffers*/)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho6_lookup[idx][k];
@ -1182,7 +1222,11 @@ void PPPMDispIntel::make_rho_a(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower_6; k <= nupper_6; k++) {
FFT_SCALAR r1,r2,r3;
@ -1215,8 +1259,12 @@ void PPPMDispIntel::make_rho_a(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int my = m + nysum;
FFT_SCALAR x0 = y0*rho[1][m];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mx = l + nxsum;
@ -1307,7 +1355,11 @@ void PPPMDispIntel::make_rho_none(IntelBuffers<flt_t,acc_t> * /*buffers*/)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho6_lookup[idx][k];
@ -1316,7 +1368,11 @@ void PPPMDispIntel::make_rho_none(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower_6; k <= nupper_6; k++) {
FFT_SCALAR r1,r2,r3;
@ -1349,8 +1405,12 @@ void PPPMDispIntel::make_rho_none(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int mzy = m*nix + mz;
FFT_SCALAR x0 = y0*rho[1][m];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mzyx = l + mzy;
@ -1373,7 +1433,11 @@ void PPPMDispIntel::make_rho_none(IntelBuffers<flt_t,acc_t> * /*buffers*/)
IP_PRE_omp_range_id(ifrom, ito, tid, ngrid_6*nsplit, nthr);

#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int i = ifrom; i < ito; i++) {
for (int j = 1; j < nthr; j++) {
@ -1454,7 +1518,11 @@ void PPPMDispIntel::fieldforce_c_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho0[k] = rho_lookup[idx][k];
@ -1463,7 +1531,11 @@ void PPPMDispIntel::fieldforce_c_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower; k <= nupper; k++) {
FFT_SCALAR r1 = rho_coeff[order-1][k];
@ -1498,8 +1570,12 @@ void PPPMDispIntel::fieldforce_c_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int my = m+nysum;
FFT_SCALAR y0 = z0*rho1[m];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mx = l+nxsum;
@ -1624,7 +1700,11 @@ void PPPMDispIntel::fieldforce_c_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int idz = dz;

#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho_lookup[idx][k];
@ -1636,7 +1716,11 @@ void PPPMDispIntel::fieldforce_c_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower; k <= nupper; k++) {
FFT_SCALAR r1,r2,r3,dr1,dr2,dr3;
@ -1680,8 +1764,12 @@ void PPPMDispIntel::fieldforce_c_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
FFT_SCALAR eky_p = drho[1][m] * rho[2][n];
FFT_SCALAR ekz_p = rho[1][m] * drho[2][n];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mx = l + nxsum;
@ -1702,7 +1790,11 @@ void PPPMDispIntel::fieldforce_c_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
}
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int i = ifrom; i < ito; i++) {
particle_ekx[i] *= hx_inv;
@ -1802,7 +1894,11 @@ void PPPMDispIntel::fieldforce_g_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho0[k] = rho6_lookup[idx][k];
@ -1811,7 +1907,11 @@ void PPPMDispIntel::fieldforce_g_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower_6; k <= nupper_6; k++) {
FFT_SCALAR r1 = rho_coeff_6[order_6-1][k];
@ -1846,8 +1946,12 @@ void PPPMDispIntel::fieldforce_g_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int my = m+nysum;
FFT_SCALAR y0 = z0*rho1[m];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mx = l+nxsum;
@ -1967,7 +2071,11 @@ void PPPMDispIntel::fieldforce_g_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int idz = dz;

#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho6_lookup[idx][k];
@ -1979,7 +2087,11 @@ void PPPMDispIntel::fieldforce_g_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower_6; k <= nupper_6; k++) {
FFT_SCALAR r1,r2,r3,dr1,dr2,dr3;
@ -2023,8 +2135,12 @@ void PPPMDispIntel::fieldforce_g_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
FFT_SCALAR eky_p = drho[1][m] * rho[2][n];
FFT_SCALAR ekz_p = rho[1][m] * drho[2][n];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mx = l + nxsum;
@ -2045,7 +2161,11 @@ void PPPMDispIntel::fieldforce_g_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
}
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int i = ifrom; i < ito; i++) {
particle_ekx[i] *= hx_inv;
@ -2143,7 +2263,11 @@ void PPPMDispIntel::fieldforce_a_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho0[k] = rho6_lookup[idx][k];
@ -2152,7 +2276,11 @@ void PPPMDispIntel::fieldforce_a_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower_6; k <= nupper_6; k++) {
FFT_SCALAR r1 = rho_coeff_6[order_6-1][k];
@ -2206,8 +2334,12 @@ void PPPMDispIntel::fieldforce_a_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int my = m+nysum;
FFT_SCALAR y0 = z0*rho1[m];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mx = l+nxsum;
@ -2398,7 +2530,11 @@ void PPPMDispIntel::fieldforce_a_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int idz = dz;

#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho6_lookup[idx][k];
@ -2410,7 +2546,11 @@ void PPPMDispIntel::fieldforce_a_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower_6; k <= nupper_6; k++) {
FFT_SCALAR r1,r2,r3,dr1,dr2,dr3;
@ -2479,8 +2619,12 @@ void PPPMDispIntel::fieldforce_a_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
FFT_SCALAR eky_p = drho[1][m] * rho[2][n];
FFT_SCALAR ekz_p = rho[1][m] * drho[2][n];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mx = l + nxsum;
@ -2541,7 +2685,11 @@ void PPPMDispIntel::fieldforce_a_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
}
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int i = ifrom; i < ito; i++) {
particle_ekx0[i] *= hx_inv;
@ -2671,7 +2819,11 @@ void PPPMDispIntel::fieldforce_none_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho0[k] = rho6_lookup[idx][k];
@ -2680,7 +2832,11 @@ void PPPMDispIntel::fieldforce_none_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower_6; k <= nupper_6; k++) {
FFT_SCALAR r1 = rho_coeff_6[order_6-1][k];
@ -2721,8 +2877,12 @@ void PPPMDispIntel::fieldforce_none_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int my = m+nysum;
FFT_SCALAR y0 = z0*rho1[m];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mx = l+nxsum;
@ -2848,7 +3008,11 @@ void PPPMDispIntel::fieldforce_none_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
int idz = dz;

#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho6_lookup[idx][k];
@ -2860,7 +3024,11 @@ void PPPMDispIntel::fieldforce_none_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower_6; k <= nupper_6; k++) {
FFT_SCALAR r1,r2,r3,dr1,dr2,dr3;
@ -2909,8 +3077,12 @@ void PPPMDispIntel::fieldforce_none_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
FFT_SCALAR eky_p = drho[1][m] * rho[2][n];
FFT_SCALAR ekz_p = rho[1][m] * drho[2][n];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int l = 0; l < order; l++) {
int mx = l + nxsum;
@ -2992,7 +3164,11 @@ void PPPMDispIntel::precompute_rho()
for (int i = 0; i < rho_points; i++) {
FFT_SCALAR dx = -1. + 1./half_rho_scale * (FFT_SCALAR)i;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k=nlower; k<=nupper;k++) {
FFT_SCALAR r1 = ZEROF;
@ -3006,7 +3182,11 @@ void PPPMDispIntel::precompute_rho()
}
if (differentiation_flag == 1) {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k=nlower; k<=nupper;k++) {
FFT_SCALAR r1 = ZEROF;
@ -3026,7 +3206,11 @@ void PPPMDispIntel::precompute_rho()
for (int i = 0; i < rho_points; i++) {
FFT_SCALAR dx = -1. + 1./half_rho_scale * (FFT_SCALAR)i;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k=nlower_6; k<=nupper_6;k++) {
FFT_SCALAR r1 = ZEROF;
@ -3040,7 +3224,11 @@ void PPPMDispIntel::precompute_rho()
}
if (differentiation_flag == 1) {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k=nlower_6; k<=nupper_6;k++) {
FFT_SCALAR r1 = ZEROF;

@ -394,8 +394,12 @@ void PPPMIntel::particle_map(IntelBuffers<flt_t,acc_t> *buffers)
IP_PRE_omp_range_id_align(iifrom, iito, tid, nlocal, nthr, sizeof(ATOM_T));

#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#if defined(USE_OMP_SIMD)
#pragma omp simd reduction(+:flag)
#else
#pragma simd reduction(+:flag)
#endif
#pragma vector aligned
#endif
for (int i = iifrom; i < iito; i++) {

@ -500,7 +504,11 @@ void PPPMIntel::make_rho(IntelBuffers<flt_t,acc_t> *buffers)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho_lookup[idx][k];
@ -509,7 +517,11 @@ void PPPMIntel::make_rho(IntelBuffers<flt_t,acc_t> *buffers)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower; k <= nupper; k++) {
FFT_SCALAR r1,r2,r3;
@ -541,7 +553,11 @@ void PPPMIntel::make_rho(IntelBuffers<flt_t,acc_t> *buffers)
int mzy = m*nix + mz;
FFT_SCALAR x0 = y0*rho[1][m];
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) {
int mzyx = l + mzy;
@ -563,7 +579,11 @@ void PPPMIntel::make_rho(IntelBuffers<flt_t,acc_t> *buffers)
IP_PRE_omp_range_id(ifrom, ito, tid, ngrid, nthr);

#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int i = ifrom; i < ito; i++) {
for (int j = 1; j < nthr; j++) {
@ -645,7 +665,11 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho0[k] = rho_lookup[idx][k];
@ -654,7 +678,11 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower; k <= nupper; k++) {
FFT_SCALAR r1 = rho_coeff[order-1][k];
@ -690,7 +718,11 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers)
int my = m+nysum;
FFT_SCALAR y0 = z0*rho1[m];
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) {
int mx = l+nxsum;
@ -813,7 +845,11 @@ void PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers)
dz = dz*half_rho_scale + half_rho_scale_plus;
int idz = dz;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho[0][k] = rho_lookup[idx][k];
@ -825,7 +861,11 @@ void PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers)
}
} else {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k = nlower; k <= nupper; k++) {
FFT_SCALAR r1,r2,r3,dr1,dr2,dr3;
@ -871,7 +911,11 @@ void PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers)
FFT_SCALAR eky_p = drho[1][m] * rho[2][n];
FFT_SCALAR ekz_p = rho[1][m] * drho[2][n];
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) {
int mx = l + nxsum;
@ -893,7 +937,11 @@ void PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers)
}

#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int i = ifrom; i < ito; i++) {
particle_ekx[i] *= hx_inv;
@ -942,7 +990,11 @@ void PPPMIntel::precompute_rho()
for (int i = 0; i < rho_points; i++) {
FFT_SCALAR dx = -1. + 1./half_rho_scale * (FFT_SCALAR)i;
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k=nlower; k<=nupper;k++) {
FFT_SCALAR r1 = ZEROF;
@ -956,7 +1008,11 @@ void PPPMIntel::precompute_rho()
}
if (differentiation_flag == 1) {
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#pragma omp simd
#else
#pragma simd
#endif
#endif
for (int k=nlower; k<=nupper;k++) {
FFT_SCALAR r1 = ZEROF;

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -14,28 +13,26 @@

#include "compute_force_tally.h"

#include <cmath>
#include "atom.h"
#include "group.h"
#include "pair.h"
#include "update.h"
#include "memory.h"
#include "comm.h"
#include "error.h"
#include "force.h"
#include "comm.h"
#include "group.h"
#include "memory.h"
#include "pair.h"
#include "update.h"
#include <cmath>

using namespace LAMMPS_NS;

/* ---------------------------------------------------------------------- */

ComputeForceTally::ComputeForceTally(LAMMPS *lmp, int narg, char **arg) :
Compute(lmp, narg, arg)
ComputeForceTally::ComputeForceTally(LAMMPS *lmp, int narg, char **arg) : Compute(lmp, narg, arg)
{
if (narg < 4) error->all(FLERR,"Illegal compute force/tally command");
if (narg < 4) error->all(FLERR, "Illegal compute force/tally command");

igroup2 = group->find(arg[3]);
if (igroup2 == -1)
error->all(FLERR,"Could not find compute force/tally second group ID");
if (igroup2 == -1) error->all(FLERR, "Could not find compute force/tally second group ID");
groupbit2 = group->bitmask[igroup2];

scalar_flag = 1;
@ -46,7 +43,7 @@ ComputeForceTally::ComputeForceTally(LAMMPS *lmp, int narg, char **arg) :

comm_reverse = size_peratom_cols = 3;
extscalar = 1;
peflag = 1;    // we need Pair::ev_tally() to be run
peflag = 1;    // we need Pair::ev_tally() to be run

did_setup = invoked_peratom = invoked_scalar = -1;
nmax = -1;
@ -68,17 +65,16 @@ ComputeForceTally::~ComputeForceTally()
void ComputeForceTally::init()
{
if (force->pair == nullptr)
error->all(FLERR,"Trying to use compute force/tally without pair style");
error->all(FLERR, "Trying to use compute force/tally without pair style");
else
force->pair->add_tally_callback(this);

if (comm->me == 0) {
if (force->pair->single_enable == 0 || force->pair->manybody_flag)
error->warning(FLERR,"Compute force/tally used with incompatible pair style");
error->warning(FLERR, "Compute force/tally used with incompatible pair style");

if (force->bond || force->angle || force->dihedral
|| force->improper || force->kspace)
error->warning(FLERR,"Compute force/tally only called from pair style");
if (force->bond || force->angle || force->dihedral || force->improper || force->kspace)
error->warning(FLERR, "Compute force/tally only called from pair style");
}
did_setup = -1;
}
@ -99,51 +95,48 @@ void ComputeForceTally::pair_setup_callback(int, int)
if (atom->nmax > nmax) {
memory->destroy(fatom);
nmax = atom->nmax;
memory->create(fatom,nmax,size_peratom_cols,"force/tally:fatom");
memory->create(fatom, nmax, size_peratom_cols, "force/tally:fatom");
array_atom = fatom;
}

// clear storage

for (int i=0; i < ntotal; ++i)
for (int j=0; j < size_peratom_cols; ++j)
fatom[i][j] = 0.0;
for (int i = 0; i < ntotal; ++i)
for (int j = 0; j < size_peratom_cols; ++j) fatom[i][j] = 0.0;

for (int i=0; i < size_peratom_cols; ++i)
vector[i] = ftotal[i] = 0.0;
for (int i = 0; i < size_peratom_cols; ++i) vector[i] = ftotal[i] = 0.0;

did_setup = update->ntimestep;
}

/* ---------------------------------------------------------------------- */
void ComputeForceTally::pair_tally_callback(int i, int j, int nlocal, int newton,
double, double, double fpair,
double dx, double dy, double dz)
void ComputeForceTally::pair_tally_callback(int i, int j, int nlocal, int newton, double, double,
double fpair, double dx, double dy, double dz)
{
const int * const mask = atom->mask;
const int *const mask = atom->mask;

if ( ((mask[i] & groupbit) && (mask[j] & groupbit2))
|| ((mask[i] & groupbit2) && (mask[j] & groupbit))) {
if (((mask[i] & groupbit) && (mask[j] & groupbit2)) ||
((mask[i] & groupbit2) && (mask[j] & groupbit))) {

if (newton || i < nlocal) {
if (mask[i] & groupbit) {
ftotal[0] += fpair*dx;
ftotal[1] += fpair*dy;
ftotal[2] += fpair*dz;
ftotal[0] += fpair * dx;
ftotal[1] += fpair * dy;
ftotal[2] += fpair * dz;
}
fatom[i][0] += fpair*dx;
fatom[i][1] += fpair*dy;
fatom[i][2] += fpair*dz;
fatom[i][0] += fpair * dx;
fatom[i][1] += fpair * dy;
fatom[i][2] += fpair * dz;
}
if (newton || j < nlocal) {
if (mask[j] & groupbit) {
ftotal[0] -= fpair*dx;
ftotal[1] -= fpair*dy;
ftotal[2] -= fpair*dz;
ftotal[0] -= fpair * dx;
ftotal[1] -= fpair * dy;
ftotal[2] -= fpair * dz;
}
fatom[j][0] -= fpair*dx;
fatom[j][1] -= fpair*dy;
fatom[j][2] -= fpair*dz;
fatom[j][0] -= fpair * dx;
fatom[j][1] -= fpair * dy;
fatom[j][2] -= fpair * dz;
}
}
}
@ -152,7 +145,7 @@ void ComputeForceTally::pair_tally_callback(int i, int j, int nlocal, int newton

int ComputeForceTally::pack_reverse_comm(int n, int first, double *buf)
{
int i,m,last;
int i, m, last;

m = 0;
last = first + n;
@ -168,7 +161,7 @@ int ComputeForceTally::pack_reverse_comm(int n, int first, double *buf)

void ComputeForceTally::unpack_reverse_comm(int n, int *list, double *buf)
{
int i,j,m;
int i, j, m;

m = 0;
for (i = 0; i < n; i++) {
@ -184,15 +177,14 @@ void ComputeForceTally::unpack_reverse_comm(int n, int *list, double *buf)
double ComputeForceTally::compute_scalar()
{
invoked_scalar = update->ntimestep;
if ((did_setup != invoked_scalar)
|| (update->eflag_global != invoked_scalar))
error->all(FLERR,"Energy was not tallied on needed timestep");
if ((did_setup != invoked_scalar) || (update->eflag_global != invoked_scalar))
error->all(FLERR, "Energy was not tallied on needed timestep");

// sum accumulated forces across procs

MPI_Allreduce(ftotal,vector,size_peratom_cols,MPI_DOUBLE,MPI_SUM,world);
MPI_Allreduce(ftotal, vector, size_peratom_cols, MPI_DOUBLE, MPI_SUM, world);

scalar = sqrt(vector[0]*vector[0]+vector[1]*vector[1]+vector[2]*vector[2]);
scalar = sqrt(vector[0] * vector[0] + vector[1] * vector[1] + vector[2] * vector[2]);
return scalar;
}

@ -201,9 +193,8 @@ double ComputeForceTally::compute_scalar()
void ComputeForceTally::compute_peratom()
{
invoked_peratom = update->ntimestep;
if ((did_setup != invoked_peratom)
|| (update->eflag_global != invoked_peratom))
error->all(FLERR,"Energy was not tallied on needed timestep");
if ((did_setup != invoked_peratom) || (update->eflag_global != invoked_peratom))
error->all(FLERR, "Energy was not tallied on needed timestep");

// collect contributions from ghost atoms

@ -213,8 +204,7 @@ void ComputeForceTally::compute_peratom()
// clear out ghost atom data after it has been collected to local atoms
const int nall = atom->nlocal + atom->nghost;
for (int i = atom->nlocal; i < nall; ++i)
for (int j = 0; j < size_peratom_cols; ++j)
fatom[i][j] = 0.0;
for (int j = 0; j < size_peratom_cols; ++j) fatom[i][j] = 0.0;
}
}

@ -224,7 +214,6 @@ void ComputeForceTally::compute_peratom()

double ComputeForceTally::memory_usage()
{
double bytes = (nmax < 0) ? 0 : nmax*size_peratom_cols * sizeof(double);
double bytes = (nmax < 0) ? 0 : nmax * (double)size_peratom_cols * sizeof(double);
return bytes;
}
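The memory_usage() change just above (repeated in the other tally computes below) inserts a (double) cast so the byte count is computed in floating point. Without it, nmax * size_peratom_cols is evaluated in int and can overflow for very large per-process atom counts before the product is ever widened. A standalone sketch of the failure mode, with invented values:

// Illustration only (not LAMMPS code): why one operand is cast to double.
#include <cstdio>

int main() {
  int nmax = 900000000;    // hypothetical, unrealistically large per-proc count
  int cols = 3;
  // double bad = nmax * cols * sizeof(double);    // nmax*cols overflows int: UB
  double good = nmax * (double)cols * sizeof(double);    // widened before multiply
  printf("%.0f bytes\n", good);    // 21600000000
  return 0;
}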
@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -15,26 +14,25 @@
#include "compute_heat_flux_tally.h"

#include "atom.h"
#include "group.h"
#include "pair.h"
#include "update.h"
#include "memory.h"
#include "comm.h"
#include "error.h"
#include "force.h"
#include "comm.h"
#include "group.h"
#include "memory.h"
#include "pair.h"
#include "update.h"

using namespace LAMMPS_NS;

/* ---------------------------------------------------------------------- */

ComputeHeatFluxTally::ComputeHeatFluxTally(LAMMPS *lmp, int narg, char **arg) :
Compute(lmp, narg, arg)
Compute(lmp, narg, arg)
{
if (narg < 4) error->all(FLERR,"Illegal compute heat/flux/tally command");
if (narg < 4) error->all(FLERR, "Illegal compute heat/flux/tally command");

igroup2 = group->find(arg[3]);
if (igroup2 == -1)
error->all(FLERR,"Could not find compute heat/flux/tally second group ID");
if (igroup2 == -1) error->all(FLERR, "Could not find compute heat/flux/tally second group ID");
groupbit2 = group->bitmask[igroup2];

vector_flag = 1;
@ -44,7 +42,7 @@ ComputeHeatFluxTally::ComputeHeatFluxTally(LAMMPS *lmp, int narg, char **arg) :
comm_reverse = 7;
extvector = 1;
size_vector = 6;
peflag = 1;    // we need Pair::ev_tally() to be run
peflag = 1;    // we need Pair::ev_tally() to be run

did_setup = 0;
invoked_peratom = invoked_scalar = -1;
@ -71,17 +69,16 @@ ComputeHeatFluxTally::~ComputeHeatFluxTally()
void ComputeHeatFluxTally::init()
{
if (force->pair == nullptr)
error->all(FLERR,"Trying to use compute heat/flux/tally without pair style");
error->all(FLERR, "Trying to use compute heat/flux/tally without pair style");
else
force->pair->add_tally_callback(this);

if (comm->me == 0) {
if (force->pair->single_enable == 0 || force->pair->manybody_flag)
error->warning(FLERR,"Compute heat/flux/tally used with incompatible pair style");
error->warning(FLERR, "Compute heat/flux/tally used with incompatible pair style");

if (force->bond || force->angle || force->dihedral
|| force->improper || force->kspace)
error->warning(FLERR,"Compute heat/flux/tally only called from pair style");
if (force->bond || force->angle || force->dihedral || force->improper || force->kspace)
error->warning(FLERR, "Compute heat/flux/tally only called from pair style");
}
did_setup = -1;
}
@ -102,13 +99,13 @@ void ComputeHeatFluxTally::pair_setup_callback(int, int)
memory->destroy(stress);
memory->destroy(eatom);
nmax = atom->nmax;
memory->create(stress,nmax,6,"heat/flux/tally:stress");
memory->create(eatom,nmax,"heat/flux/tally:eatom");
memory->create(stress, nmax, 6, "heat/flux/tally:stress");
memory->create(eatom, nmax, "heat/flux/tally:eatom");
}

// clear storage

for (int i=0; i < ntotal; ++i) {
for (int i = 0; i < ntotal; ++i) {
eatom[i] = 0.0;
stress[i][0] = 0.0;
stress[i][1] = 0.0;
@ -118,30 +115,29 @@ void ComputeHeatFluxTally::pair_setup_callback(int, int)
stress[i][5] = 0.0;
}

for (int i=0; i < size_vector; ++i)
vector[i] = heatj[i] = 0.0;
for (int i = 0; i < size_vector; ++i) vector[i] = heatj[i] = 0.0;

did_setup = update->ntimestep;
}

/* ---------------------------------------------------------------------- */
void ComputeHeatFluxTally::pair_tally_callback(int i, int j, int nlocal, int newton,
double evdwl, double ecoul, double fpair,
double dx, double dy, double dz)
void ComputeHeatFluxTally::pair_tally_callback(int i, int j, int nlocal, int newton, double evdwl,
double ecoul, double fpair, double dx, double dy,
double dz)
{
const int * const mask = atom->mask;
const int *const mask = atom->mask;

if ( ((mask[i] & groupbit) && (mask[j] & groupbit2))
|| ((mask[i] & groupbit2) && (mask[j] & groupbit))) {
if (((mask[i] & groupbit) && (mask[j] & groupbit2)) ||
((mask[i] & groupbit2) && (mask[j] & groupbit))) {

const double epairhalf = 0.5 * (evdwl + ecoul);
fpair *= 0.5;
const double v0 = dx*dx*fpair;    // dx*fpair = Fij_x
const double v1 = dy*dy*fpair;
const double v2 = dz*dz*fpair;
const double v3 = dx*dy*fpair;
const double v4 = dx*dz*fpair;
const double v5 = dy*dz*fpair;
const double v0 = dx * dx * fpair;    // dx*fpair = Fij_x
const double v1 = dy * dy * fpair;
const double v2 = dz * dz * fpair;
const double v3 = dx * dy * fpair;
const double v4 = dx * dz * fpair;
const double v5 = dy * dz * fpair;

if (newton || i < nlocal) {
eatom[i] += epairhalf;
@ -168,7 +164,7 @@ void ComputeHeatFluxTally::pair_tally_callback(int i, int j, int nlocal, int new

int ComputeHeatFluxTally::pack_reverse_comm(int n, int first, double *buf)
{
int i,m,last;
int i, m, last;

m = 0;
last = first + n;
@ -188,7 +184,7 @@ int ComputeHeatFluxTally::pack_reverse_comm(int n, int first, double *buf)

void ComputeHeatFluxTally::unpack_reverse_comm(int n, int *list, double *buf)
{
int i,j,m;
int i, j, m;

m = 0;
for (i = 0; i < n; i++) {
@ -209,7 +205,7 @@ void ComputeHeatFluxTally::compute_vector()
{
invoked_vector = update->ntimestep;
if ((did_setup != invoked_vector) || (update->eflag_global != invoked_vector))
error->all(FLERR,"Energy was not tallied on needed timestep");
error->all(FLERR, "Energy was not tallied on needed timestep");

// collect contributions from ghost atoms

@ -244,26 +240,28 @@ void ComputeHeatFluxTally::compute_vector()
double *rmass = atom->rmass;
int *type = atom->type;

double jc[3] = {0.0,0.0,0.0};
double jv[3] = {0.0,0.0,0.0};
double jc[3] = {0.0, 0.0, 0.0};
double jv[3] = {0.0, 0.0, 0.0};

for (int i = 0; i < nlocal; i++) {
if (mask[i] & groupbit) {
const double * const vi = v[i];
const double * const si = stress[i];
const double *const vi = v[i];
const double *const si = stress[i];
double ke_i;

if (rmass) ke_i = pfactor * rmass[i];
else ke_i = pfactor * mass[type[i]];
ke_i *= (vi[0]*vi[0] + vi[1]*vi[1] + vi[2]*vi[2]);
if (rmass)
ke_i = pfactor * rmass[i];
else
ke_i = pfactor * mass[type[i]];
ke_i *= (vi[0] * vi[0] + vi[1] * vi[1] + vi[2] * vi[2]);
ke_i += eatom[i];

jc[0] += ke_i*vi[0];
jc[1] += ke_i*vi[1];
jc[2] += ke_i*vi[2];
jv[0] += si[0]*vi[0] + si[3]*vi[1] + si[4]*vi[2];
jv[1] += si[3]*vi[0] + si[1]*vi[1] + si[5]*vi[2];
jv[2] += si[4]*vi[0] + si[5]*vi[1] + si[2]*vi[2];
jc[0] += ke_i * vi[0];
jc[1] += ke_i * vi[1];
jc[2] += ke_i * vi[2];
jv[0] += si[0] * vi[0] + si[3] * vi[1] + si[4] * vi[2];
jv[1] += si[3] * vi[0] + si[1] * vi[1] + si[5] * vi[2];
jv[2] += si[4] * vi[0] + si[5] * vi[1] + si[2] * vi[2];
}
}

@ -274,7 +272,7 @@ void ComputeHeatFluxTally::compute_vector()
heatj[3] = jc[0];
heatj[4] = jc[1];
heatj[5] = jc[2];
MPI_Allreduce(heatj,vector,size_vector,MPI_DOUBLE,MPI_SUM,world);
MPI_Allreduce(heatj, vector, size_vector, MPI_DOUBLE, MPI_SUM, world);
}

/* ----------------------------------------------------------------------
@ -283,7 +281,6 @@ void ComputeHeatFluxTally::compute_vector()

double ComputeHeatFluxTally::memory_usage()
{
double bytes = (nmax < 0) ? 0 : nmax*comm_reverse * sizeof(double);
double bytes = (nmax < 0) ? 0 : nmax * (double)comm_reverse * sizeof(double);
return bytes;
}
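For orientation, compute_vector() above accumulates the two standard pieces of the pairwise heat flux: a convective part jc (per-atom energy times velocity) and a virial part jv (per-atom stress contracted with velocity), with the stress stored in Voigt order xx,yy,zz,xy,xz,yz. A standalone sketch of one atom's contribution, with made-up numbers:

// Sketch only (not LAMMPS code): mirrors the jc/jv accumulation above.
#include <cstdio>

int main() {
  double vi[3] = {1.0, 0.5, -0.25};                    // hypothetical velocity
  double si[6] = {0.1, 0.2, 0.3, 0.01, 0.02, 0.03};    // per-atom stress, Voigt order
  double ke_i = 2.0;    // kinetic plus tallied potential energy of the atom

  double jc[3], jv[3];
  for (int d = 0; d < 3; ++d) jc[d] = ke_i * vi[d];    // convective term
  jv[0] = si[0] * vi[0] + si[3] * vi[1] + si[4] * vi[2];    // (S.v)_x
  jv[1] = si[3] * vi[0] + si[1] * vi[1] + si[5] * vi[2];    // (S.v)_y
  jv[2] = si[4] * vi[0] + si[5] * vi[1] + si[2] * vi[2];    // (S.v)_z
  printf("jc=(%g %g %g) jv=(%g %g %g)\n", jc[0], jc[1], jc[2], jv[0], jv[1], jv[2]);
  return 0;
}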
@ -233,6 +233,6 @@ void ComputeHeatFluxVirialTally::compute_peratom()

double ComputeHeatFluxVirialTally::memory_usage()
{
double bytes = (nmax < 0) ? 0 : nmax * size_peratom_cols * sizeof(double);
double bytes = (nmax < 0) ? 0 : nmax * (double)size_peratom_cols * sizeof(double);
return bytes;
}

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -15,25 +14,23 @@
#include "compute_pe_mol_tally.h"

#include "atom.h"
#include "comm.h"
#include "error.h"
#include "force.h"
#include "group.h"
#include "pair.h"
#include "update.h"
#include "error.h"
#include "force.h"
#include "comm.h"

using namespace LAMMPS_NS;

/* ---------------------------------------------------------------------- */

ComputePEMolTally::ComputePEMolTally(LAMMPS *lmp, int narg, char **arg) :
Compute(lmp, narg, arg)
ComputePEMolTally::ComputePEMolTally(LAMMPS *lmp, int narg, char **arg) : Compute(lmp, narg, arg)
{
if (narg < 4) error->all(FLERR,"Illegal compute pe/mol/tally command");
if (narg < 4) error->all(FLERR, "Illegal compute pe/mol/tally command");

igroup2 = group->find(arg[3]);
if (igroup2 == -1)
error->all(FLERR,"Could not find compute pe/mol/tally second group ID");
if (igroup2 == -1) error->all(FLERR, "Could not find compute pe/mol/tally second group ID");
groupbit2 = group->bitmask[igroup2];

vector_flag = 1;
@ -42,7 +39,7 @@ ComputePEMolTally::ComputePEMolTally(LAMMPS *lmp, int narg, char **arg) :
dynamic_group_allow = 0;

extvector = 1;
peflag = 1;    // we need Pair::ev_tally() to be run
peflag = 1;    // we need Pair::ev_tally() to be run

did_setup = invoked_vector = -1;
vector = new double[size_vector];
@ -61,20 +58,18 @@ ComputePEMolTally::~ComputePEMolTally()
void ComputePEMolTally::init()
{
if (force->pair == nullptr)
error->all(FLERR,"Trying to use compute pe/mol/tally without pair style");
error->all(FLERR, "Trying to use compute pe/mol/tally without pair style");
else
force->pair->add_tally_callback(this);

if (atom->molecule_flag == 0)
error->all(FLERR,"Compute pe/mol/tally requires molecule IDs");
if (atom->molecule_flag == 0) error->all(FLERR, "Compute pe/mol/tally requires molecule IDs");

if (comm->me == 0) {
if (force->pair->single_enable == 0 || force->pair->manybody_flag)
error->warning(FLERR,"Compute pe/mol/tally used with incompatible pair style");
error->warning(FLERR, "Compute pe/mol/tally used with incompatible pair style");

if (force->bond || force->angle || force->dihedral
|| force->improper || force->kspace)
error->warning(FLERR,"Compute pe/mol/tally only called from pair style");
if (force->bond || force->angle || force->dihedral || force->improper || force->kspace)
error->warning(FLERR, "Compute pe/mol/tally only called from pair style");
}
did_setup = -1;
}
@ -93,29 +88,33 @@ void ComputePEMolTally::pair_setup_callback(int, int)
}

/* ---------------------------------------------------------------------- */
void ComputePEMolTally::pair_tally_callback(int i, int j, int nlocal, int newton,
double evdwl, double ecoul, double,
double, double, double)
void ComputePEMolTally::pair_tally_callback(int i, int j, int nlocal, int newton, double evdwl,
double ecoul, double, double, double, double)
{
const int * const mask = atom->mask;
const tagint * const molid = atom->molecule;
const int *const mask = atom->mask;
const tagint *const molid = atom->molecule;

if ( ((mask[i] & groupbit) && (mask[j] & groupbit2))
|| ((mask[i] & groupbit2) && (mask[j] & groupbit))) {
if (((mask[i] & groupbit) && (mask[j] & groupbit2)) ||
((mask[i] & groupbit2) && (mask[j] & groupbit))) {

evdwl *= 0.5; ecoul *= 0.5;
evdwl *= 0.5;
ecoul *= 0.5;
if (newton || i < nlocal) {
if (molid[i] == molid[j]) {
etotal[0] += evdwl; etotal[1] += ecoul;
etotal[0] += evdwl;
etotal[1] += ecoul;
} else {
etotal[2] += evdwl; etotal[3] += ecoul;
etotal[2] += evdwl;
etotal[3] += ecoul;
}
}
if (newton || j < nlocal) {
if (molid[i] == molid[j]) {
etotal[0] += evdwl; etotal[1] += ecoul;
etotal[0] += evdwl;
etotal[1] += ecoul;
} else {
etotal[2] += evdwl; etotal[3] += ecoul;
etotal[2] += evdwl;
etotal[3] += ecoul;
}
}
}
@ -127,10 +126,9 @@ void ComputePEMolTally::compute_vector()
{
invoked_vector = update->ntimestep;
if ((did_setup != invoked_vector) || (update->eflag_global != invoked_vector))
error->all(FLERR,"Energy was not tallied on needed timestep");
error->all(FLERR, "Energy was not tallied on needed timestep");

// sum accumulated energies across procs

MPI_Allreduce(etotal,vector,size_vector,MPI_DOUBLE,MPI_SUM,world);
MPI_Allreduce(etotal, vector, size_vector, MPI_DOUBLE, MPI_SUM, world);
}
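Reading the branch on molid[i] == molid[j] above (semantics inferred from the accumulation, not stated in this diff): etotal[0]/etotal[1] collect intramolecular vdW/Coulomb energy, etotal[2]/etotal[3] the intermolecular parts, with each pair contributing half per owned atom. A tiny standalone restatement:

// Illustrative re-statement of the branch above (not LAMMPS code).
#include <cstdio>

int main() {
  double etotal[4] = {0.0, 0.0, 0.0, 0.0};
  long molid_i = 7, molid_j = 7;    // hypothetical molecule IDs
  double evdwl = 0.5 * 1.2, ecoul = 0.5 * (-0.4);    // halved as in the callback
  if (molid_i == molid_j) {
    etotal[0] += evdwl;    // intramolecular vdW
    etotal[1] += ecoul;    // intramolecular Coulomb
  } else {
    etotal[2] += evdwl;    // intermolecular vdW
    etotal[3] += ecoul;    // intermolecular Coulomb
  }
  printf("intra: %g %g  inter: %g %g\n", etotal[0], etotal[1], etotal[2], etotal[3]);
  return 0;
}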
@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -15,26 +14,24 @@
#include "compute_pe_tally.h"

#include "atom.h"
#include "group.h"
#include "pair.h"
#include "update.h"
#include "memory.h"
#include "comm.h"
#include "error.h"
#include "force.h"
#include "comm.h"
#include "group.h"
#include "memory.h"
#include "pair.h"
#include "update.h"

using namespace LAMMPS_NS;

/* ---------------------------------------------------------------------- */

ComputePETally::ComputePETally(LAMMPS *lmp, int narg, char **arg) :
Compute(lmp, narg, arg)
ComputePETally::ComputePETally(LAMMPS *lmp, int narg, char **arg) : Compute(lmp, narg, arg)
{
if (narg < 4) error->all(FLERR,"Illegal compute pe/tally command");
if (narg < 4) error->all(FLERR, "Illegal compute pe/tally command");

igroup2 = group->find(arg[3]);
if (igroup2 == -1)
error->all(FLERR,"Could not find compute pe/tally second group ID");
if (igroup2 == -1) error->all(FLERR, "Could not find compute pe/tally second group ID");
groupbit2 = group->bitmask[igroup2];

scalar_flag = 1;
@ -45,7 +42,7 @@ ComputePETally::ComputePETally(LAMMPS *lmp, int narg, char **arg) :

comm_reverse = size_peratom_cols = 2;
extscalar = 1;
peflag = 1;    // we need Pair::ev_tally() to be run
peflag = 1;    // we need Pair::ev_tally() to be run

did_setup = invoked_peratom = invoked_scalar = -1;
nmax = -1;
@ -67,17 +64,16 @@ ComputePETally::~ComputePETally()
void ComputePETally::init()
{
if (force->pair == nullptr)
error->all(FLERR,"Trying to use compute pe/tally without a pair style");
error->all(FLERR, "Trying to use compute pe/tally without a pair style");
else
force->pair->add_tally_callback(this);

if (comm->me == 0) {
if (force->pair->single_enable == 0 || force->pair->manybody_flag)
error->warning(FLERR,"Compute pe/tally used with incompatible pair style");
error->warning(FLERR, "Compute pe/tally used with incompatible pair style");

if (force->bond || force->angle || force->dihedral
|| force->improper || force->kspace)
error->warning(FLERR,"Compute pe/tally only called from pair style");
if (force->bond || force->angle || force->dihedral || force->improper || force->kspace)
error->warning(FLERR, "Compute pe/tally only called from pair style");
}
did_setup = -1;
}
@ -98,14 +94,13 @@ void ComputePETally::pair_setup_callback(int, int)
if (atom->nmax > nmax) {
memory->destroy(eatom);
nmax = atom->nmax;
memory->create(eatom,nmax,size_peratom_cols,"pe/tally:eatom");
memory->create(eatom, nmax, size_peratom_cols, "pe/tally:eatom");
array_atom = eatom;
}

// clear storage

for (int i=0; i < ntotal; ++i)
eatom[i][0] = eatom[i][1] = 0.0;
for (int i = 0; i < ntotal; ++i) eatom[i][0] = eatom[i][1] = 0.0;

vector[0] = etotal[0] = vector[1] = etotal[1] = 0.0;

@ -113,23 +108,27 @@ void ComputePETally::pair_setup_callback(int, int)
}

/* ---------------------------------------------------------------------- */
void ComputePETally::pair_tally_callback(int i, int j, int nlocal, int newton,
double evdwl, double ecoul, double,
double, double, double)
void ComputePETally::pair_tally_callback(int i, int j, int nlocal, int newton, double evdwl,
double ecoul, double, double, double, double)
{
const int * const mask = atom->mask;
const int *const mask = atom->mask;

if ( ((mask[i] & groupbit) && (mask[j] & groupbit2))
|| ((mask[i] & groupbit2) && (mask[j] & groupbit))) {
if (((mask[i] & groupbit) && (mask[j] & groupbit2)) ||
((mask[i] & groupbit2) && (mask[j] & groupbit))) {

evdwl *= 0.5; ecoul *= 0.5;
evdwl *= 0.5;
ecoul *= 0.5;
if (newton || i < nlocal) {
etotal[0] += evdwl; eatom[i][0] += evdwl;
etotal[1] += ecoul; eatom[i][1] += ecoul;
etotal[0] += evdwl;
eatom[i][0] += evdwl;
etotal[1] += ecoul;
eatom[i][1] += ecoul;
}
if (newton || j < nlocal) {
etotal[0] += evdwl; eatom[j][0] += evdwl;
etotal[1] += ecoul; eatom[j][1] += ecoul;
etotal[0] += evdwl;
eatom[j][0] += evdwl;
etotal[1] += ecoul;
eatom[j][1] += ecoul;
}
}
}
@ -138,7 +137,7 @@ void ComputePETally::pair_tally_callback(int i, int j, int nlocal, int newton,

int ComputePETally::pack_reverse_comm(int n, int first, double *buf)
{
int i,m,last;
int i, m, last;

m = 0;
last = first + n;
@ -153,7 +152,7 @@ int ComputePETally::pack_reverse_comm(int n, int first, double *buf)

void ComputePETally::unpack_reverse_comm(int n, int *list, double *buf)
{
int i,j,m;
int i, j, m;

m = 0;
for (i = 0; i < n; i++) {
@ -168,15 +167,14 @@ void ComputePETally::unpack_reverse_comm(int n, int *list, double *buf)
double ComputePETally::compute_scalar()
{
invoked_scalar = update->ntimestep;
if ((did_setup != invoked_scalar)
|| (update->eflag_global != invoked_scalar))
error->all(FLERR,"Energy was not tallied on needed timestep");
if ((did_setup != invoked_scalar) || (update->eflag_global != invoked_scalar))
error->all(FLERR, "Energy was not tallied on needed timestep");

// sum accumulated energies across procs

MPI_Allreduce(etotal,vector,size_peratom_cols,MPI_DOUBLE,MPI_SUM,world);
MPI_Allreduce(etotal, vector, size_peratom_cols, MPI_DOUBLE, MPI_SUM, world);

scalar = vector[0]+vector[1];
scalar = vector[0] + vector[1];
return scalar;
}

@ -185,9 +183,8 @@ double ComputePETally::compute_scalar()
void ComputePETally::compute_peratom()
{
invoked_peratom = update->ntimestep;
if ((did_setup != invoked_peratom)
|| (update->eflag_global != invoked_peratom))
error->all(FLERR,"Energy was not tallied on needed timestep");
if ((did_setup != invoked_peratom) || (update->eflag_global != invoked_peratom))
error->all(FLERR, "Energy was not tallied on needed timestep");

// collect contributions from ghost atoms

@ -196,8 +193,7 @@ void ComputePETally::compute_peratom()

// clear out ghost atom data after it has been collected to local atoms
const int nall = atom->nlocal + atom->nghost;
for (int i = atom->nlocal; i < nall; ++i)
eatom[i][0] = eatom[i][1] = 0.0;
for (int i = atom->nlocal; i < nall; ++i) eatom[i][0] = eatom[i][1] = 0.0;
}
}

@ -207,7 +203,6 @@ void ComputePETally::compute_peratom()

double ComputePETally::memory_usage()
{
double bytes = (nmax < 0) ? 0 : nmax*size_peratom_cols * sizeof(double);
double bytes = (nmax < 0) ? 0 : nmax * (double)size_peratom_cols * sizeof(double);
return bytes;
}
@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -15,27 +14,25 @@
#include "compute_stress_tally.h"

#include "atom.h"
#include "group.h"
#include "pair.h"
#include "update.h"
#include "memory.h"
#include "error.h"
#include "force.h"
#include "comm.h"
#include "domain.h"
#include "error.h"
#include "force.h"
#include "group.h"
#include "memory.h"
#include "pair.h"
#include "update.h"

using namespace LAMMPS_NS;

/* ---------------------------------------------------------------------- */

ComputeStressTally::ComputeStressTally(LAMMPS *lmp, int narg, char **arg) :
Compute(lmp, narg, arg)
ComputeStressTally::ComputeStressTally(LAMMPS *lmp, int narg, char **arg) : Compute(lmp, narg, arg)
{
if (narg < 4) error->all(FLERR,"Illegal compute stress/tally command");
if (narg < 4) error->all(FLERR, "Illegal compute stress/tally command");

igroup2 = group->find(arg[3]);
if (igroup2 == -1)
error->all(FLERR,"Could not find compute stress/tally second group ID");
if (igroup2 == -1) error->all(FLERR, "Could not find compute stress/tally second group ID");
groupbit2 = group->bitmask[igroup2];

scalar_flag = 1;
@ -46,7 +43,7 @@ ComputeStressTally::ComputeStressTally(LAMMPS *lmp, int narg, char **arg) :

comm_reverse = size_peratom_cols = 6;
extscalar = 0;
peflag = 1;    // we need Pair::ev_tally() to be run
peflag = 1;    // we need Pair::ev_tally() to be run

did_setup = invoked_peratom = invoked_scalar = -1;
nmax = -1;
@ -70,17 +67,16 @@ ComputeStressTally::~ComputeStressTally()
void ComputeStressTally::init()
{
if (force->pair == nullptr)
error->all(FLERR,"Trying to use compute stress/tally without pair style");
error->all(FLERR, "Trying to use compute stress/tally without pair style");
else
force->pair->add_tally_callback(this);

if (comm->me == 0) {
if (force->pair->single_enable == 0 || force->pair->manybody_flag)
error->warning(FLERR,"Compute stress/tally used with incompatible pair style");
error->warning(FLERR, "Compute stress/tally used with incompatible pair style");

if (force->bond || force->angle || force->dihedral
|| force->improper || force->kspace)
error->warning(FLERR,"Compute stress/tally only called from pair style");
if (force->bond || force->angle || force->dihedral || force->improper || force->kspace)
error->warning(FLERR, "Compute stress/tally only called from pair style");
}
did_setup = -1;
}
@ -101,55 +97,64 @@ void ComputeStressTally::pair_setup_callback(int, int)
if (atom->nmax > nmax) {
memory->destroy(stress);
nmax = atom->nmax;
memory->create(stress,nmax,size_peratom_cols,"stress/tally:stress");
memory->create(stress, nmax, size_peratom_cols, "stress/tally:stress");
array_atom = stress;
}

// clear storage

for (int i=0; i < ntotal; ++i)
for (int j=0; j < size_peratom_cols; ++j)
stress[i][j] = 0.0;
for (int i = 0; i < ntotal; ++i)
for (int j = 0; j < size_peratom_cols; ++j) stress[i][j] = 0.0;

for (int i=0; i < size_peratom_cols; ++i)
vector[i] = virial[i] = 0.0;
for (int i = 0; i < size_peratom_cols; ++i) vector[i] = virial[i] = 0.0;

did_setup = update->ntimestep;
}

/* ---------------------------------------------------------------------- */
void ComputeStressTally::pair_tally_callback(int i, int j, int nlocal, int newton,
double, double, double fpair,
double dx, double dy, double dz)
void ComputeStressTally::pair_tally_callback(int i, int j, int nlocal, int newton, double, double,
double fpair, double dx, double dy, double dz)
{
const int * const mask = atom->mask;
const int *const mask = atom->mask;

if ( ((mask[i] & groupbit) && (mask[j] & groupbit2))
|| ((mask[i] & groupbit2) && (mask[j] & groupbit))) {
if (((mask[i] & groupbit) && (mask[j] & groupbit2)) ||
((mask[i] & groupbit2) && (mask[j] & groupbit))) {

fpair *= 0.5;
const double v0 = dx*dx*fpair;
const double v1 = dy*dy*fpair;
const double v2 = dz*dz*fpair;
const double v3 = dx*dy*fpair;
const double v4 = dx*dz*fpair;
const double v5 = dy*dz*fpair;
const double v0 = dx * dx * fpair;
const double v1 = dy * dy * fpair;
const double v2 = dz * dz * fpair;
const double v3 = dx * dy * fpair;
const double v4 = dx * dz * fpair;
const double v5 = dy * dz * fpair;

if (newton || i < nlocal) {
virial[0] += v0; stress[i][0] += v0;
virial[1] += v1; stress[i][1] += v1;
virial[2] += v2; stress[i][2] += v2;
virial[3] += v3; stress[i][3] += v3;
virial[4] += v4; stress[i][4] += v4;
virial[5] += v5; stress[i][5] += v5;
virial[0] += v0;
stress[i][0] += v0;
virial[1] += v1;
stress[i][1] += v1;
virial[2] += v2;
stress[i][2] += v2;
virial[3] += v3;
stress[i][3] += v3;
virial[4] += v4;
stress[i][4] += v4;
virial[5] += v5;
stress[i][5] += v5;
}
if (newton || j < nlocal) {
virial[0] += v0; stress[j][0] += v0;
virial[1] += v1; stress[j][1] += v1;
virial[2] += v2; stress[j][2] += v2;
virial[3] += v3; stress[j][3] += v3;
virial[4] += v4; stress[j][4] += v4;
virial[5] += v5; stress[j][5] += v5;
virial[0] += v0;
stress[j][0] += v0;
virial[1] += v1;
stress[j][1] += v1;
virial[2] += v2;
stress[j][2] += v2;
virial[3] += v3;
stress[j][3] += v3;
virial[4] += v4;
stress[j][4] += v4;
virial[5] += v5;
stress[j][5] += v5;
}
}
}
@ -158,7 +163,7 @@ void ComputeStressTally::pair_tally_callback(int i, int j, int nlocal, int newto

int ComputeStressTally::pack_reverse_comm(int n, int first, double *buf)
{
int i,m,last;
int i, m, last;

m = 0;
last = first + n;
@ -177,7 +182,7 @@ int ComputeStressTally::pack_reverse_comm(int n, int first, double *buf)

void ComputeStressTally::unpack_reverse_comm(int n, int *list, double *buf)
{
int i,j,m;
int i, j, m;

m = 0;
for (i = 0; i < n; i++) {
@ -196,18 +201,17 @@ void ComputeStressTally::unpack_reverse_comm(int n, int *list, double *buf)
double ComputeStressTally::compute_scalar()
{
invoked_scalar = update->ntimestep;
if ((did_setup != invoked_scalar)
|| (update->eflag_global != invoked_scalar))
error->all(FLERR,"Energy was not tallied on needed timestep");
if ((did_setup != invoked_scalar) || (update->eflag_global != invoked_scalar))
error->all(FLERR, "Energy was not tallied on needed timestep");

// sum accumulated forces across procs

MPI_Allreduce(virial,vector,size_peratom_cols,MPI_DOUBLE,MPI_SUM,world);
MPI_Allreduce(virial, vector, size_peratom_cols, MPI_DOUBLE, MPI_SUM, world);

if (domain->dimension == 3)
scalar = (vector[0]+vector[1]+vector[2])/3.0;
scalar = (vector[0] + vector[1] + vector[2]) / 3.0;
else
scalar = (vector[0]+vector[1])/2.0;
scalar = (vector[0] + vector[1]) / 2.0;

return scalar;
}
@ -217,9 +221,8 @@ double ComputeStressTally::compute_scalar()
void ComputeStressTally::compute_peratom()
{
invoked_peratom = update->ntimestep;
if ((did_setup != invoked_peratom)
|| (update->eflag_global != invoked_peratom))
error->all(FLERR,"Energy was not tallied on needed timestep");
if ((did_setup != invoked_peratom) || (update->eflag_global != invoked_peratom))
error->all(FLERR, "Energy was not tallied on needed timestep");

// collect contributions from ghost atoms

@ -228,8 +231,7 @@ void ComputeStressTally::compute_peratom()

const int nall = atom->nlocal + atom->nghost;
for (int i = atom->nlocal; i < nall; ++i)
for (int j = 0; j < size_peratom_cols; ++j)
stress[i][j] = 0.0;
for (int j = 0; j < size_peratom_cols; ++j) stress[i][j] = 0.0;
}

// convert to stress*volume units = -pressure*volume
@ -251,7 +253,6 @@ void ComputeStressTally::compute_peratom()

double ComputeStressTally::memory_usage()
{
double bytes = (nmax < 0) ? 0 : nmax*size_peratom_cols * sizeof(double);
double bytes = (nmax < 0) ? 0 : nmax * (double)size_peratom_cols * sizeof(double);
return bytes;
}
@ -50,7 +50,7 @@ CUB_URL="https://github.com/NVlabs/cub/archive/1.12.0.tar.gz"
KOKKOS_URL="https://github.com/kokkos/kokkos/archive/3.4.01.tar.gz"
KIM_URL="https://s3.openkim.org/kim-api/kim-api-2.2.1.txz"
MSCG_URL="https://github.com/uchicago-voth/MSCG-release/archive/1.7.3.1.tar.gz"
PLUMED_URL="https://github.com/plumed/plumed2/releases/download/v2.7.1/plumed-src-2.7.1.tgz"
PLUMED_URL="https://github.com/plumed/plumed2/releases/download/v2.7.2/plumed-src-2.7.2.tgz"
PACELIB_URL="https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2021.4.9.tar.gz"
LATTE_URL="https://github.com/lanl/LATTE/archive/v1.2.2.tar.gz"
SCAFACOS_URL="https://github.com/scafacos/scafacos/releases/download/v1.0.1/scafacos-1.0.1.tar.gz"
@ -1,7 +1,7 @@
---
lammps_version: 2 Jul 2021
date_generated: Wed Jul 21 15:49:45 2021
epsilon: 1e-11
epsilon: 2e-11
prerequisites: ! |
pair reaxff
fix qeq/reaxff
@ -1,7 +1,7 @@
---
lammps_version: 2 Jul 2021
date_generated: Wed Jul 21 15:49:47 2021
epsilon: 1e-12
epsilon: 3e-12
prerequisites: ! |
pair reaxff
fix qeq/reaxff
@ -281,7 +281,6 @@ TEST_F(FileOperationsTest, error_message_warn)

TEST_F(FileOperationsTest, error_all_one)
{
char buf[64];
BEGIN_HIDE_OUTPUT();
command("echo none");
command("log none");
@ -94,7 +94,8 @@ TEST(Tokenizer, copy_constructor)

TEST(Tokenizer, move_constructor)
{
Tokenizer u = std::move(Tokenizer("test new word ", " "));
Tokenizer t("test new word ", " ");
Tokenizer u = std::move(t);
ASSERT_THAT(u.next(), Eq("test"));
ASSERT_THAT(u.next(), Eq("new"));
ASSERT_THAT(u.next(), Eq("word"));
@ -248,7 +249,8 @@ TEST(ValueTokenizer, copy_constructor)

TEST(ValueTokenizer, move_constructor)
{
ValueTokenizer u = std::move(ValueTokenizer(" test new word ", " "));
ValueTokenizer t(" test new word ", " ");
ValueTokenizer u = std::move(t);
ASSERT_THAT(u.next_string(), Eq("test"));
ASSERT_THAT(u.next_string(), Eq("new"));
ASSERT_THAT(u.next_string(), Eq("word"));
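The two test changes above replace std::move applied to a temporary with a named object that is then moved from. std::move on a prvalue is redundant -- a temporary already binds to the move constructor -- and newer compilers warn that it blocks copy elision, so the old form never meaningfully exercised the move constructor. A minimal illustration, not the LAMMPS test code:

// Illustration only: why moving from a named object is the meaningful test.
#include <string>
#include <utility>

int main() {
  // old pattern: std::move on a temporary (redundant, may pessimize)
  std::string a = std::move(std::string("test new word"));
  // new pattern: a named object genuinely goes through the move constructor
  std::string t("test new word");
  std::string b = std::move(t);
  return (a == b) ? 0 : 1;
}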