diff --git a/doc/src/JPG/user_intel.png b/doc/src/JPG/user_intel.png
index 0ebb2d1ae0..302b50124a 100755
Binary files a/doc/src/JPG/user_intel.png and b/doc/src/JPG/user_intel.png differ
diff --git a/doc/src/Manual.txt b/doc/src/Manual.txt
index dd24f8465a..444e901a40 100644
--- a/doc/src/Manual.txt
+++ b/doc/src/Manual.txt
@@ -1,7 +1,7 @@
LAMMPS Users Manual
-
+
@@ -21,7 +21,7 @@
LAMMPS Documentation :c,h3
-19 May 2017 version :c,h4
+23 Jun 2017 version :c,h4
Version info: :h4
diff --git a/doc/src/Section_commands.txt b/doc/src/Section_commands.txt
index 8aa5bdec2e..0fbab732c8 100644
--- a/doc/src/Section_commands.txt
+++ b/doc/src/Section_commands.txt
@@ -964,7 +964,7 @@ KOKKOS, o = USER-OMP, t = OPT.
"lj/expand (gko)"_pair_lj_expand.html,
"lj/gromacs (gko)"_pair_gromacs.html,
"lj/gromacs/coul/gromacs (ko)"_pair_gromacs.html,
-"lj/long/coul/long (o)"_pair_lj_long.html,
+"lj/long/coul/long (io)"_pair_lj_long.html,
"lj/long/dipole/long"_pair_dipole.html,
"lj/long/tip4p/long"_pair_lj_long.html,
"lj/smooth (o)"_pair_lj_smooth.html,
@@ -1073,7 +1073,7 @@ package"_Section_start.html#start_3.
"table/rx"_pair_table_rx.html,
"tersoff/table (o)"_pair_tersoff.html,
"thole"_pair_thole.html,
-"tip4p/long/soft (o)"_pair_lj_soft.html :tb(c=4,ea=c)
+"tip4p/long/soft (o)"_pair_lj_soft.html :tb(c=4,ea=c)
:line
@@ -1225,7 +1225,7 @@ USER-OMP, t = OPT.
"msm/cg (o)"_kspace_style.html,
"pppm (go)"_kspace_style.html,
"pppm/cg (o)"_kspace_style.html,
-"pppm/disp"_kspace_style.html,
+"pppm/disp (i)"_kspace_style.html,
"pppm/disp/tip4p"_kspace_style.html,
"pppm/stagger"_kspace_style.html,
"pppm/tip4p (o)"_kspace_style.html :tb(c=4,ea=c)
diff --git a/doc/src/Section_howto.txt b/doc/src/Section_howto.txt
index 579cb68474..f2f2561af8 100644
--- a/doc/src/Section_howto.txt
+++ b/doc/src/Section_howto.txt
@@ -1938,7 +1938,7 @@ documentation in the src/library.cpp file for details, including
which quantities can be queried by name:
void *lammps_extract_global(void *, char *)
-void lammps_extract_box(void *, double *, double *,
+void lammps_extract_box(void *, double *, double *,
double *, double *, double *, int *, int *)
void *lammps_extract_atom(void *, char *)
void *lammps_extract_compute(void *, char *, int, int)
@@ -2682,14 +2682,14 @@ bond_coeff 2 25.724 0.0 :pre
When running dynamics with the adiabatic core/shell model, the
following issues should be considered. The relative motion of
-the core and shell particles corresponds to the polarization,
-hereby an instantaneous relaxation of the shells is approximated
+the core and shell particles corresponds to the polarization,
+hereby an instantaneous relaxation of the shells is approximated
and a fast core/shell spring frequency ensures a nearly constant
-internal kinetic energy during the simulation.
+internal kinetic energy during the simulation.
Thermostats can alter this polarization behaviour, by scaling the
-internal kinetic energy, meaning the shell will not react freely to
-its electrostatic environment.
-Therefore it is typically desirable to decouple the relative motion of
+internal kinetic energy, meaning the shell will not react freely to
+its electrostatic environment.
+Therefore it is typically desirable to decouple the relative motion of
the core/shell pair, which is an imaginary degree of freedom, from the
real physical system. To do that, the "compute
temp/cs"_compute_temp_cs.html command can be used, in conjunction with
@@ -2721,13 +2721,13 @@ fix thermostatequ all nve # integrator as needed f
fix_modify thermoberendsen temp CSequ
thermo_modify temp CSequ # output of center-of-mass derived temperature :pre
-The pressure for the core/shell system is computed via the regular
-LAMMPS convention by "treating the cores and shells as individual
-particles"_#MitchellFincham2. For the thermo output of the pressure
-as well as for the application of a barostat, it is necessary to
-use an additional "pressure"_compute_pressure compute based on the
-default "temperature"_compute_temp and specifying it as a second
-argument in "fix modify"_fix_modify.html and
+The pressure for the core/shell system is computed via the regular
+LAMMPS convention by "treating the cores and shells as individual
+particles"_#MitchellFincham2. For the thermo output of the pressure
+as well as for the application of a barostat, it is necessary to
+use an additional "pressure"_compute_pressure compute based on the
+default "temperature"_compute_temp and specifying it as a second
+argument in "fix modify"_fix_modify.html and
"thermo_modify"_thermo_modify.html resulting in:
(...)
@@ -2757,18 +2757,18 @@ temp/cs"_compute_temp_cs.html command to the {temp} keyword of the
velocity all create 1427 134 bias yes temp CSequ
velocity all scale 1427 temp CSequ :pre
-To maintain the correct polarizability of the core/shell pairs, the
-kinetic energy of the internal motion shall remain nearly constant.
-Therefore the choice of spring force and mass ratio need to ensure
-much faster relative motion of the 2 atoms within the core/shell pair
-than their center-of-mass velocity. This allows the shells to
-effectively react instantaneously to the electrostatic environment and
+To maintain the correct polarizability of the core/shell pairs, the
+kinetic energy of the internal motion shall remain nearly constant.
+Therefore the choice of spring force and mass ratio need to ensure
+much faster relative motion of the 2 atoms within the core/shell pair
+than their center-of-mass velocity. This allows the shells to
+effectively react instantaneously to the electrostatic environment and
limits energy transfer to or from the core/shell oscillators.
This fast movement also dictates the timestep that can be used.
The primary literature of the adiabatic core/shell model suggests that
the fast relative motion of the core/shell pairs only allows negligible
-energy transfer to the environment.
+energy transfer to the environment.
The mentioned energy transfer will typically lead to a small drift
in total energy over time. This internal energy can be monitored
using the "compute chunk/atom"_compute_chunk_atom.html and "compute
@@ -2790,7 +2790,7 @@ pairs as chunks.
For example if core/shell pairs are the only molecules:
-read_data NaCl_CS_x0.1_prop.data
+read_data NaCl_CS_x0.1_prop.data
compute prop all property/atom molecule
compute cs_chunk all chunk/atom c_prop
compute cstherm all temp/chunk cs_chunk temp internal com yes cdof 3.0 # note the chosen degrees of freedom for the core/shell pairs
diff --git a/doc/src/Section_packages.txt b/doc/src/Section_packages.txt
index 14b2c0baa3..24506379c3 100644
--- a/doc/src/Section_packages.txt
+++ b/doc/src/Section_packages.txt
@@ -585,7 +585,7 @@ do not recommend building with other acceleration packages installed
make yes-kokkos
make machine :pre
-
+
make no-kokkos
make machine :pre
@@ -839,13 +839,13 @@ written and read in parallel.
Note that MPIIO is part of the standard message-passing interface
(MPI) library, so you should not need any additional compiler or link
settings, beyond what LAMMPS normally uses for MPI on your system.
-
+
make yes-mpiio
make machine :pre
-
+
make no-mpiio
make machine :pre
-
+
[Supporting info:]
src/MPIIO: filenames -> commands
@@ -855,7 +855,7 @@ src/MPIIO: filenames -> commands
"read_restart"_read_restart.html :ul
:line
-
+
MSCG package :link(mscg),h4
[Contents:]
@@ -914,7 +914,7 @@ lib/mscg/README
examples/mscg :ul
:line
-
+
OPT package :link(OPT),h4
[Contents:]
@@ -1387,7 +1387,7 @@ atomic information to continuum fields.
[Authors:] Reese Jones, Jeremy Templeton, Jon Zimmerman (Sandia).
[Install or un-install:]
-
+
Before building LAMMPS with this package, you must first build the ATC
library in lib/atc. You can do this manually if you prefer; follow
the instructions in lib/atc/README. You can also do it in one step
@@ -1420,10 +1420,10 @@ usual manner:
make yes-user-atc
make machine :pre
-
+
make no-user-atc
make machine :pre
-
+
[Supporting info:]
src/USER-ATC: filenames -> commands
@@ -1446,7 +1446,7 @@ model.
[Author:] Ilya Valuev (JIHT, Russia).
[Install or un-install:]
-
+
Before building LAMMPS with this package, you must first build the
AWPMD library in lib/awpmd. You can do this manually if you prefer;
follow the instructions in lib/awpmd/README. You can also do it in
@@ -1479,10 +1479,10 @@ usual manner:
make yes-user-awpmd
make machine :pre
-
+
make no-user-awpmd
make machine :pre
-
+
[Supporting info:]
src/USER-AWPMD: filenames -> commands
@@ -1505,13 +1505,13 @@ stability.
[Author:] Oliver Henrich (University of Strathclyde, Glasgow).
[Install or un-install:]
-
+
make yes-user-cgdna
make machine :pre
-
+
make no-user-cgdna
make machine :pre
-
+
[Supporting info:]
src/USER-CGDNA: filenames -> commands
@@ -1536,13 +1536,13 @@ acids.
[Author:] Axel Kohlmeyer (Temple U).
[Install or un-install:]
-
+
make yes-user-cgsdk
make machine :pre
-
+
make no-user-cgsdk
make machine :pre
-
+
[Supporting info:]
src/USER-CGSDK: filenames -> commands
@@ -1570,7 +1570,7 @@ by Giacomo Fiorin (ICMS, Temple University, Philadelphia, PA, USA) and
Jerome Henin (LISM, CNRS, Marseille, France).
[Install or un-install:]
-
+
Before building LAMMPS with this package, you must first build the
COLVARS library in lib/colvars. You can do this manually if you
prefer; follow the instructions in lib/colvars/README. You can also
@@ -1594,10 +1594,10 @@ usual manner:
make yes-user-colvars
make machine :pre
-
+
make no-user-colvars
make machine :pre
-
+
[Supporting info:]
src/USER-COLVARS: filenames -> commands
@@ -1619,13 +1619,13 @@ intensities based on kinematic diffraction theory.
[Author:] Shawn Coleman while at the U Arkansas.
[Install or un-install:]
-
+
make yes-user-diffraction
make machine :pre
-
+
make no-user-diffraction
make machine :pre
-
+
[Supporting info:]
src/USER-DIFFRACTION: filenames -> commands
@@ -1654,13 +1654,13 @@ algorithm.
Brennan (ARL).
[Install or un-install:]
-
+
make yes-user-dpd
make machine :pre
-
+
make no-user-dpd
make machine :pre
-
+
[Supporting info:]
src/USER-DPD: filenames -> commands
@@ -1696,13 +1696,13 @@ tools/drude.
Devemy (CNRS), and Agilio Padua (U Blaise Pascal).
[Install or un-install:]
-
+
make yes-user-drude
make machine :pre
-
+
make no-user-drude
make machine :pre
-
+
[Supporting info:]
src/USER-DRUDE: filenames -> commands
@@ -1734,13 +1734,13 @@ tools/eff; see its README file.
[Author:] Andres Jaramillo-Botero (CalTech).
[Install or un-install:]
-
+
make yes-user-eff
make machine :pre
-
+
make no-user-eff
make machine :pre
-
+
[Supporting info:]
src/USER-EFF: filenames -> commands
@@ -1773,13 +1773,13 @@ for using this package in tools/fep; see its README file.
[Author:] Agilio Padua (Universite Blaise Pascal Clermont-Ferrand)
[Install or un-install:]
-
+
make yes-user-fep
make machine :pre
-
+
make no-user-fep
make machine :pre
-
+
[Supporting info:]
src/USER-FEP: filenames -> commands
@@ -1836,13 +1836,13 @@ file.
You can then install/un-install the package and build LAMMPS in the
usual manner:
-
+
make yes-user-h5md
make machine :pre
-
+
make no-user-h5md
make machine :pre
-
+
[Supporting info:]
src/USER-H5MD: filenames -> commands
@@ -1908,7 +1908,7 @@ explained in "Section 5.3.2"_accelerate_intel.html.
make yes-user-intel yes-user-omp
make machine :pre
-
+
make no-user-intel no-user-omp
make machine :pre
@@ -1938,13 +1938,13 @@ can be used to model MD particles influenced by hydrodynamic forces.
Ontario).
[Install or un-install:]
-
+
make yes-user-lb
make machine :pre
-
+
make no-user-lb
make machine :pre
-
+
[Supporting info:]
src/USER-LB: filenames -> commands
@@ -1972,13 +1972,13 @@ matrix-MGPT algorithm due to Tomas Oppelstrup at LLNL.
[Authors:] Tomas Oppelstrup and John Moriarty (LLNL).
[Install or un-install:]
-
+
make yes-user-mgpt
make machine :pre
-
+
make no-user-mgpt
make machine :pre
-
+
[Supporting info:]
src/USER-MGPT: filenames -> commands
@@ -2000,13 +2000,13 @@ dihedral, improper, or command style.
src/USER-MISC/README file.
[Install or un-install:]
-
+
make yes-user-misc
make machine :pre
-
+
make no-user-misc
make machine :pre
-
+
[Supporting info:]
src/USER-MISC: filenames -> commands
@@ -2031,13 +2031,13 @@ n = grad(g).
Netherlands; since 2017: Brandeis University, Waltham, MA, USA)
[Install or un-install:]
-
+
make yes-user-manifold
make machine :pre
-
+
make no-user-manifold
make machine :pre
-
+
[Supporting info:]
src/USER-MANIFOLD: filenames -> commands
@@ -2080,7 +2080,7 @@ at
[Author:] Axel Kohlmeyer (Temple U).
[Install or un-install:]
-
+
Note that the lib/molfile/Makefile.lammps file has a setting for a
dynamic loading library libdl.a that should is typically present on
all systems, which is required for LAMMPS to link with this package.
@@ -2090,10 +2090,10 @@ lib/molfile/Makefile.lammps for details.
make yes-user-molfile
make machine :pre
-
+
make no-user-molfile
make machine :pre
-
+
[Supporting info:]
src/USER-MOLFILE: filenames -> commands
@@ -2128,7 +2128,7 @@ tools:
[Author:] Lars Pastewka (Karlsruhe Institute of Technology).
[Install or un-install:]
-
+
Note that to follow these steps, you need the standard NetCDF software
package installed on your system. The lib/netcdf/Makefile.lammps file
has settings for NetCDF include and library files that LAMMPS needs to
@@ -2138,7 +2138,7 @@ lib/netcdf/README for details.
make yes-user-netcdf
make machine :pre
-
+
make no-user-netcdf
make machine :pre
@@ -2178,10 +2178,10 @@ Once you have an appropriate Makefile.machine, you can
install/un-install the package and build LAMMPS in the usual manner:
[Install or un-install:]
-
+
make yes-user-omp
make machine :pre
-
+
make no-user-omp
make machine :pre
@@ -2213,13 +2213,13 @@ relations, directly from molecular dynamics simulations.
[Author:] Ling-Ti Kong (Shanghai Jiao Tong University).
[Install or un-install:]
-
+
make yes-user-phonon
make machine :pre
-
+
make no-user-phonon
make machine :pre
-
+
[Supporting info:]
src/USER-PHONON: filenames -> commands
@@ -2235,7 +2235,7 @@ USER-QMMM package :link(USER-QMMM),h4
A "fix qmmm"_fix_qmmm.html command which allows LAMMPS to be used in a
QM/MM simulation, currently only in combination with the "Quantum
-ESPRESSO"_espresso package.
+ESPRESSO"_espresso package.
:link(espresso,http://www.quantum-espresso.org)
@@ -2275,7 +2275,7 @@ usual manner:
make yes-user-qmmm
make machine :pre
-
+
make no-user-qmmm
make machine :pre
@@ -2284,7 +2284,7 @@ for a QM/MM simulation. You must also build Quantum ESPRESSO and
create a new executable which links LAMMPS and Quanutm ESPRESSO
together. These are steps 3 and 4 described in the lib/qmmm/README
file.
-
+
[Supporting info:]
src/USER-QMMM: filenames -> commands
@@ -2312,13 +2312,13 @@ simulation.
[Author:] Yuan Shen (Stanford U).
[Install or un-install:]
-
+
make yes-user-qtb
make machine :pre
-
+
make no-user-qtb
make machine :pre
-
+
[Supporting info:]
src/USER-QTB: filenames -> commands
@@ -2362,10 +2362,10 @@ usual manner:
make yes-user-quip
make machine :pre
-
+
make no-user-quip
make machine :pre
-
+
[Supporting info:]
src/USER-QUIP: filenames -> commands
@@ -2388,13 +2388,13 @@ for monitoring molecules as bonds are created and destroyed.
[Author:] Hasan Metin Aktulga (MSU) while at Purdue University.
[Install or un-install:]
-
+
make yes-user-reaxc
make machine :pre
-
+
make no-user-reaxc
make machine :pre
-
+
[Supporting info:]
src/USER-REAXC: filenames -> commands
@@ -2451,10 +2451,10 @@ usual manner:
make yes-user-smd
make machine :pre
-
+
make no-user-smd
make machine :pre
-
+
[Supporting info:]
src/USER-SMD: filenames -> commands
@@ -2477,13 +2477,13 @@ ionocovalent bonds in oxides.
Tetot (LAAS-CNRS, France).
[Install or un-install:]
-
+
make yes-user-smtbq
make machine :pre
-
+
make no-user-smtbq
make machine :pre
-
+
[Supporting info:]
src/USER-SMTBQ: filenames -> commands
@@ -2516,13 +2516,13 @@ property/atom"_compute_property_atom.html command.
Dynamics, Ernst Mach Institute, Germany).
[Install or un-install:]
-
+
make yes-user-sph
make machine :pre
-
+
make no-user-sph
make machine :pre
-
+
[Supporting info:]
src/USER-SPH: filenames -> commands
@@ -2544,13 +2544,13 @@ stress, etc) about individual interactions.
[Author:] Axel Kohlmeyer (Temple U).
[Install or un-install:]
-
+
make yes-user-tally
make machine :pre
-
+
make no-user-tally
make machine :pre
-
+
[Supporting info:]
src/USER-TALLY: filenames -> commands
@@ -2577,7 +2577,7 @@ system.
[Authors:] Richard Berger (JKU) and Daniel Queteschiner (DCS Computing).
[Install or un-install:]
-
+
The lib/vtk/Makefile.lammps file has settings for accessing VTK files
and its library, which are required for LAMMPS to build and link with
this package. If the settings are not valid for your system, check if
@@ -2590,10 +2590,10 @@ usual manner:
make yes-user-vtk
make machine :pre
-
+
make no-user-vtk
make machine :pre
-
+
[Supporting info:]
src/USER-VTK: filenames -> commands
diff --git a/doc/src/Section_python.txt b/doc/src/Section_python.txt
index 718e9e229c..1e67fca321 100644
--- a/doc/src/Section_python.txt
+++ b/doc/src/Section_python.txt
@@ -714,7 +714,7 @@ stored in the "image" property. All three image flags are stored in
a packed format in a single integer, so count would be 1 to retrieve
that integer, however also a count value of 3 can be used and then
the image flags will be unpacked into 3 individual integers, ordered
-in a similar fashion as coordinates.
+in a similar fashion as coordinates.
Note that the data structure gather_atoms("x") returns is different
from the data structure returned by extract_atom("x") in four ways.
diff --git a/doc/src/accelerate_intel.txt b/doc/src/accelerate_intel.txt
index d629828f12..f5bd66aeba 100644
--- a/doc/src/accelerate_intel.txt
+++ b/doc/src/accelerate_intel.txt
@@ -30,8 +30,8 @@ Dihedral Styles: charmm, harmonic, opls :l
Fixes: nve, npt, nvt, nvt/sllod :l
Improper Styles: cvff, harmonic :l
Pair Styles: buck/coul/cut, buck/coul/long, buck, eam, gayberne,
-charmm/coul/long, lj/cut, lj/cut/coul/long, sw, tersoff :l
-K-Space Styles: pppm :l
+charmm/coul/long, lj/cut, lj/cut/coul/long, lj/long/coul/long, sw, tersoff :l
+K-Space Styles: pppm, pppm/disp :l
:ule
[Speed-ups to expect:]
@@ -42,62 +42,88 @@ precision mode. Performance improvements are shown compared to
LAMMPS {without using other acceleration packages} as these are
under active development (and subject to performance changes). The
measurements were performed using the input files available in
-the src/USER-INTEL/TEST directory. These are scalable in size; the
-results given are with 512K particles (524K for Liquid Crystal).
-Most of the simulations are standard LAMMPS benchmarks (indicated
-by the filename extension in parenthesis) with modifications to the
-run length and to add a warmup run (for use with offload
-benchmarks).
+the src/USER-INTEL/TEST directory with the provided run script.
+These are scalable in size; the results given are with 512K
+particles (524K for Liquid Crystal). Most of the simulations are
+standard LAMMPS benchmarks (indicated by the filename extension in
+parenthesis) with modifications to the run length and to add a
+warmup run (for use with offload benchmarks).
:c,image(JPG/user_intel.png)
Results are speedups obtained on Intel Xeon E5-2697v4 processors
(code-named Broadwell) and Intel Xeon Phi 7250 processors
-(code-named Knights Landing) with "18 Jun 2016" LAMMPS built with
-Intel Parallel Studio 2016 update 3. Results are with 1 MPI task
+(code-named Knights Landing) with "June 2017" LAMMPS built with
+Intel Parallel Studio 2017 update 2. Results are with 1 MPI task
per physical core. See {src/USER-INTEL/TEST/README} for the raw
simulation rates and instructions to reproduce.
:line
+[Accuracy and order of operations:]
+
+In most molecular dynamics software, parallelization parameters
+(# of MPI, OpenMP, and vectorization) can change the results due
+to changing the order of operations with finite-precision
+calculations. The USER-INTEL package is deterministic. This means
+that the results should be reproducible from run to run with the
+{same} parallel configurations and when using deterministic
+libraries or library settings (MPI, OpenMP, FFT). However, there
+are differences in the USER-INTEL package that can change the
+order of operations compared to LAMMPS without acceleration:
+
+Neighbor lists can be created in a different order :ulb,l
+Bins used for sorting atoms can be oriented differently :l
+The default stencil order for PPPM is 7. By default, LAMMPS will
+calculate other PPPM parameters to fit the desired accuracy with
+this order :l
+The {newton} setting applies to all atoms, not just atoms shared
+between MPI tasks :l
+Vectorization can change the order for adding pairwise forces :l
+:ule
+
+The precision mode (described below) used with the USER-INTEL
+package can change the {accuracy} of the calculations. For the
+default {mixed} precision option, calculations between pairs or
+triplets of atoms are performed in single precision, intended to
+be within the inherent error of MD simulations. All accumulation
+is performed in double precision to prevent the error from growing
+with the number of atoms in the simulation. {Single} precision
+mode should not be used without appropriate validation.
+
+:line
+
[Quick Start for Experienced Users:]
LAMMPS should be built with the USER-INTEL package installed.
Simulations should be run with 1 MPI task per physical {core},
not {hardware thread}.
-For Intel Xeon CPUs:
-
Edit src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi as necessary. :ulb,l
-If using {kspace_style pppm} in the input script, add "neigh_modify binsize cutoff" and "kspace_modify diff ad" to the input script for better
-performance. Cutoff should be roughly the neighbor list cutoff. By
-default the binsize is half the neighbor list cutoff. :l
-"-pk intel 0 omp 2 -sf intel" added to LAMMPS command-line :l
+Set the environment variable KMP_BLOCKTIME=0 :l
+"-pk intel 0 omp $t -sf intel" added to LAMMPS command-line :l
+$t should be 2 for Intel Xeon CPUs and 2 or 4 for Intel Xeon Phi :l
+For some of the simple 2-body potentials without long-range
+electrostatics, performance and scalability can be better with
+the "newton off" setting added to the input script :l
+If using {kspace_style pppm} in the input script, add
+"kspace_modify diff ad" for better performance :l
:ule
-For Intel Xeon Phi CPUs for simulations without {kspace_style
-pppm} in the input script :
+For Intel Xeon Phi CPUs:
-Edit src/MAKE/OPTIONS/Makefile.knl as necessary. :ulb,l
-Runs should be performed using MCDRAM. :l
-"-pk intel 0 omp 2 -sf intel" {or} "-pk intel 0 omp 4 -sf intel"
-should be added to the LAMMPS command-line. Choice for best
-performance will depend on the simulation. :l
+Runs should be performed using MCDRAM. :ulb,l
:ule
-For Intel Xeon Phi CPUs for simulations with {kspace_style
-pppm} in the input script:
+For simulations using {kspace_style pppm} on Intel CPUs
+supporting AVX-512:
-Edit src/MAKE/OPTIONS/Makefile.knl as necessary. :ulb,l
-Runs should be performed using MCDRAM. :l
-Add "neigh_modify binsize 3" to the input script for better
-performance. :l
-Add "kspace_modify diff ad" to the input script for better
-performance. :l
-export KMP_AFFINITY=none :l
-"-pk intel 0 omp 3 lrt yes -sf intel" or "-pk intel 0 omp 1 lrt yes
--sf intel" added to LAMMPS command-line. Choice for best performance
-will depend on the simulation. :l
+Add "kspace_modify diff ad" to the input script :ulb,l
+The command-line option should be changed to
+"-pk intel 0 omp $r lrt yes -sf intel" where $r is the number of
+threads minus 1. :l
+Do not use thread affinity (set KMP_AFFINITY=none) :l
+The "newton off" setting may provide better scalability :l
:ule
For Intel Xeon Phi coprocessors (Offload):
@@ -169,6 +195,10 @@ cat /proc/cpuinfo :pre
[Building LAMMPS with the USER-INTEL package:]
+NOTE: See the src/USER-INTEL/README file for additional flags that
+might be needed for best performance on Intel server processors
+code-named "Skylake".
+
The USER-INTEL package must be installed into the source directory:
make yes-user-intel :pre
@@ -322,8 +352,8 @@ follow in the input script.
NOTE: The USER-INTEL package will perform better with modifications
to the input script when "PPPM"_kspace_style.html is used:
-"kspace_modify diff ad"_kspace_modify.html and "neigh_modify binsize
-3"_neigh_modify.html should be added to the input script.
+"kspace_modify diff ad"_kspace_modify.html should be added to the
+input script.
Long-Range Thread (LRT) mode is an option to the "package
intel"_package.html command that can improve performance when using
@@ -342,6 +372,10 @@ would normally perform best with "-pk intel 0 omp 4", instead use
environment variable "KMP_AFFINITY=none". LRT mode is not supported
when using offload.
+NOTE: Changing the "newton"_newton.html setting to off can improve
+performance and/or scalability for simple 2-body potentials such as
+lj/cut or when using LRT mode on processors supporting AVX-512.
+
Not all styles are supported in the USER-INTEL package. You can mix
the USER-INTEL package with styles from the "OPT"_accelerate_opt.html
package or the "USER-OMP package"_accelerate_omp.html. Of course,
@@ -467,7 +501,7 @@ supported.
Brown, W.M., Carrillo, J.-M.Y., Mishra, B., Gavhane, N., Thakker, F.M., De Kraker, A.R., Yamada, M., Ang, J.A., Plimpton, S.J., "Optimizing Classical Molecular Dynamics in LAMMPS," in Intel Xeon Phi Processor High Performance Programming: Knights Landing Edition, J. Jeffers, J. Reinders, A. Sodani, Eds. Morgan Kaufmann. :ulb,l
-Brown, W. M., Semin, A., Hebenstreit, M., Khvostov, S., Raman, K., Plimpton, S.J. Increasing Molecular Dynamics Simulation Rates with an 8-Fold Increase in Electrical Power Efficiency. 2016 International Conference for High Performance Computing. In press. :l
+Brown, W. M., Semin, A., Hebenstreit, M., Khvostov, S., Raman, K., Plimpton, S.J. "Increasing Molecular Dynamics Simulation Rates with an 8-Fold Increase in Electrical Power Efficiency."_http://dl.acm.org/citation.cfm?id=3014915 2016 High Performance Computing, Networking, Storage and Analysis, SC16: International Conference (pp. 82-95). :l
Brown, W.M., Carrillo, J.-M.Y., Gavhane, N., Thakkar, F.M., Plimpton, S.J. Optimizing Legacy Molecular Dynamics Software with Directive-Based Offload. Computer Physics Communications. 2015. 195: p. 95-101. :l
:ule
diff --git a/doc/src/bond_oxdna.txt b/doc/src/bond_oxdna.txt
index f9b35a167c..2add6f4c2f 100644
--- a/doc/src/bond_oxdna.txt
+++ b/doc/src/bond_oxdna.txt
@@ -30,7 +30,7 @@ The {oxdna/fene} and {oxdna2/fene} bond styles use the potential
to define a modified finite extensible nonlinear elastic (FENE) potential
"(Ouldridge)"_#oxdna_fene to model the connectivity of the phosphate backbone
-in the oxDNA force field for coarse-grained modelling of DNA.
+in the oxDNA force field for coarse-grained modelling of DNA.
The following coefficients must be defined for the bond type via the
"bond_coeff"_bond_coeff.html command as given in the above example, or in
@@ -43,8 +43,8 @@ r0 (distance) :ul
NOTE: The oxDNA bond style has to be used together with the corresponding oxDNA pair styles
for excluded volume interaction {oxdna/excv}, stacking {oxdna/stk}, cross-stacking {oxdna/xstk}
-and coaxial stacking interaction {oxdna/coaxstk} as well as hydrogen-bonding interaction {oxdna/hbond} (see also documentation of
-"pair_style oxdna/excv"_pair_oxdna.html). For the oxDNA2 "(Snodin)"_#oxdna2 bond style the analogous pair styles and an additional Debye-Hueckel pair
+and coaxial stacking interaction {oxdna/coaxstk} as well as hydrogen-bonding interaction {oxdna/hbond} (see also documentation of
+"pair_style oxdna/excv"_pair_oxdna.html). For the oxDNA2 "(Snodin)"_#oxdna2 bond style the analogous pair styles and an additional Debye-Hueckel pair
style {oxdna2/dh} have to be defined.
The coefficients in the above example have to be kept fixed and cannot be changed without reparametrizing the entire model.
@@ -66,7 +66,7 @@ LAMMPS"_Section_start.html#start_3 section for more info on packages.
[Related commands:]
-"pair_style oxdna/excv"_pair_oxdna.html, "pair_style oxdna2/excv"_pair_oxdna2.html, "fix nve/dotc/langevin"_fix_nve_dotc_langevin.html, "bond_coeff"_bond_coeff.html
+"pair_style oxdna/excv"_pair_oxdna.html, "pair_style oxdna2/excv"_pair_oxdna2.html, "fix nve/dotc/langevin"_fix_nve_dotc_langevin.html, "bond_coeff"_bond_coeff.html
[Default:] none
diff --git a/doc/src/compute_cnp_atom.txt b/doc/src/compute_cnp_atom.txt
index 9aa63c84de..16a51f5241 100644
--- a/doc/src/compute_cnp_atom.txt
+++ b/doc/src/compute_cnp_atom.txt
@@ -42,7 +42,7 @@ where the index {j} goes over the {n}i nearest neighbors of atom
{i}, and the index {k} goes over the {n}ij common nearest neighbors
between atom {i} and atom {j}. Rik and Rjk are the vectors connecting atom
{k} to atoms {i} and {j}. The quantity in the double sum is computed
-for each atom.
+for each atom.
The CNP calculation is sensitive to the specified cutoff value.
You should ensure that the appropriate nearest neighbors of an atom are
diff --git a/doc/src/compute_pair_local.txt b/doc/src/compute_pair_local.txt
index 0121210994..16aaba4667 100644
--- a/doc/src/compute_pair_local.txt
+++ b/doc/src/compute_pair_local.txt
@@ -76,7 +76,9 @@ command for the types of the two atoms is used. For the {radius}
setting, the sum of the radii of the two particles is used as a
cutoff. For example, this is appropriate for granular particles which
only interact when they are overlapping, as computed by "granular pair
-styles"_pair_gran.txt.
+styles"_pair_gran.txt. Note that if a granular model defines atom
+types such that all particles of a specific type are monodisperse
+(same diameter), then the two settings are effectively identical.
Note that as atoms migrate from processor to processor, there will be
no consistent ordering of the entries within the local vector or array
diff --git a/doc/src/compute_property_local.txt b/doc/src/compute_property_local.txt
index f7851e864b..39106a39c8 100644
--- a/doc/src/compute_property_local.txt
+++ b/doc/src/compute_property_local.txt
@@ -79,6 +79,9 @@ the two atoms is used. For the {radius} setting, the sum of the radii
of the two particles is used as a cutoff. For example, this is
appropriate for granular particles which only interact when they are
overlapping, as computed by "granular pair styles"_pair_gran.html.
+Note that if a granular model defines atom types such that all
+particles of a specific type are monodisperse (same diameter), then
+the two settings are effectively identical.
If the inputs are bond, angle, etc attributes, the local data is
generated by looping over all the atoms owned on a processor and
diff --git a/doc/src/dihedral_charmm.txt b/doc/src/dihedral_charmm.txt
index 918755ec38..73dc67cdef 100644
--- a/doc/src/dihedral_charmm.txt
+++ b/doc/src/dihedral_charmm.txt
@@ -138,7 +138,15 @@ more instructions on how to use the accelerated styles effectively.
[Restrictions:]
-This dihedral style can only be used if LAMMPS was built with the
+When using run_style "respa"_run_style.html, these dihedral styles
+must be assigned to the same r-RESPA level as {pair} or {outer}.
+
+When used in combination with CHARMM pair styles, the 1-4
+"special_bonds"_special_bonds.html scaling factors must be set to 0.0.
+Otherwise non-bonded contributions for these 1-4 pairs will be
+computed multiple times.
+
+These dihedral styles can only be used if LAMMPS was built with the
MOLECULE package. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info on packages.
diff --git a/doc/src/dump_vtk.txt b/doc/src/dump_vtk.txt
index 21502e7f49..d4d28c81fc 100644
--- a/doc/src/dump_vtk.txt
+++ b/doc/src/dump_vtk.txt
@@ -16,7 +16,7 @@ ID = user-assigned name for the dump
group-ID = ID of the group of atoms to be dumped
vtk = style of dump command (other styles {atom} or {cfg} or {dcd} or {xtc} or {xyz} or {local} or {custom} are discussed on the "dump"_dump.html doc page)
N = dump every this many timesteps
-file = name of file to write dump info to
+file = name of file to write dump info to
args = same as arguments for "dump_style custom"_dump.html :ul
[Examples:]
@@ -83,7 +83,7 @@ Triclinic simulation boxes (non-orthogonal) are saved as
hexahedrons in either legacy .vtk or .vtu XML format.
Style {vtk} allows you to specify a list of atom attributes to be
-written to the dump file for each atom. The list of possible attributes
+written to the dump file for each atom. The list of possible attributes
is the same as for the "dump_style custom"_dump.html command; see
its doc page for a listing and an explanation of each attribute.
diff --git a/doc/src/fix_box_relax.txt b/doc/src/fix_box_relax.txt
index 54decd6282..e3d75ee858 100644
--- a/doc/src/fix_box_relax.txt
+++ b/doc/src/fix_box_relax.txt
@@ -245,7 +245,7 @@ appear the system is converging to your specified pressure. The
solution for this is to either (a) zero the velocities of all atoms
before performing the minimization, or (b) make sure you are
monitoring the pressure without its kinetic component. The latter can
-be done by outputting the pressure from the pressure compute this
+be done by outputting the pressure from the pressure compute this
command creates (see below) or a pressure compute you define yourself.
NOTE: Because pressure is often a very sensitive function of volume,
diff --git a/doc/src/fix_eos_table_rx.txt b/doc/src/fix_eos_table_rx.txt
index e8d515e1f3..e5e4f772f6 100644
--- a/doc/src/fix_eos_table_rx.txt
+++ b/doc/src/fix_eos_table_rx.txt
@@ -45,14 +45,14 @@ species {j} in particle {i}, {u_j} is the internal energy of species j,
{DeltaH_f,j} is the heat of formation of species {j}, N is the number of
molecules represented by the coarse-grained particle, kb is the
Boltzmann constant, and T is the temperature of the system. Additionally,
-it is possible to modify the concentration-dependent particle internal
-energy relation by adding an energy correction, temperature-dependent
+it is possible to modify the concentration-dependent particle internal
+energy relation by adding an energy correction, temperature-dependent
correction, and/or a molecule-dependent correction. An energy correction can
-be specified as a constant (in energy units). A temperature correction can be
-specified by multiplying a temperature correction coefficient by the
-internal temperature. A molecular correction can be specified by
-by multiplying a molecule correction coefficient by the average number of
-product gas particles in the coarse-grain particle.
+be specified as a constant (in energy units). A temperature correction can be
+specified by multiplying a temperature correction coefficient by the
+internal temperature. A molecular correction can be specified by
+multiplying a molecule correction coefficient by the average number of
+product gas particles in the coarse-grain particle.
Fix {eos/table/rx} creates interpolation tables of length {N} from {m}
internal energy values of each species {u_j} listed in a file as a
@@ -72,12 +72,12 @@ The second filename specifies a file containing heat of formation
{DeltaH_f,j} for each species.
In cases where the coarse-grain particle represents a single molecular
-species (i.e., no reactions occur and fix {rx} is not present in the input file),
-fix {eos/table/rx} can be applied in a similar manner to fix {eos/table}
-within a non-reactive DPD simulation. In this case, the heat of formation
+species (i.e., no reactions occur and fix {rx} is not present in the input file),
+fix {eos/table/rx} can be applied in a similar manner to fix {eos/table}
+within a non-reactive DPD simulation. In this case, the heat of formation
filename is replaced with the heat of formation value for the single species.
-Additionally, the energy correction and temperature correction coefficients may
-also be specified as fix arguments.
+Additionally, the energy correction and temperature correction coefficients may
+also be specified as fix arguments.
:line
@@ -138,8 +138,8 @@ used as the species name must correspond with the tags used to define
the reactions with the "fix rx"_fix_rx.html command.
Alternatively, corrections to the EOS can be included by specifying
-three additional columns that correspond to the energy correction,
-the temperature correction coefficient and molecule correction
+three additional columns that correspond to the energy correction,
+the temperature correction coefficient and molecule correction
coefficient. In this case, the format of the file is as follows:
# HEAT OF FORMATION TABLE (one or more comment or blank lines) :pre
diff --git a/doc/src/fix_filter_corotate.txt b/doc/src/fix_filter_corotate.txt
index a3339648fa..b782d285c7 100644
--- a/doc/src/fix_filter_corotate.txt
+++ b/doc/src/fix_filter_corotate.txt
@@ -70,8 +70,8 @@ minimization"_minimize.html.
[Restrictions:]
-This fix is part of the USER-MISC package. It is only enabled if
-LAMMPS was built with that package. See the "Making
+This fix is part of the USER-MISC package. It is only enabled if
+LAMMPS was built with that package. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info.
Currently, it does not support "molecule templates"_molecule.html.
diff --git a/doc/src/fix_gcmc.txt b/doc/src/fix_gcmc.txt
index 7ac607a2f1..41ec38cffb 100644
--- a/doc/src/fix_gcmc.txt
+++ b/doc/src/fix_gcmc.txt
@@ -406,7 +406,7 @@ the user for each subsequent fix gcmc command.
[Default:]
The option defaults are mol = no, maxangle = 10, overlap_cutoff = 0.0,
-fugacity_coeff = 1, and full_energy = no,
+fugacity_coeff = 1, and full_energy = no,
except for the situations where full_energy is required, as
listed above.
diff --git a/doc/src/fix_grem.txt b/doc/src/fix_grem.txt
index 3fc5c1a10e..661f68ed99 100644
--- a/doc/src/fix_grem.txt
+++ b/doc/src/fix_grem.txt
@@ -85,13 +85,13 @@ No information about this fix is written to "binary restart
files"_restart.html.
The "thermo_modify"_thermo_modify.html {press} option is supported
-by this fix to add the rescaled kinetic pressure as part of
+by this fix to add the rescaled kinetic pressure as part of
"thermodynamic output"_thermo_style.html.
[Restrictions:]
-This fix is part of the USER-MISC package. It is only enabled if
-LAMMPS was built with that package. See the "Making
+This fix is part of the USER-MISC package. It is only enabled if
+LAMMPS was built with that package. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info.
[Related commands:]
diff --git a/doc/src/fix_ipi.txt b/doc/src/fix_ipi.txt
index b1533830bc..07e8025d77 100644
--- a/doc/src/fix_ipi.txt
+++ b/doc/src/fix_ipi.txt
@@ -58,14 +58,14 @@ input are listed in the same order as in the data file of LAMMPS. The
initial configuration is ignored, as it will be substituted with the
coordinates received from i-PI before forces are ever evaluated.
-A note of caution when using potentials that contain long-range
+A note of caution when using potentials that contain long-range
electrostatics, or that contain parameters that depend on box size:
all of these options will be initialized based on the cell size in the
-LAMMPS-side initial configuration and kept constant during the run.
-This is required to e.g. obtain reproducible and conserved forces.
-If the cell varies too wildly, it may be advisable to reinitialize
-these interactions at each call. This behavior can be requested by
-setting the {reset} switch.
+LAMMPS-side initial configuration and kept constant during the run.
+This is required to e.g. obtain reproducible and conserved forces.
+If the cell varies too wildly, it may be advisable to reinitialize
+these interactions at each call. This behavior can be requested by
+setting the {reset} switch.
[Restart, fix_modify, output, run start/stop, minimize info:]
diff --git a/doc/src/fix_mscg.txt b/doc/src/fix_mscg.txt
index 0e09f8a9c5..7d16967955 100644
--- a/doc/src/fix_mscg.txt
+++ b/doc/src/fix_mscg.txt
@@ -57,7 +57,7 @@ simulations is as follows:
Perform all-atom simulations on the system to be coarse grained.
Generate a trajectory mapped to the coarse-grained model.
Create input files for the MS-CG library.
-Run the range finder functionality of the MS-CG library.
+Run the range finder functionality of the MS-CG library.
Run the force matching functionality of the MS-CG library.
Check the results of the force matching.
Run coarse-grained simulations using the new coarse-grained potentials. :ol
@@ -70,7 +70,7 @@ Step 2 can be performed using a Python script (what is the name?)
provided with the MS-CG library which defines the coarse-grained model
and converts a standard LAMMPS dump file for an all-atom simulation
(step 1) into a LAMMPS dump file which has the positions of and forces
-on the coarse-grained beads.
+on the coarse-grained beads.
In step 3, an input file named "control.in" is needed by the MS-CG
library which sets parameters for the range finding and force matching
diff --git a/doc/src/fix_neb.txt b/doc/src/fix_neb.txt
index 94c6ee84fd..52d8a7df84 100644
--- a/doc/src/fix_neb.txt
+++ b/doc/src/fix_neb.txt
@@ -14,152 +14,179 @@ fix ID group-ID neb Kspring keyword value :pre
ID, group-ID are documented in "fix"_fix.html command :ulb,l
neb = style name of this fix command :l
-Kspring = parallel spring constant (force/distance units or force units) :l
+Kspring = spring constant for parallel nudging force (force/distance units or force units, see parallel keyword) :l
zero or more keyword/value pairs may be appended :l
-keyword = {nudg_style} or {perp} or {freend} or {freend_k_spring} :l
- {nudg_style} value = {neigh} or {idealpos}
- {neigh} = the parallel nudging force is calculated from the distances to neighbouring replicas (in this case, Kspring is in force/distance units)
- {idealpos} = the parallel nudging force is proportional to the distance between the replica and its interpolated ideal position (in this case Kspring is in force units)
- {perp} value {none} or kspring2
- {none} = no perpendicular spring force is applied
- {kspring2} = spring constant for the perpendicular nudging force (in force/distance units)
- {freeend} value = {none} or {ini} or {final} or {finaleini} or {final2eini}
- {none} = no nudging force is applied to the first and last replicas
- {ini} = set the first replica to be a free end
- {final} = set the last replica to be a free end
- {finaleini} = set the last replica to be a free end and set its target energy as that of the first replica
- {final2eini} = same as {finaleini} plus prevent intermediate replicas to have a lower energy than the first replica
- {freeend_kspring} value = kspring3
- kspring3 = spring constant of the perpendicular spring force (per distance units)
- :pre
+keyword = {parallel} or {perp} or {end} :l
+ {parallel} value = {neigh} or {ideal}
+ {neigh} = parallel nudging force based on distance to neighbor replicas (Kspring = force/distance units)
+ {ideal} = parallel nudging force based on interpolated ideal position (Kspring = force units)
+ {perp} value = {Kspring2}
+ {Kspring2} = spring constant for perpendicular nudging force (force/distance units)
+ {end} values = estyle Kspring3
+ {estyle} = {first} or {last} or {last/efirst} or {last/efirst/middle}
+ {first} = apply force to first replica
+ {last} = apply force to last replica
+ {last/efirst} = apply force to last replica and set its target energy to that of first replica
+ {last/efirst/middle} = same as {last/efirst} plus prevent middle replicas having lower energy than first replica
+ {Kspring3} = spring constant for target energy term (1/distance units) :pre,ule
[Examples:]
fix 1 active neb 10.0
-fix 2 all neb 1.0 perp 1.0 freeend final
-fix 1 all neb 1.0 nudg_style idealpos freeend final2eini freend_kspring 1:pre
+fix 2 all neb 1.0 perp 1.0 end last
+fix 2 all neb 1.0 perp 1.0 end first 1.0 end last 1.0
+fix 1 all neb 1.0 nudge ideal end last/efirst 1 :pre
[Description:]
-Add a nudging force to atoms in the group for a multi-replica
+Add nudging forces to atoms in the group for a multi-replica
simulation run via the "neb"_neb.html command to perform a nudged
elastic band (NEB) calculation for finding the transition state.
Hi-level explanations of NEB are given with the "neb"_neb.html command
and in "Section_howto 5"_Section_howto.html#howto_5 of the manual.
The fix neb command must be used with the "neb" command and defines
-how nudging inter-replica forces are computed. A NEB calculation is
+how inter-replica nudging forces are computed. A NEB calculation is
divided in two stages. In the first stage n replicas are relaxed
-toward a MEP and in a second stage, the climbing image scheme (see
-"(Henkelman2)"_#Henkelman2) is turned on so that the replica having
-the highest energy relaxes toward the saddle point (i.e. the point of
-highest energy along the MEP).
+toward a MEP until convergence. In the second stage, the climbing
+image scheme (see "(Henkelman2)"_#Henkelman2) is enabled, so that the
+replica having the highest energy relaxes toward the saddle point
+(i.e. the point of highest energy along the MEP), and a second
+relaxation is performed.
-One purpose of the nudging forces is to keep the replicas equally
-spaced. During the NEB, the 3N-length vector of interatomic force Fi
-= -Grad(V) of replicas i is altered. For all intermediate replicas
-(i.e. for 1 0
+The interatomic force Fi for the specified replica becomes:
+
+Fi = -Grad(V) + (Grad(V) dot T' + (E-ETarget)*Kspring3) T', {when} Grad(V) dot T' < 0
+Fi = -Grad(V) + (Grad(V) dot T' + (ETarget- E)*Kspring3) T', {when} Grad(V) dot T' > 0
:pre
-where E is the energy of the free end replica and ETarget is the
-target energy.
+where E is the current energy of the replica and ETarget is the target
+energy. The "spring" constant on the difference in energies is the
+specified {Kspring3} value.
-When the value {ini} ({final}) is used after the keyword {freeend},
-the first (last) replica is considered as a free end. The target
-energy is set to the energy of the replica at starting of the NEB
-calculation. When the value {finaleini} or {final2eini} is used the
-last image is considered as a free end and the target energy is equal
-to the energy of the first replica (which can evolve during the NEB
-relaxation). With the value {finaleini}, when the initial path is too
-far from the MEP, an intermediate repilica might relax "faster" and
-get a lower energy than the last replica. The benefit of the free end
-is then lost since this intermediate replica will relax toward a local
-minima. This behavior can be prevented by using the value {final2eini}
-which remove entirely the contribution of the gradient for all
-intermediate replica which have a lower energy than the initial one
-thus preventing these replicae to over-relax. After converging a NEB
-with the {final2eini} value it is recommended to check that all
-intermediate replica have a larger energy than the initial
-replica. Finally note that if the last replica converges toward a
-local minimum with a larger energy than the energy of the first
-replica, a free end neb calculation with the value {finaleini} or
-{final2eini} cannot reach the convergence criteria.
+When {estyle} is specified as {first}, the force is applied to the
+first replica. When {estyle} is specified as {last}, the force is
+applied to the last replica. Note that the {end} keyword can be used
+twice to add forces to both the first and last replicas.
-:line
+For both these {estyle} settings, the target energy {ETarget} is set
+to the initial energy of the replica (at the start of the NEB
+calculation).
+If the {estyle} is specified as {last/efirst} or {last/efirst/middle},
+force is applied to the last replica, but the target energy {ETarget}
+is continuously set to the energy of the first replica, as it evolves
+during the NEB relaxation.
+The difference between these two {estyle} options is as follows. When
+{estyle} is specified as {last/efirst}, no change is made to the
+inter-replica force applied to the intermediate replicas (neither
+first or last). If the initial path is too far from the MEP, an
+intermediate replica may relax "faster" and reach a lower energy than
+the last replica. In this case the intermediate replica will be
+relaxing toward its own local minima. This behavior can be prevented
+by specifying {estyle} as {last/efirst/middle} which will alter the
+inter-replica force applied to intermediate replicas by removing the
+contribution of the gradient to the inter-replica force. This will
+only be done if a particular intermediate replica has a lower energy
+than the first replica. This should effectively prevent the
+intermediate replicas from over-relaxing.
-In the second stage of the NEB, the interatomic force Fi for the
-climbing replica (which is the replica of highest energy) becomes:
-
-Fi = -Grad(V) + 2 (Grad(V) dot That) That :pre
-
+After converging a NEB calculation using an {estyle} of
+{last/efirst/middle}, you should check that all intermediate replicas
+have a larger energy than the first replica. If this is not the case,
+the path is probably not a MEP.
+Finally, note that if the last replica converges toward a local
+minimum which has a larger energy than the energy of the first
+replica, a NEB calculation using an {estyle} of {last/efirst} or
+{last/efirst/middle} cannot reach final convergence.
[Restart, fix_modify, output, run start/stop, minimize info:]
@@ -186,7 +213,8 @@ for more info on packages.
[Default:]
-The option defaults are nudg_style = neigh, perp = none, freeend = none and freend_kspring = 1.
+The option defaults are nudge = neigh, perp = 0.0, ends is not
+specified (no inter-replica force on the end replicas).
:line
@@ -197,14 +225,14 @@ The option defaults are nudg_style = neigh, perp = none, freeend = none and free
[(Henkelman2)] Henkelman, Uberuaga, Jonsson, J Chem Phys, 113,
9901-9904 (2000).
-:link(E)
-[(E)] E, Ren, Vanden-Eijnden, Phys Rev B, 66, 052301 (2002)
+:link(WeinenE)
+[(WeinenE)] E, Ren, Vanden-Eijnden, Phys Rev B, 66, 052301 (2002).
:link(Jonsson)
[(Jonsson)] Jonsson, Mills and Jacobsen, in Classical and Quantum
-Dynamics in Condensed Phase Simulations, edited by Berne, Ciccotti, and Coker
-World Scientific, Singapore, 1998, p. 385
+Dynamics in Condensed Phase Simulations, edited by Berne, Ciccotti,
+and Coker World Scientific, Singapore, 1998, p 385.
:link(Maras1)
[(Maras)] Maras, Trushin, Stukowski, Ala-Nissila, Jonsson,
-Comp Phys Comm, 205, 13-21 (2016)
+Comp Phys Comm, 205, 13-21 (2016).
diff --git a/doc/src/fix_nve_dot.txt b/doc/src/fix_nve_dot.txt
index b1c00cd25c..7ad51f3768 100644
--- a/doc/src/fix_nve_dot.txt
+++ b/doc/src/fix_nve_dot.txt
@@ -23,13 +23,13 @@ fix 1 all nve/dot :pre
[Description:]
Apply a rigid-body integrator as described in "(Davidchack)"_#Davidchack1
-to a group of atoms, but without Langevin dynamics.
+to a group of atoms, but without Langevin dynamics.
This command performs Molecular dynamics (MD)
-via a velocity-Verlet algorithm and an evolution operator that rotates
-the quaternion degrees of freedom, similar to the scheme outlined in "(Miller)"_#Miller1.
+via a velocity-Verlet algorithm and an evolution operator that rotates
+the quaternion degrees of freedom, similar to the scheme outlined in "(Miller)"_#Miller1.
This command is the equivalent of the "fix nve/dotc/langevin"_fix_nve_dotc_langevin.html
-without damping and noise and can be used to determine the stability range
+without damping and noise and can be used to determine the stability range
in a NVE ensemble prior to using the Langevin-type DOTC-integrator
(see also "fix nve/dotc/langevin"_fix_nve_dotc_langevin.html).
The command is equivalent to the "fix nve"_fix_nve.html.
diff --git a/doc/src/fix_nve_dotc_langevin.txt b/doc/src/fix_nve_dotc_langevin.txt
index 19d5b233ce..5de8e663c4 100644
--- a/doc/src/fix_nve_dotc_langevin.txt
+++ b/doc/src/fix_nve_dotc_langevin.txt
@@ -28,20 +28,20 @@ fix 1 all nve/dotc/langevin 1.0 1.0 0.03 457145 angmom 10 :pre
[Description:]
-Apply a rigid-body Langevin-type integrator of the kind "Langevin C"
+Apply a rigid-body Langevin-type integrator of the kind "Langevin C"
as described in "(Davidchack)"_#Davidchack2
to a group of atoms, which models an interaction with an implicit background
solvent. This command performs Brownian dynamics (BD)
-via a technique that splits the integration into a deterministic Hamiltonian
-part and the Ornstein-Uhlenbeck process for noise and damping.
+via a technique that splits the integration into a deterministic Hamiltonian
+part and the Ornstein-Uhlenbeck process for noise and damping.
The quaternion degrees of freedom are updated though an evolution
operator which performs a rotation in quaternion space, preserves
the quaternion norm and is akin to "(Miller)"_#Miller2.
-In terms of syntax this command has been closely modelled on the
-"fix langevin"_fix_langevin.html and its {angmom} option. But it combines
-the "fix nve"_fix_nve.html and the "fix langevin"_fix_langevin.html in
-one single command. The main feature is improved stability
+In terms of syntax this command has been closely modelled on the
+"fix langevin"_fix_langevin.html and its {angmom} option. But it combines
+the "fix nve"_fix_nve.html and the "fix langevin"_fix_langevin.html in
+one single command. The main feature is improved stability
over the standard integrator, permitting slightly larger timestep sizes.
NOTE: Unlike the "fix langevin"_fix_langevin.html this command performs
@@ -57,7 +57,7 @@ Fc is the conservative force computed via the usual inter-particle
interactions ("pair_style"_pair_style.html,
"bond_style"_bond_style.html, etc).
-The Ff and Fr terms are implicitly taken into account by this fix
+The Ff and Fr terms are implicitly taken into account by this fix
on a per-particle basis.
Ff is a frictional drag or viscous damping term proportional to the
@@ -77,7 +77,7 @@ a Gaussian random number) for speed.
:line
-{Tstart} and {Tstop} have to be constant values, i.e. they cannot
+{Tstart} and {Tstop} have to be constant values, i.e. they cannot
be variables.
The {damp} parameter is specified in time units and determines how
@@ -98,16 +98,16 @@ different numbers of processors.
The keyword/value option has to be used in the following way:
-This fix has to be used together with the {angmom} keyword. The
-particles are always considered to have a finite size.
-The keyword {angmom} enables thermostatting of the rotational degrees of
-freedom in addition to the usual translational degrees of freedom.
+This fix has to be used together with the {angmom} keyword. The
+particles are always considered to have a finite size.
+The keyword {angmom} enables thermostatting of the rotational degrees of
+freedom in addition to the usual translational degrees of freedom.
-The scale factor after the {angmom} keyword gives the ratio of the rotational to
+The scale factor after the {angmom} keyword gives the ratio of the rotational to
the translational friction coefficient.
An example input file can be found in /examples/USER/cgdna/examples/duplex2/.
-A technical report with more information on this integrator can be found
+A technical report with more information on this integrator can be found
"here"_PDF/USER-CGDNA-overview.pdf.
:line
@@ -120,7 +120,7 @@ LAMMPS"_Section_start.html#start_3 section for more info on packages.
[Related commands:]
-"fix nve"_fix_nve.html, "fix langevin"_fix_langevin.html, "fix nve/dot"_fix_nve_dot.html,
+"fix nve"_fix_nve.html, "fix langevin"_fix_langevin.html, "fix nve/dot"_fix_nve_dot.html,
[Default:] none
diff --git a/doc/src/fix_nvk.txt b/doc/src/fix_nvk.txt
index 271483b441..49fd8217ab 100644
--- a/doc/src/fix_nvk.txt
+++ b/doc/src/fix_nvk.txt
@@ -27,7 +27,7 @@ timestep. V is volume; K is kinetic energy. This creates a system
trajectory consistent with the isokinetic ensemble.
The equations of motion used are those of Minary et al in
-"(Minary)"_#nvk-Minary, a variant of those initially given by Zhang in
+"(Minary)"_#nvk-Minary, a variant of those initially given by Zhang in
"(Zhang)"_#nvk-Zhang.
The kinetic energy will be held constant at its value given when fix
diff --git a/doc/src/fix_spring.txt b/doc/src/fix_spring.txt
index 5f94f4cdae..014a43aacc 100644
--- a/doc/src/fix_spring.txt
+++ b/doc/src/fix_spring.txt
@@ -89,7 +89,7 @@ NOTE: The center of mass of a group of atoms is calculated in
group can straddle a periodic boundary. See the "dump"_dump.html doc
page for a discussion of unwrapped coordinates. It also means that a
spring connecting two groups or a group and the tether point can cross
-a periodic boundary and its length be calculated correctly.
+a periodic boundary and its length be calculated correctly.
[Restart, fix_modify, output, run start/stop, minimize info:]
diff --git a/doc/src/fix_ti_spring.txt b/doc/src/fix_ti_spring.txt
index 40e595e21e..afb1dcf8ff 100644
--- a/doc/src/fix_ti_spring.txt
+++ b/doc/src/fix_ti_spring.txt
@@ -144,7 +144,11 @@ this fix.
"fix spring"_fix_spring.html, "fix adapt"_fix_adapt.html
-[Restrictions:] none
+[Restrictions:]
+
+This fix is part of the USER-MISC package. It is only enabled if
+LAMMPS was built with that package. See the "Making
+LAMMPS"_Section_start.html#start_3 section for more info.
[Default:]
diff --git a/doc/src/kspace_modify.txt b/doc/src/kspace_modify.txt
index b488df9627..6d27bb7076 100644
--- a/doc/src/kspace_modify.txt
+++ b/doc/src/kspace_modify.txt
@@ -219,10 +219,10 @@ instead of using the virial equation. This option cannot be used to access
individual components of the pressure tensor, to compute per-atom virial,
or with suffix kspace/pair styles of MSM, like OMP or GPU.
-The {fftbench} keyword applies only to PPPM. It is on by default. If
-this option is turned off, LAMMPS will not take the time at the end
-of a run to give FFT benchmark timings, and will finish a few seconds
-faster than it would if this option were on.
+The {fftbench} keyword applies only to PPPM. It is off by default. If
+this option is turned on, LAMMPS will perform a short FFT benchmark
+computation and report its timings, and will thus finish a few seconds
+later than it would if this option were off.
The {collective} keyword applies only to PPPM. It is set to {no} by
default, except on IBM BlueGene machines. If this option is set to
@@ -306,9 +306,10 @@ parameters, see the "How-To"_Section_howto.html#howto_24 discussion.
The option defaults are mesh = mesh/disp = 0 0 0, order = order/disp =
5 (PPPM), order = 10 (MSM), minorder = 2, overlap = yes, force = -1.0,
gewald = gewald/disp = 0.0, slab = 1.0, compute = yes, cutoff/adjust =
-yes (MSM), pressure/scalar = yes (MSM), fftbench = yes (PPPM), diff = ik
+yes (MSM), pressure/scalar = yes (MSM), fftbench = no (PPPM), diff = ik
(PPPM), mix/disp = pair, force/disp/real = -1.0, force/disp/kspace = -1.0,
-split = 0, tol = 1.0e-6, and disp/auto = no.
+split = 0, tol = 1.0e-6, and disp/auto = no. For pppm/intel, order =
+order/disp = 7.
:line
diff --git a/doc/src/kspace_style.txt b/doc/src/kspace_style.txt
index 371540bd68..4f27c9aa78 100644
--- a/doc/src/kspace_style.txt
+++ b/doc/src/kspace_style.txt
@@ -33,12 +33,16 @@ style = {none} or {ewald} or {ewald/disp} or {ewald/omp} or {pppm} or {pppm/cg}
accuracy = desired relative error in forces
{pppm/gpu} value = accuracy
accuracy = desired relative error in forces
+ {pppm/intel} value = accuracy
+ accuracy = desired relative error in forces
{pppm/kk} value = accuracy
accuracy = desired relative error in forces
{pppm/omp} value = accuracy
accuracy = desired relative error in forces
{pppm/cg/omp} value = accuracy
accuracy = desired relative error in forces
+ {pppm/disp/intel} value = accuracy
+ accuracy = desired relative error in forces
{pppm/tip4p/omp} value = accuracy
accuracy = desired relative error in forces
{pppm/stagger} value = accuracy
diff --git a/doc/src/neb.txt b/doc/src/neb.txt
index a4afc2fe6d..d2e8be3f03 100644
--- a/doc/src/neb.txt
+++ b/doc/src/neb.txt
@@ -344,7 +344,7 @@ informations can help understanding what is going wrong. For instance
when the path angle becomes accute the definition of tangent used in
the NEB calculation is questionable and the NEB cannot may diverge
"(Maras)"_#Maras2.
-
+
When running on multiple partitions, LAMMPS produces additional log
files for each partition, e.g. log.lammps.0, log.lammps.1, etc. For a
diff --git a/doc/src/pair_agni.txt b/doc/src/pair_agni.txt
index 27fb6c10fe..06dcccb9d9 100644
--- a/doc/src/pair_agni.txt
+++ b/doc/src/pair_agni.txt
@@ -40,8 +40,8 @@ vectorial atomic forces.
Only a single pair_coeff command is used with the {agni} style which
specifies an AGNI potential file containing the parameters of the
-force field for the needed elements. These are mapped to LAMMPS atom
-types by specifying N additional arguments after the filename in the
+force field for the needed elements. These are mapped to LAMMPS atom
+types by specifying N additional arguments after the filename in the
pair_coeff command, where N is the number of LAMMPS atom types:
filename
@@ -52,13 +52,13 @@ to specify the path for the force field file.
An AGNI force field is fully specified by the filename which contains the
parameters of the force field, i.e., the reference training environments
-used to construct the machine learning force field. Example force field
-and input files are provided in the examples/USER/misc/agni directory.
+used to construct the machine learning force field. Example force field
+and input files are provided in the examples/USER/misc/agni directory.
:line
-Styles with {omp} suffix is functionally the same as the corresponding
-style without the suffix. They have been optimized to run faster, depending
+Styles with {omp} suffix is functionally the same as the corresponding
+style without the suffix. They have been optimized to run faster, depending
on your available hardware, as discussed in "Section 5"_Section_accelerate.html
of the manual. The accelerated style takes the same arguments and
should produce the same results, except for round-off and precision
diff --git a/doc/src/pair_buck.txt b/doc/src/pair_buck.txt
index 49161404c3..e705e735fb 100644
--- a/doc/src/pair_buck.txt
+++ b/doc/src/pair_buck.txt
@@ -75,7 +75,7 @@ Lennard-Jones 12/6) given by
:c,image(Eqs/pair_buck.jpg)
where rho is an ionic-pair dependent length parameter, and Rc is the
-cutoff on both terms.
+cutoff on both terms.
The styles with {coul/cut} or {coul/long} or {coul/msm} add a
Coulombic term as described for the "lj/cut"_pair_lj.html pair styles.
diff --git a/doc/src/pair_charmm.txt b/doc/src/pair_charmm.txt
index 9c5973c725..1e78607c08 100644
--- a/doc/src/pair_charmm.txt
+++ b/doc/src/pair_charmm.txt
@@ -104,7 +104,15 @@ charmmfsw"_dihedral_charmm.html command. Eventually code from the new
styles will propagate into the related pair styles (e.g. implicit,
accelerator, free energy variants).
-The general CHARMM formulas are as follows
+NOTE: The newest CHARMM pair styles reset the Coulombic energy
+conversion factor used internally in the code, from the LAMMPS value
+to the CHARMM value, as if it were effectively a parameter of the
+force field. This is because the CHARMM code uses a slightly
+different value for this conversion factor in "real
+units"_units.html (Kcal/mole), namely CHARMM = 332.0716, LAMMPS =
+332.06371. This is to enable more precise agreement by LAMMPS with
+the CHARMM force field energies and forces, when using one of these
+two CHARMM pair styles.
:c,image(Eqs/pair_charmm.jpg)
diff --git a/doc/src/pair_dipole.txt b/doc/src/pair_dipole.txt
index a9622b32fd..985581cac8 100644
--- a/doc/src/pair_dipole.txt
+++ b/doc/src/pair_dipole.txt
@@ -71,6 +71,14 @@ and force, Fij = -Fji as symmetric forces, and Tij != -Tji since the
torques do not act symmetrically. These formulas are discussed in
"(Allen)"_#Allen2 and in "(Toukmaji)"_#Toukmaji2.
+Also note, that in the code, all of these terms (except Elj) have a
+C/epsilon prefactor, the same as the Coulombic term in the LJ +
+Coulombic pair styles discussed "here"_pair_lj.html. C is an
+energy-conversion constant and epsilon is the dielectric constant
+which can be set by the "dielectric"_dielectric.html command. The
+same is true of the equations that follow for other dipole pair
+styles.
+
Style {lj/sf/dipole/sf} computes "shifted-force" interactions between
pairs of particles that each have a charge and/or a point dipole
moment. In general, a shifted-force potential is a (sligthly) modified
diff --git a/doc/src/pair_exp6_rx.txt b/doc/src/pair_exp6_rx.txt
index 47045a5933..cbc17d357d 100644
--- a/doc/src/pair_exp6_rx.txt
+++ b/doc/src/pair_exp6_rx.txt
@@ -55,33 +55,33 @@ defined in the reaction kinetics files specified with the "fix
rx"_fix_rx.html command or they must correspond to the tag "1fluid",
signifying interaction with a product species mixture determined
through a one-fluid approximation. The interaction potential is
-weighted by the geometric average of either the mole fraction concentrations
-or the number of molecules associated with the interacting coarse-grained
-particles (see the {fractional} or {molecular} weighting pair style options).
+weighted by the geometric average of either the mole fraction concentrations
+or the number of molecules associated with the interacting coarse-grained
+particles (see the {fractional} or {molecular} weighting pair style options).
The coarse-grained potential is stored before and after the
reaction kinetics solver is applied, where the difference is defined
to be the internal chemical energy (uChem).
-The fourth argument specifies the type of scaling that will be used
+The fourth argument specifies the type of scaling that will be used
to scale the EXP-6 parameters as reactions occur. Currently, there
are three scaling options: {exponent}, {polynomial} and {none}.
-Exponent scaling requires two additional arguments for scaling
+Exponent scaling requires two additional arguments for scaling
the {Rm} and {epsilon} parameters, respectively. The scaling factor
-is computed by phi^exponent, where phi is the number of molecules
-represented by the coarse-grain particle and exponent is specified
+is computed by phi^exponent, where phi is the number of molecules
+represented by the coarse-grain particle and exponent is specified
as a pair coefficient argument for {Rm} and {epsilon}, respectively.
-The {Rm} and {epsilon} parameters are multiplied by the scaling
+The {Rm} and {epsilon} parameters are multiplied by the scaling
factor to give the scaled interaction parameters for the CG particle.
-Polynomial scaling requires a filename to be specified as a pair
+Polynomial scaling requires a filename to be specified as a pair
coeff argument. The file contains the coefficients to a fifth order
-polynomial for the {alpha}, {epsilon} and {Rm} parameters that depend
-upon phi (the number of molecules represented by the CG particle).
+polynomial for the {alpha}, {epsilon} and {Rm} parameters that depend
+upon phi (the number of molecules represented by the CG particle).
The format of a polynomial file is provided below.
The {none} option to the scaling does not have any additional pair coeff
-arguments. This is equivalent to specifying the {exponent} option with
+arguments. This is equivalent to specifying the {exponent} option with
{Rm} and {epsilon} exponents of 0.0 and 0.0, respectively.
The final argument specifies the interaction cutoff (optional).
@@ -102,7 +102,7 @@ parenthesized comments):
# POLYNOMIAL FILE (one or more comment or blank lines) :pre
# General Functional Form:
-# A*phi^5 + B*phi^4 + C*phi^3 + D*phi^2 + E*phi + F
+# A*phi^5 + B*phi^4 + C*phi^3 + D*phi^2 + E*phi + F
#
# Parameter A B C D E F
(blank)
diff --git a/doc/src/pair_kolmogorov_crespi_z.txt b/doc/src/pair_kolmogorov_crespi_z.txt
index 0879dc34d0..c7a6d4194f 100644
--- a/doc/src/pair_kolmogorov_crespi_z.txt
+++ b/doc/src/pair_kolmogorov_crespi_z.txt
@@ -24,25 +24,25 @@ pair_coeff 1 2 kolmogorov/crespi/z CC.KC C C :pre
[Description:]
-The {kolmogorov/crespi/z} style computes the Kolmogorov-Crespi interaction
-potential as described in "(KC05)"_#KC05. An important simplification is made,
-which is to take all normals along the z-axis.
+The {kolmogorov/crespi/z} style computes the Kolmogorov-Crespi interaction
+potential as described in "(KC05)"_#KC05. An important simplification is made,
+which is to take all normals along the z-axis.
:c,image(Eqs/pair_kolmogorov_crespi_z.jpg)
-It is important to have a suffiently large cutoff to ensure smooth forces.
-Energies are shifted so that they go continously to zero at the cutoff assuming
+It is important to have a sufficiently large cutoff to ensure smooth forces.
+Energies are shifted so that they go continuously to zero at the cutoff assuming
that the exponential part of {Vij} (first term) decays sufficiently fast.
This shift is achieved by the last term in the equation for {Vij} above.
-This potential is intended for interactions between two layers of graphene.
-Therefore, to avoid interaction between layers in multi-layered materials,
-each layer should have a separate atom type and interactions should only
+This potential is intended for interactions between two layers of graphene.
+Therefore, to avoid interaction between layers in multi-layered materials,
+each layer should have a separate atom type and interactions should only
be computed between atom types of neighbouring layers.
-The parameter file (e.g. CC.KC), is intended for use with metal
-"units"_units.html, with energies in meV. An additional parameter, {S},
-is available to facilitate scaling of energies in accordance with
+The parameter file (e.g. CC.KC), is intended for use with metal
+"units"_units.html, with energies in meV. An additional parameter, {S},
+is available to facilitate scaling of energies in accordance with
"(vanWijk)"_#vanWijk.
This potential must be used in combination with hybrid/overlay.
@@ -64,7 +64,7 @@ LAMMPS"_Section_start.html#start_3 section for more info.
:line
-:link(KC05)
+:link(KC05)
[(KC05)] A. N. Kolmogorov, V. H. Crespi, Phys. Rev. B 71, 235415 (2005)
:link(vanWijk)
diff --git a/doc/src/pair_lj_long.txt b/doc/src/pair_lj_long.txt
index d559871f9d..da9f37b9c3 100644
--- a/doc/src/pair_lj_long.txt
+++ b/doc/src/pair_lj_long.txt
@@ -7,6 +7,7 @@
:line
pair_style lj/long/coul/long command :h3
+pair_style lj/long/coul/long/intel command :h3
pair_style lj/long/coul/long/omp command :h3
pair_style lj/long/coul/long/opt command :h3
pair_style lj/long/tip4p/long command :h3
diff --git a/doc/src/pair_lj_smooth_linear.txt b/doc/src/pair_lj_smooth_linear.txt
index 5721b02eb3..a48c441f54 100644
--- a/doc/src/pair_lj_smooth_linear.txt
+++ b/doc/src/pair_lj_smooth_linear.txt
@@ -104,3 +104,8 @@ This pair style can only be used via the {pair} keyword of the
"pair_coeff"_pair_coeff.html, "pair lj/smooth"_pair_lj_smooth.html
[Default:] none
+
+:line
+
+:link(Toxvaerd)
+[(Toxvaerd)] Toxvaerd, Dyre, J Chem Phys, 134, 081102 (2011).
diff --git a/doc/src/pair_multi_lucy_rx.txt b/doc/src/pair_multi_lucy_rx.txt
index bf5d5636fe..77ed223e2a 100644
--- a/doc/src/pair_multi_lucy_rx.txt
+++ b/doc/src/pair_multi_lucy_rx.txt
@@ -97,9 +97,9 @@ tags must either correspond to the species defined in the reaction
kinetics files specified with the "fix rx"_fix_rx.html command or they
must correspond to the tag "1fluid", signifying interaction with a
product species mixture determined through a one-fluid approximation.
-The interaction potential is weighted by the geometric average of
-either the mole fraction concentrations or the number of molecules
-associated with the interacting coarse-grained particles (see the
+The interaction potential is weighted by the geometric average of
+either the mole fraction concentrations or the number of molecules
+associated with the interacting coarse-grained particles (see the
{fractional} or {molecular} weighting pair style options). The coarse-grained potential is
stored before and after the reaction kinetics solver is applied, where
the difference is defined to be the internal chemical energy (uChem).
diff --git a/doc/src/pair_oxdna.txt b/doc/src/pair_oxdna.txt
index 0a07417fd0..d9734f122d 100644
--- a/doc/src/pair_oxdna.txt
+++ b/doc/src/pair_oxdna.txt
@@ -39,17 +39,17 @@ pair_coeff * * oxdna/coaxstk 46.0 0.4 0.6 0.22 0.58 2.0 2.541592653589793 0.65 1
[Description:]
-The {oxdna} pair styles compute the pairwise-additive parts of the oxDNA force field
-for coarse-grained modelling of DNA. The effective interaction between the nucleotides consists of potentials for the
+The {oxdna} pair styles compute the pairwise-additive parts of the oxDNA force field
+for coarse-grained modelling of DNA. The effective interaction between the nucleotides consists of potentials for the
excluded volume interaction {oxdna/excv}, the stacking {oxdna/stk}, cross-stacking {oxdna/xstk}
and coaxial stacking interaction {oxdna/coaxstk} as well
as the hydrogen-bonding interaction {oxdna/hbond} between complementary pairs of nucleotides on
opposite strands.
-The exact functional form of the pair styles is rather complex, which manifests itself in the 144 coefficients
-in the above example. The individual potentials consist of products of modulation factors,
-which themselves are constructed from a number of more basic potentials
-(Morse, Lennard-Jones, harmonic angle and distance) as well as quadratic smoothing and modulation terms.
+The exact functional form of the pair styles is rather complex, which manifests itself in the 144 coefficients
+in the above example. The individual potentials consist of products of modulation factors,
+which themselves are constructed from a number of more basic potentials
+(Morse, Lennard-Jones, harmonic angle and distance) as well as quadratic smoothing and modulation terms.
We refer to "(Ouldridge-DPhil)"_#Ouldridge-DPhil1 and "(Ouldridge)"_#Ouldridge1
for a detailed description of the oxDNA force field.
@@ -57,8 +57,8 @@ NOTE: These pair styles have to be used together with the related oxDNA bond sty
{oxdna/fene} for the connectivity of the phosphate backbone (see also documentation of
"bond_style oxdna/fene"_bond_oxdna.html). With one exception the coefficients
in the above example have to be kept fixed and cannot be changed without reparametrizing the entire model.
-The exception is the first coefficient after {oxdna/stk} (T=0.1 in the above example).
-When using a Langevin thermostat, e.g. through "fix langevin"_fix_langevin.html
+The exception is the first coefficient after {oxdna/stk} (T=0.1 in the above example).
+When using a Langevin thermostat, e.g. through "fix langevin"_fix_langevin.html
or "fix nve/dotc/langevin"_fix_nve_dotc_langevin.html
the temperature coefficients have to be matched to the one used in the fix.
@@ -79,7 +79,7 @@ LAMMPS"_Section_start.html#start_3 section for more info on packages.
[Related commands:]
-"bond_style oxdna/fene"_bond_oxdna.html, "fix nve/dotc/langevin"_fix_nve_dotc_langevin.html, "pair_coeff"_pair_coeff.html,
+"bond_style oxdna/fene"_bond_oxdna.html, "fix nve/dotc/langevin"_fix_nve_dotc_langevin.html, "pair_coeff"_pair_coeff.html,
"bond_style oxdna2/fene"_bond_oxdna.html, "pair_style oxdna2/excv"_pair_oxdna2.html
[Default:] none
diff --git a/doc/src/pair_oxdna2.txt b/doc/src/pair_oxdna2.txt
index 1cc562d5f1..1728a0bc7b 100644
--- a/doc/src/pair_oxdna2.txt
+++ b/doc/src/pair_oxdna2.txt
@@ -45,17 +45,17 @@ pair_coeff * * oxdna2/dh 0.1 1.0 0.815 :pre
[Description:]
-The {oxdna2} pair styles compute the pairwise-additive parts of the oxDNA force field
-for coarse-grained modelling of DNA. The effective interaction between the nucleotides consists of potentials for the
+The {oxdna2} pair styles compute the pairwise-additive parts of the oxDNA force field
+for coarse-grained modelling of DNA. The effective interaction between the nucleotides consists of potentials for the
excluded volume interaction {oxdna2/excv}, the stacking {oxdna2/stk}, cross-stacking {oxdna2/xstk}
and coaxial stacking interaction {oxdna2/coaxstk}, electrostatic Debye-Hueckel interaction {oxdna2/dh}
as well as the hydrogen-bonding interaction {oxdna2/hbond} between complementary pairs of nucleotides on
opposite strands.
-The exact functional form of the pair styles is rather complex.
-The individual potentials consist of products of modulation factors,
-which themselves are constructed from a number of more basic potentials
-(Morse, Lennard-Jones, harmonic angle and distance) as well as quadratic smoothing and modulation terms.
+The exact functional form of the pair styles is rather complex.
+The individual potentials consist of products of modulation factors,
+which themselves are constructed from a number of more basic potentials
+(Morse, Lennard-Jones, harmonic angle and distance) as well as quadratic smoothing and modulation terms.
We refer to "(Snodin)"_#Snodin and the original oxDNA publications "(Ouldridge-DPhil)"_#Ouldridge-DPhil2
and "(Ouldridge)"_#Ouldridge2 for a detailed description of the oxDNA2 force field.
@@ -63,7 +63,7 @@ NOTE: These pair styles have to be used together with the related oxDNA2 bond st
{oxdna2/fene} for the connectivity of the phosphate backbone (see also documentation of
"bond_style oxdna2/fene"_bond_oxdna.html). Almost all coefficients
in the above example have to be kept fixed and cannot be changed without reparametrizing the entire model.
-Exceptions are the first coefficient after {oxdna2/stk} (T=0.1 in the above example) and the coefficients
+Exceptions are the first coefficient after {oxdna2/stk} (T=0.1 in the above example) and the coefficients
after {oxdna2/dh} (T=0.1, rhos=1.0, qeff=0.815 in the above example). When using a Langevin thermostat
e.g. through "fix langevin"_fix_langevin.html or "fix nve/dotc/langevin"_fix_nve_dotc_langevin.html
the temperature coefficients have to be matched to the one used in the fix.
@@ -86,7 +86,7 @@ LAMMPS"_Section_start.html#start_3 section for more info on packages.
[Related commands:]
"bond_style oxdna2/fene"_bond_oxdna.html, "fix nve/dotc/langevin"_fix_nve_dotc_langevin.html, "pair_coeff"_pair_coeff.html,
-"bond_style oxdna/fene"_bond_oxdna.html, "pair_style oxdna/excv"_pair_oxdna.html
+"bond_style oxdna/fene"_bond_oxdna.html, "pair_style oxdna/excv"_pair_oxdna.html
[Default:] none
diff --git a/doc/src/pair_table_rx.txt b/doc/src/pair_table_rx.txt
index d089a4f9da..f93af21da4 100644
--- a/doc/src/pair_table_rx.txt
+++ b/doc/src/pair_table_rx.txt
@@ -85,9 +85,9 @@ tags must either correspond to the species defined in the reaction
kinetics files specified with the "fix rx"_fix_rx.html command or they
must correspond to the tag "1fluid", signifying interaction with a
product species mixture determined through a one-fluid approximation.
-The interaction potential is weighted by the geometric average of
-either the mole fraction concentrations or the number of molecules
-associated with the interacting coarse-grained particles (see the
+The interaction potential is weighted by the geometric average of
+either the mole fraction concentrations or the number of molecules
+associated with the interacting coarse-grained particles (see the
{fractional} or {molecular} weighting pair style options). The coarse-grained potential is
stored before and after the reaction kinetics solver is applied, where
the difference is defined to be the internal chemical energy (uChem).
diff --git a/doc/src/python.txt b/doc/src/python.txt
index e00b90234c..c6538ded45 100644
--- a/doc/src/python.txt
+++ b/doc/src/python.txt
@@ -489,7 +489,7 @@ python"_Section_python.html. Note that it is important that the
stand-alone LAMMPS executable and the LAMMPS shared library be
consistent (built from the same source code files) in order for this
to work. If the two have been built at different times using
-different source files, problems may occur.
+different source files, problems may occur.
[Related commands:]
diff --git a/doc/src/run_style.txt b/doc/src/run_style.txt
index 0e3c1a939f..a67899420b 100644
--- a/doc/src/run_style.txt
+++ b/doc/src/run_style.txt
@@ -17,7 +17,7 @@ style = {verlet} or {verlet/split} or {respa} or {respa/omp} :ulb,l
{verlet/split} args = none
{respa} args = N n1 n2 ... keyword values ...
N = # of levels of rRESPA
- n1, n2, ... = loop factor between rRESPA levels (N-1 values)
+ n1, n2, ... = loop factors between rRESPA levels (N-1 values)
zero or more keyword/value pairings may be appended to the loop factors
keyword = {bond} or {angle} or {dihedral} or {improper} or
{pair} or {inner} or {middle} or {outer} or {hybrid} or {kspace}
@@ -55,7 +55,7 @@ style = {verlet} or {verlet/split} or {respa} or {respa/omp} :ulb,l
run_style verlet
run_style respa 4 2 2 2 bond 1 dihedral 2 pair 3 kspace 4
-run_style respa 4 2 2 2 bond 1 dihedral 2 inner 3 5.0 6.0 outer 4 kspace 4 :pre
+run_style respa 4 2 2 2 bond 1 dihedral 2 inner 3 5.0 6.0 outer 4 kspace 4
run_style respa 3 4 2 bond 1 hybrid 2 2 1 kspace 3 :pre
[Description:]
diff --git a/doc/src/tutorial_github.txt b/doc/src/tutorial_github.txt
index d6ec22589b..3e10b821ae 100644
--- a/doc/src/tutorial_github.txt
+++ b/doc/src/tutorial_github.txt
@@ -86,7 +86,7 @@ machine via HTTPS:
or, if you have set up your GitHub account for using SSH keys, via SSH:
$ git clone git@github.com:/lammps.git :pre
-
+
You can find the proper URL by clicking the "Clone or download"-button:
:c,image(JPG/tutorial_https_block.png)
diff --git a/doc/src/tutorial_pylammps.txt b/doc/src/tutorial_pylammps.txt
index 0b4fb32ed2..78cdd241fb 100644
--- a/doc/src/tutorial_pylammps.txt
+++ b/doc/src/tutorial_pylammps.txt
@@ -36,7 +36,7 @@ lammps.PyLammps :h4
higher-level abstraction built on top of original C-Types interface
manipulation of Python objects
-communication with LAMMPS is hidden from API user
+communication with LAMMPS is hidden from API user
shorter, more concise Python
better IPython integration, designed for quick prototyping :ul
@@ -328,7 +328,7 @@ IPyLammps Examples :h2
Examples of IPython notebooks can be found in the python/examples/pylammps
subdirectory. To open these notebooks launch {jupyter notebook} inside this
-directory and navigate to one of them. If you compiled and installed
+directory and navigate to one of them. If you compiled and installed
a LAMMPS shared library with exceptions, PNG, JPEG and FFMPEG support
you should be able to rerun all of these notebooks.
@@ -399,19 +399,19 @@ natoms = L.system.natoms :pre
for i in range(niterations):
iatom = random.randrange(0, natoms)
current_atom = L.atoms\[iatom\] :pre
-
+
x0, y0 = current_atom.position :pre
-
+
dx = deltamove * random.uniform(-1, 1)
dy = deltamove * random.uniform(-1, 1) :pre
-
+
current_atom.position = (x0+dx, y0+dy) :pre
-
+
L.run(1, "pre no post no") :pre
-
+
e = L.eval("pe")
energies.append(e) :pre
-
+
if e <= elast:
naccept += 1
elast = e
@@ -460,4 +460,4 @@ Feedback and Contributing :h2
If you find this Python interface useful, please feel free to provide feedback
and ideas on how to improve it to Richard Berger (richard.berger@temple.edu). We also
want to encourage people to write tutorial style IPython notebooks showcasing LAMMPS usage
-and maybe their latest research results.
+and maybe their latest research results.
diff --git a/examples/USER/misc/filter_corotate/in.bpti b/examples/USER/misc/filter_corotate/in.bpti
index 6507a78704..2e4d8dda6f 100644
--- a/examples/USER/misc/filter_corotate/in.bpti
+++ b/examples/USER/misc/filter_corotate/in.bpti
@@ -28,7 +28,7 @@ thermo 100
thermo_style multi
timestep 8
-run_style respa 3 2 8 bond 1 pair 2 kspace 3
+run_style respa 3 2 8 bond 1 dihedral 2 pair 2 kspace 3
velocity all create 200.0 12345678 dist uniform
#dump dump1 all atom 100 4pti.dump
diff --git a/examples/USER/misc/filter_corotate/in.peptide b/examples/USER/misc/filter_corotate/in.peptide
index 0a17f995b3..e10dc09f0d 100644
--- a/examples/USER/misc/filter_corotate/in.peptide
+++ b/examples/USER/misc/filter_corotate/in.peptide
@@ -20,7 +20,7 @@ thermo 50
timestep 8
-run_style respa 3 2 8 bond 1 pair 2 kspace 3
+run_style respa 3 2 8 bond 1 dihedral 2 pair 2 kspace 3
fix 1 all nvt temp 250.0 250.0 100.0 tchain 1
fix cor all filter/corotate m 1.0
diff --git a/examples/USER/misc/filter_corotate/log.10Mar2017.bpti.g++.1 b/examples/USER/misc/filter_corotate/log.10Mar2017.bpti.g++.1
deleted file mode 100644
index 5253b47b2d..0000000000
--- a/examples/USER/misc/filter_corotate/log.10Mar2017.bpti.g++.1
+++ /dev/null
@@ -1,240 +0,0 @@
-LAMMPS (10 Mar 2017)
- using 1 OpenMP thread(s) per MPI task
-
-units real
-
-atom_style full
-bond_style harmonic
-angle_style charmm
-dihedral_style charmm
-improper_style harmonic
-
-pair_style lj/charmm/coul/long 8 10
-pair_modify mix arithmetic
-kspace_style pppm 1e-4
-
-read_data data.bpti
- orthogonal box = (-10 -10 -30) to (50 50 30)
- 1 by 1 by 1 MPI processor grid
- reading atoms ...
- 892 atoms
- scanning bonds ...
- 4 = max bonds/atom
- scanning angles ...
- 6 = max angles/atom
- scanning dihedrals ...
- 18 = max dihedrals/atom
- scanning impropers ...
- 2 = max impropers/atom
- reading bonds ...
- 906 bonds
- reading angles ...
- 1626 angles
- reading dihedrals ...
- 2501 dihedrals
- reading impropers ...
- 137 impropers
- 4 = max # of 1-2 neighbors
- 9 = max # of 1-3 neighbors
- 19 = max # of 1-4 neighbors
- 21 = max # of special neighbors
-
-special_bonds charmm
-neigh_modify delay 2 every 1
-
-
-# ------------- MINIMIZE ----------
-
-minimize 1e-4 1e-6 1000 10000
-WARNING: Resetting reneighboring criteria during minimization (../min.cpp:168)
-PPPM initialization ...
-WARNING: System is not charge neutral, net charge = 6 (../kspace.cpp:302)
-WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
- G vector (1/distance) = 0.203272
- grid = 16 16 16
- stencil order = 5
- estimated absolute RMS force accuracy = 0.0316399
- estimated relative force accuracy = 9.52826e-05
- using double precision FFTs
- 3d grid and FFT values/proc = 9261 4096
-Neighbor list info ...
- update every 1 steps, delay 0 steps, check yes
- max neighbors/atom: 2000, page size: 100000
- master list distance cutoff = 12
- ghost atom cutoff = 12
- binsize = 6, bins = 10 10 10
- 1 neighbor lists, perpetual/occasional/extra = 1 0 0
- (1) pair lj/charmm/coul/long, perpetual
- attributes: half, newton on
- pair build: half/bin/newton
- stencil: half/bin/3d/newton
- bin: standard
-Per MPI rank memory usage (min/avg/max) = 17.8596/1/0 Mbytes
-Step Temp E_pair E_mol TotEng Press
- 0 0 -3075.6498 943.91164 -2131.7381 -380.67776
- 241 0 -4503.313 749.58662 -3753.7264 -29.045104
-Loop time of 3.35722 on 1 procs for 241 steps with 892 atoms
-
-99.7% CPU use with 1 MPI tasks x 1 OpenMP threads
-
-Minimization stats:
- Stopping criterion = energy tolerance
- Energy initial, next-to-last, final =
- -2131.73812515 -3753.43984087 -3753.72636847
- Force two-norm initial, final = 1086.21 26.3688
- Force max component initial, final = 310.811 3.92748
- Final line search alpha, max atom move = 0.00596649 0.0234333
- Iterations, force evaluations = 241 463
-
-MPI task timing breakdown:
-Section | min time | avg time | max time |%varavg| %total
----------------------------------------------------------------
-Pair | 2.5003 | 2.5003 | 2.5003 | 0.0 | 74.48
-Bond | 0.24287 | 0.24287 | 0.24287 | 0.0 | 7.23
-Kspace | 0.53428 | 0.53428 | 0.53428 | 0.0 | 15.91
-Neigh | 0.069765 | 0.069765 | 0.069765 | 0.0 | 2.08
-Comm | 0.00065374 | 0.00065374 | 0.00065374 | 0.0 | 0.02
-Output | 0 | 0 | 0 | 0.0 | 0.00
-Modify | 0 | 0 | 0 | 0.0 | 0.00
-Other | | 0.009358 | | | 0.28
-
-Nlocal: 892 ave 892 max 892 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost: 31 ave 31 max 31 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-Neighs: 148891 ave 148891 max 148891 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-
-Total # of neighbors = 148891
-Ave neighs/atom = 166.918
-Ave special neighs/atom = 10.9395
-Neighbor list builds = 15
-Dangerous builds = 0
-reset_timestep 0
-
-# ------------- RUN ---------------
-
-thermo 100
-thermo_style multi
-timestep 8
-
-run_style respa 3 2 8 bond 1 pair 2 kspace 3
-Respa levels:
- 1 = bond angle dihedral improper
- 2 = pair
- 3 = kspace
-
-velocity all create 200.0 12345678 dist uniform
-#dump dump1 all atom 100 4pti.dump
-
-fix 1 all nvt temp 200 300 25
-fix cor all filter/corotate m 1.0
- 163 = # of size 2 clusters
- 0 = # of size 3 clusters
- 25 = # of size 4 clusters
- 0 = # of size 5 clusters
- 100 = # of frozen angles
-
-run 1000
-PPPM initialization ...
-WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
- G vector (1/distance) = 0.203272
- grid = 16 16 16
- stencil order = 5
- estimated absolute RMS force accuracy = 0.0316399
- estimated relative force accuracy = 9.52826e-05
- using double precision FFTs
- 3d grid and FFT values/proc = 9261 4096
-Per MPI rank memory usage (min/avg/max) = 19.5425/1/0 Mbytes
----------------- Step 0 ----- CPU = 0.0000 (sec) ----------------
-TotEng = -3220.3378 KinEng = 531.1804 Temp = 200.0000
-PotEng = -3751.5181 E_bond = 42.2810 E_angle = 345.2592
-E_dihed = 337.8361 E_impro = 24.2103 E_vdwl = -288.5339
-E_coul = -886.3622 E_long = -3326.2088 Press = 83.2283
----------------- Step 100 ----- CPU = 3.9414 (sec) ----------------
-TotEng = -2718.8970 KinEng = 538.6206 Temp = 202.8014
-PotEng = -3257.5176 E_bond = 203.3367 E_angle = 566.5317
-E_dihed = 397.6202 E_impro = 34.6623 E_vdwl = -248.7451
-E_coul = -874.5122 E_long = -3336.4111 Press = 135.8662
----------------- Step 200 ----- CPU = 7.9028 (sec) ----------------
-TotEng = -2660.1406 KinEng = 626.3319 Temp = 235.8265
-PotEng = -3286.4725 E_bond = 209.5147 E_angle = 591.7773
-E_dihed = 388.9591 E_impro = 29.4992 E_vdwl = -243.5808
-E_coul = -923.5115 E_long = -3339.1306 Press = 88.9000
----------------- Step 300 ----- CPU = 11.8246 (sec) ----------------
-TotEng = -2673.8090 KinEng = 616.7924 Temp = 232.2346
-PotEng = -3290.6014 E_bond = 202.8254 E_angle = 568.6860
-E_dihed = 378.4182 E_impro = 38.2399 E_vdwl = -221.3236
-E_coul = -915.3004 E_long = -3342.1468 Press = 78.8527
----------------- Step 400 ----- CPU = 15.7990 (sec) ----------------
-TotEng = -2614.9416 KinEng = 649.3474 Temp = 244.4922
-PotEng = -3264.2890 E_bond = 211.6116 E_angle = 617.2026
-E_dihed = 399.8744 E_impro = 40.2678 E_vdwl = -211.7790
-E_coul = -978.1624 E_long = -3343.3041 Press = -4.1958
----------------- Step 500 ----- CPU = 19.8146 (sec) ----------------
-TotEng = -2588.6772 KinEng = 660.1424 Temp = 248.5568
-PotEng = -3248.8196 E_bond = 218.4786 E_angle = 620.8605
-E_dihed = 390.3220 E_impro = 41.6794 E_vdwl = -226.3657
-E_coul = -953.1676 E_long = -3340.6269 Press = 99.3200
----------------- Step 600 ----- CPU = 23.8587 (sec) ----------------
-TotEng = -2550.4618 KinEng = 693.3384 Temp = 261.0557
-PotEng = -3243.8002 E_bond = 232.3563 E_angle = 606.2922
-E_dihed = 396.2469 E_impro = 37.1980 E_vdwl = -235.8425
-E_coul = -937.1208 E_long = -3342.9303 Press = -21.7737
----------------- Step 700 ----- CPU = 27.8381 (sec) ----------------
-TotEng = -2554.4355 KinEng = 692.8951 Temp = 260.8888
-PotEng = -3247.3306 E_bond = 216.3395 E_angle = 637.7785
-E_dihed = 391.5940 E_impro = 43.1426 E_vdwl = -187.6159
-E_coul = -1008.1694 E_long = -3340.3998 Press = 75.1484
----------------- Step 800 ----- CPU = 31.8039 (sec) ----------------
-TotEng = -2508.3551 KinEng = 699.0766 Temp = 263.2163
-PotEng = -3207.4317 E_bond = 241.9936 E_angle = 641.3631
-E_dihed = 386.2198 E_impro = 43.7793 E_vdwl = -217.7523
-E_coul = -964.6070 E_long = -3338.4282 Press = -127.7337
----------------- Step 900 ----- CPU = 35.7700 (sec) ----------------
-TotEng = -2452.7644 KinEng = 762.1842 Temp = 286.9776
-PotEng = -3214.9485 E_bond = 243.9191 E_angle = 649.8664
-E_dihed = 382.4351 E_impro = 39.0029 E_vdwl = -221.3389
-E_coul = -970.8965 E_long = -3337.9366 Press = 122.7720
----------------- Step 1000 ----- CPU = 39.7695 (sec) ----------------
-TotEng = -2386.6805 KinEng = 799.0253 Temp = 300.8490
-PotEng = -3185.7058 E_bond = 265.3649 E_angle = 661.7543
-E_dihed = 374.6843 E_impro = 38.6877 E_vdwl = -229.2030
-E_coul = -960.7041 E_long = -3336.2899 Press = -17.9910
-Loop time of 39.7695 on 1 procs for 1000 steps with 892 atoms
-
-Performance: 17.380 ns/day, 1.381 hours/ns, 25.145 timesteps/s
-99.6% CPU use with 1 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section | min time | avg time | max time |%varavg| %total
----------------------------------------------------------------
-Pair | 29.169 | 29.169 | 29.169 | 0.0 | 73.34
-Bond | 7.6249 | 7.6249 | 7.6249 | 0.0 | 19.17
-Kspace | 1.1525 | 1.1525 | 1.1525 | 0.0 | 2.90
-Neigh | 0.87606 | 0.87606 | 0.87606 | 0.0 | 2.20
-Comm | 0.01563 | 0.01563 | 0.01563 | 0.0 | 0.04
-Output | 0.00048423 | 0.00048423 | 0.00048423 | 0.0 | 0.00
-Modify | 0.80446 | 0.80446 | 0.80446 | 0.0 | 2.02
-Other | | 0.1266 | | | 0.32
-
-Nlocal: 892 ave 892 max 892 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost: 27 ave 27 max 27 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-Neighs: 146206 ave 146206 max 146206 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-
-Total # of neighbors = 146206
-Ave neighs/atom = 163.908
-Ave special neighs/atom = 10.9395
-Neighbor list builds = 186
-Dangerous builds = 0
-
-unfix cor
-unfix 1
-
-
-Please see the log.cite file for references relevant to this simulation
-
-Total wall time: 0:00:43
diff --git a/examples/USER/misc/filter_corotate/log.10Mar2017.bpti.g++.4 b/examples/USER/misc/filter_corotate/log.10Mar2017.bpti.g++.4
deleted file mode 100644
index 4300c1caf5..0000000000
--- a/examples/USER/misc/filter_corotate/log.10Mar2017.bpti.g++.4
+++ /dev/null
@@ -1,240 +0,0 @@
-LAMMPS (10 Mar 2017)
- using 1 OpenMP thread(s) per MPI task
-
-units real
-
-atom_style full
-bond_style harmonic
-angle_style charmm
-dihedral_style charmm
-improper_style harmonic
-
-pair_style lj/charmm/coul/long 8 10
-pair_modify mix arithmetic
-kspace_style pppm 1e-4
-
-read_data data.bpti
- orthogonal box = (-10 -10 -30) to (50 50 30)
- 1 by 2 by 2 MPI processor grid
- reading atoms ...
- 892 atoms
- scanning bonds ...
- 4 = max bonds/atom
- scanning angles ...
- 6 = max angles/atom
- scanning dihedrals ...
- 18 = max dihedrals/atom
- scanning impropers ...
- 2 = max impropers/atom
- reading bonds ...
- 906 bonds
- reading angles ...
- 1626 angles
- reading dihedrals ...
- 2501 dihedrals
- reading impropers ...
- 137 impropers
- 4 = max # of 1-2 neighbors
- 9 = max # of 1-3 neighbors
- 19 = max # of 1-4 neighbors
- 21 = max # of special neighbors
-
-special_bonds charmm
-neigh_modify delay 2 every 1
-
-
-# ------------- MINIMIZE ----------
-
-minimize 1e-4 1e-6 1000 10000
-WARNING: Resetting reneighboring criteria during minimization (../min.cpp:168)
-PPPM initialization ...
-WARNING: System is not charge neutral, net charge = 6 (../kspace.cpp:302)
-WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
- G vector (1/distance) = 0.203272
- grid = 16 16 16
- stencil order = 5
- estimated absolute RMS force accuracy = 0.0316399
- estimated relative force accuracy = 9.52826e-05
- using double precision FFTs
- 3d grid and FFT values/proc = 3549 1024
-Neighbor list info ...
- update every 1 steps, delay 0 steps, check yes
- max neighbors/atom: 2000, page size: 100000
- master list distance cutoff = 12
- ghost atom cutoff = 12
- binsize = 6, bins = 10 10 10
- 1 neighbor lists, perpetual/occasional/extra = 1 0 0
- (1) pair lj/charmm/coul/long, perpetual
- attributes: half, newton on
- pair build: half/bin/newton
- stencil: half/bin/3d/newton
- bin: standard
-Per MPI rank memory usage (min/avg/max) = 16.9693/0.981879/0 Mbytes
-Step Temp E_pair E_mol TotEng Press
- 0 0 -3075.6498 943.91164 -2131.7381 -380.67776
- 241 0 -4503.3131 749.58666 -3753.7264 -29.045153
-Loop time of 1.26594 on 4 procs for 241 steps with 892 atoms
-
-99.0% CPU use with 4 MPI tasks x 1 OpenMP threads
-
-Minimization stats:
- Stopping criterion = energy tolerance
- Energy initial, next-to-last, final =
- -2131.73812515 -3753.43983927 -3753.72640137
- Force two-norm initial, final = 1086.21 26.3688
- Force max component initial, final = 310.811 3.92751
- Final line search alpha, max atom move = 0.00596649 0.0234334
- Iterations, force evaluations = 241 463
-
-MPI task timing breakdown:
-Section | min time | avg time | max time |%varavg| %total
----------------------------------------------------------------
-Pair | 0.34267 | 0.63792 | 0.90268 | 25.2 | 50.39
-Bond | 0.025776 | 0.063318 | 0.095631 | 10.8 | 5.00
-Kspace | 0.21904 | 0.51601 | 0.84895 | 31.3 | 40.76
-Neigh | 0.023185 | 0.023363 | 0.023538 | 0.1 | 1.85
-Comm | 0.012025 | 0.014189 | 0.016335 | 1.4 | 1.12
-Output | 0 | 0 | 0 | 0.0 | 0.00
-Modify | 0 | 0 | 0 | 0.0 | 0.00
-Other | | 0.01114 | | | 0.88
-
-Nlocal: 223 ave 323 max 89 min
-Histogram: 1 0 0 0 1 0 0 0 1 1
-Nghost: 613 ave 675 max 557 min
-Histogram: 1 0 0 1 0 1 0 0 0 1
-Neighs: 37222.8 ave 50005 max 20830 min
-Histogram: 1 0 0 0 1 0 0 1 0 1
-
-Total # of neighbors = 148891
-Ave neighs/atom = 166.918
-Ave special neighs/atom = 10.9395
-Neighbor list builds = 15
-Dangerous builds = 0
-reset_timestep 0
-
-# ------------- RUN ---------------
-
-thermo 100
-thermo_style multi
-timestep 8
-
-run_style respa 3 2 8 bond 1 pair 2 kspace 3
-Respa levels:
- 1 = bond angle dihedral improper
- 2 = pair
- 3 = kspace
-
-velocity all create 200.0 12345678 dist uniform
-#dump dump1 all atom 100 4pti.dump
-
-fix 1 all nvt temp 200 300 25
-fix cor all filter/corotate m 1.0
- 163 = # of size 2 clusters
- 0 = # of size 3 clusters
- 25 = # of size 4 clusters
- 0 = # of size 5 clusters
- 100 = # of frozen angles
-
-run 1000
-PPPM initialization ...
-WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
- G vector (1/distance) = 0.203272
- grid = 16 16 16
- stencil order = 5
- estimated absolute RMS force accuracy = 0.0316399
- estimated relative force accuracy = 9.52826e-05
- using double precision FFTs
- 3d grid and FFT values/proc = 3549 1024
-Per MPI rank memory usage (min/avg/max) = 17.142/0.97212/0 Mbytes
----------------- Step 0 ----- CPU = 0.0000 (sec) ----------------
-TotEng = -3220.3378 KinEng = 531.1804 Temp = 200.0000
-PotEng = -3751.5182 E_bond = 42.2810 E_angle = 345.2592
-E_dihed = 337.8361 E_impro = 24.2103 E_vdwl = -288.5339
-E_coul = -886.3622 E_long = -3326.2088 Press = 83.2282
----------------- Step 100 ----- CPU = 1.5457 (sec) ----------------
-TotEng = -2718.9184 KinEng = 538.6205 Temp = 202.8014
-PotEng = -3257.5389 E_bond = 203.3365 E_angle = 566.5311
-E_dihed = 397.6202 E_impro = 34.6621 E_vdwl = -248.7451
-E_coul = -874.5326 E_long = -3336.4111 Press = 135.8435
----------------- Step 200 ----- CPU = 3.0720 (sec) ----------------
-TotEng = -2660.1146 KinEng = 626.3474 Temp = 235.8323
-PotEng = -3286.4620 E_bond = 209.5168 E_angle = 591.7735
-E_dihed = 388.9615 E_impro = 29.5000 E_vdwl = -243.5840
-E_coul = -923.4998 E_long = -3339.1299 Press = 88.8857
----------------- Step 300 ----- CPU = 4.5597 (sec) ----------------
-TotEng = -2669.7442 KinEng = 619.3625 Temp = 233.2023
-PotEng = -3289.1067 E_bond = 203.4405 E_angle = 569.5281
-E_dihed = 378.3314 E_impro = 38.2880 E_vdwl = -221.1904
-E_coul = -915.3396 E_long = -3342.1646 Press = 79.3780
----------------- Step 400 ----- CPU = 5.9808 (sec) ----------------
-TotEng = -2618.9975 KinEng = 644.6145 Temp = 242.7102
-PotEng = -3263.6119 E_bond = 209.5864 E_angle = 618.8954
-E_dihed = 401.3798 E_impro = 39.9064 E_vdwl = -212.1271
-E_coul = -977.1589 E_long = -3344.0940 Press = -7.8938
----------------- Step 500 ----- CPU = 7.4159 (sec) ----------------
-TotEng = -2579.7486 KinEng = 666.4643 Temp = 250.9371
-PotEng = -3246.2129 E_bond = 219.2549 E_angle = 620.3474
-E_dihed = 388.4395 E_impro = 41.4499 E_vdwl = -225.9686
-E_coul = -949.3689 E_long = -3340.3672 Press = 113.2543
----------------- Step 600 ----- CPU = 8.9252 (sec) ----------------
-TotEng = -2535.8235 KinEng = 708.5919 Temp = 266.7990
-PotEng = -3244.4154 E_bond = 243.9451 E_angle = 606.0866
-E_dihed = 400.0562 E_impro = 33.9708 E_vdwl = -223.1319
-E_coul = -964.9940 E_long = -3340.3482 Press = -102.4475
----------------- Step 700 ----- CPU = 10.4022 (sec) ----------------
-TotEng = -2552.6681 KinEng = 702.3080 Temp = 264.4330
-PotEng = -3254.9761 E_bond = 250.8834 E_angle = 639.0977
-E_dihed = 386.4014 E_impro = 42.3004 E_vdwl = -224.4816
-E_coul = -1011.8551 E_long = -3337.3222 Press = 10.6424
----------------- Step 800 ----- CPU = 11.8699 (sec) ----------------
-TotEng = -2423.5415 KinEng = 772.1254 Temp = 290.7206
-PotEng = -3195.6670 E_bond = 238.5831 E_angle = 640.9180
-E_dihed = 377.7994 E_impro = 40.3135 E_vdwl = -216.5705
-E_coul = -935.1087 E_long = -3341.6019 Press = -38.2479
----------------- Step 900 ----- CPU = 13.3548 (sec) ----------------
-TotEng = -2394.4779 KinEng = 766.6895 Temp = 288.6739
-PotEng = -3161.1673 E_bond = 284.8428 E_angle = 671.0959
-E_dihed = 380.3406 E_impro = 51.2975 E_vdwl = -219.5211
-E_coul = -990.6305 E_long = -3338.5925 Press = -15.2279
----------------- Step 1000 ----- CPU = 14.7908 (sec) ----------------
-TotEng = -2340.1471 KinEng = 799.0198 Temp = 300.8469
-PotEng = -3139.1669 E_bond = 271.0389 E_angle = 683.8278
-E_dihed = 407.0795 E_impro = 39.6209 E_vdwl = -230.5355
-E_coul = -974.2981 E_long = -3335.9003 Press = -94.3420
-Loop time of 14.7909 on 4 procs for 1000 steps with 892 atoms
-
-Performance: 46.732 ns/day, 0.514 hours/ns, 67.609 timesteps/s
-99.1% CPU use with 4 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section | min time | avg time | max time |%varavg| %total
----------------------------------------------------------------
-Pair | 4.4184 | 7.5543 | 10.133 | 74.2 | 51.07
-Bond | 0.94027 | 1.9781 | 2.7492 | 54.4 | 13.37
-Kspace | 0.45487 | 0.45887 | 0.46343 | 0.4 | 3.10
-Neigh | 0.28145 | 0.28339 | 0.28539 | 0.3 | 1.92
-Comm | 0.7515 | 4.1484 | 8.3861 | 135.5 | 28.05
-Output | 0.00049973 | 0.00055474 | 0.00066924 | 0.0 | 0.00
-Modify | 0.26165 | 0.31142 | 0.35023 | 6.7 | 2.11
-Other | | 0.05572 | | | 0.38
-
-Nlocal: 223 ave 313 max 122 min
-Histogram: 1 0 0 1 0 0 0 1 0 1
-Nghost: 584.5 ave 605 max 553 min
-Histogram: 1 0 0 0 0 1 0 0 0 2
-Neighs: 35448 ave 42093 max 25175 min
-Histogram: 1 0 0 0 0 0 1 1 0 1
-
-Total # of neighbors = 141792
-Ave neighs/atom = 158.96
-Ave special neighs/atom = 10.9395
-Neighbor list builds = 186
-Dangerous builds = 0
-
-unfix cor
-unfix 1
-
-
-Please see the log.cite file for references relevant to this simulation
-
-Total wall time: 0:00:16
diff --git a/examples/USER/misc/filter_corotate/log.10Mar2017.peptide.g++.1 b/examples/USER/misc/filter_corotate/log.10Mar2017.peptide.g++.1
deleted file mode 100644
index 23dd4c8a89..0000000000
--- a/examples/USER/misc/filter_corotate/log.10Mar2017.peptide.g++.1
+++ /dev/null
@@ -1,146 +0,0 @@
-LAMMPS (10 Mar 2017)
- using 1 OpenMP thread(s) per MPI task
-# Solvated 5-mer peptide, run for 8ps in NVT
-
-units real
-atom_style full
-
-pair_style lj/charmm/coul/long 8.0 10.0 10.0
-bond_style harmonic
-angle_style charmm
-dihedral_style charmm
-improper_style harmonic
-kspace_style pppm 0.0001
-
-read_data data.peptide
- orthogonal box = (36.8402 41.0137 29.7681) to (64.2116 68.3851 57.1395)
- 1 by 1 by 1 MPI processor grid
- reading atoms ...
- 2004 atoms
- reading velocities ...
- 2004 velocities
- scanning bonds ...
- 3 = max bonds/atom
- scanning angles ...
- 6 = max angles/atom
- scanning dihedrals ...
- 14 = max dihedrals/atom
- scanning impropers ...
- 1 = max impropers/atom
- reading bonds ...
- 1365 bonds
- reading angles ...
- 786 angles
- reading dihedrals ...
- 207 dihedrals
- reading impropers ...
- 12 impropers
- 4 = max # of 1-2 neighbors
- 7 = max # of 1-3 neighbors
- 14 = max # of 1-4 neighbors
- 18 = max # of special neighbors
-
-neighbor 2.0 bin
-neigh_modify delay 5
-
-thermo 50
-#dump dump1 all atom 100 peptide.dump
-
-timestep 8
-
-run_style respa 3 2 8 bond 1 pair 2 kspace 3
-Respa levels:
- 1 = bond angle dihedral improper
- 2 = pair
- 3 = kspace
-
-fix 1 all nvt temp 250.0 250.0 100.0 tchain 1
-fix cor all filter/corotate m 1.0
- 19 = # of size 2 clusters
- 0 = # of size 3 clusters
- 3 = # of size 4 clusters
- 0 = # of size 5 clusters
- 646 = # of frozen angles
-run 1000
-PPPM initialization ...
-WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
- G vector (1/distance) = 0.268725
- grid = 15 15 15
- stencil order = 5
- estimated absolute RMS force accuracy = 0.0228209
- estimated relative force accuracy = 6.87243e-05
- using double precision FFTs
- 3d grid and FFT values/proc = 10648 3375
-Neighbor list info ...
- update every 1 steps, delay 5 steps, check yes
- max neighbors/atom: 2000, page size: 100000
- master list distance cutoff = 12
- ghost atom cutoff = 12
- binsize = 6, bins = 5 5 5
- 1 neighbor lists, perpetual/occasional/extra = 1 0 0
- (1) pair lj/charmm/coul/long, perpetual
- attributes: half, newton on
- pair build: half/bin/newton
- stencil: half/bin/3d/newton
- bin: standard
-Per MPI rank memory usage (min/avg/max) = 22.6706/1/0 Mbytes
-Step Temp E_pair E_mol TotEng Press
- 0 190.0857 -6785.6785 70.391457 -5580.3684 19434.821
- 50 239.46028 -7546.5667 1092.8874 -5023.9668 -24643.891
- 100 242.81799 -7125.5527 416.0788 -5259.7139 15525.465
- 150 235.97108 -7531.9334 932.35464 -5190.6987 -14838.489
- 200 252.06415 -7195.6011 568.02993 -5122.6064 8841.332
- 250 249.99431 -7586.5092 881.83491 -5212.0676 -9330.345
- 300 240.3382 -7333.0933 633.29951 -5264.8395 5137.9757
- 350 255.34529 -7568.2413 856.46371 -5187.2226 -6206.063
- 400 242.99276 -7419.9031 713.23943 -5255.8602 2447.0091
- 450 251.10653 -7622.061 844.20584 -5278.6079 -4906.6559
- 500 255.59314 -7439.253 710.84907 -5202.3691 1571.0032
- 550 253.2025 -7660.5101 823.05373 -5325.695 -4551.399
- 600 249.05313 -7509.6729 741.48104 -5281.2046 992.87
- 650 251.75984 -7593.6589 847.08244 -5243.4286 -3510.1176
- 700 249.25027 -7601.9112 794.0912 -5319.6557 305.76021
- 750 255.415 -7602.2674 822.98524 -5254.3109 -2333.421
- 800 241.99621 -7643.8878 796.53352 -5402.5008 -298.66565
- 850 253.6428 -7598.3764 816.45457 -5267.5316 -1905.3478
- 900 247.20231 -7690.2806 789.75999 -5424.5838 -1331.7228
- 950 255.92583 -7634.7505 831.18272 -5275.5466 -2186.5117
- 1000 253.2126 -7647.9526 823.93602 -5312.195 -1189.9659
-Loop time of 150.664 on 1 procs for 1000 steps with 2004 atoms
-
-Performance: 4.588 ns/day, 5.231 hours/ns, 6.637 timesteps/s
-99.7% CPU use with 1 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section | min time | avg time | max time |%varavg| %total
----------------------------------------------------------------
-Pair | 135.81 | 135.81 | 135.81 | 0.0 | 90.14
-Bond | 2.5889 | 2.5889 | 2.5889 | 0.0 | 1.72
-Kspace | 2.0379 | 2.0379 | 2.0379 | 0.0 | 1.35
-Neigh | 5.893 | 5.893 | 5.893 | 0.0 | 3.91
-Comm | 1.6998 | 1.6998 | 1.6998 | 0.0 | 1.13
-Output | 0.00077915 | 0.00077915 | 0.00077915 | 0.0 | 0.00
-Modify | 2 | 2 | 2 | 0.0 | 1.33
-Other | | 0.6352 | | | 0.42
-
-Nlocal: 2004 ave 2004 max 2004 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost: 11197 ave 11197 max 11197 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-Neighs: 707779 ave 707779 max 707779 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-
-Total # of neighbors = 707779
-Ave neighs/atom = 353.183
-Ave special neighs/atom = 2.34032
-Neighbor list builds = 200
-Dangerous builds = 200
-unfix cor
-unfix 1
-
-
-
-
-Please see the log.cite file for references relevant to this simulation
-
-Total wall time: 0:02:30
diff --git a/examples/USER/misc/filter_corotate/log.10Mar2017.peptide.g++.4 b/examples/USER/misc/filter_corotate/log.10Mar2017.peptide.g++.4
deleted file mode 100644
index 2cdd645fe3..0000000000
--- a/examples/USER/misc/filter_corotate/log.10Mar2017.peptide.g++.4
+++ /dev/null
@@ -1,146 +0,0 @@
-LAMMPS (10 Mar 2017)
- using 1 OpenMP thread(s) per MPI task
-# Solvated 5-mer peptide, run for 8ps in NVT
-
-units real
-atom_style full
-
-pair_style lj/charmm/coul/long 8.0 10.0 10.0
-bond_style harmonic
-angle_style charmm
-dihedral_style charmm
-improper_style harmonic
-kspace_style pppm 0.0001
-
-read_data data.peptide
- orthogonal box = (36.8402 41.0137 29.7681) to (64.2116 68.3851 57.1395)
- 1 by 2 by 2 MPI processor grid
- reading atoms ...
- 2004 atoms
- reading velocities ...
- 2004 velocities
- scanning bonds ...
- 3 = max bonds/atom
- scanning angles ...
- 6 = max angles/atom
- scanning dihedrals ...
- 14 = max dihedrals/atom
- scanning impropers ...
- 1 = max impropers/atom
- reading bonds ...
- 1365 bonds
- reading angles ...
- 786 angles
- reading dihedrals ...
- 207 dihedrals
- reading impropers ...
- 12 impropers
- 4 = max # of 1-2 neighbors
- 7 = max # of 1-3 neighbors
- 14 = max # of 1-4 neighbors
- 18 = max # of special neighbors
-
-neighbor 2.0 bin
-neigh_modify delay 5
-
-thermo 50
-#dump dump1 all atom 100 peptide.dump
-
-timestep 8
-
-run_style respa 3 2 8 bond 1 pair 2 kspace 3
-Respa levels:
- 1 = bond angle dihedral improper
- 2 = pair
- 3 = kspace
-
-fix 1 all nvt temp 250.0 250.0 100.0 tchain 1
-fix cor all filter/corotate m 1.0
- 19 = # of size 2 clusters
- 0 = # of size 3 clusters
- 3 = # of size 4 clusters
- 0 = # of size 5 clusters
- 646 = # of frozen angles
-run 1000
-PPPM initialization ...
-WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
- G vector (1/distance) = 0.268725
- grid = 15 15 15
- stencil order = 5
- estimated absolute RMS force accuracy = 0.0228209
- estimated relative force accuracy = 6.87243e-05
- using double precision FFTs
- 3d grid and FFT values/proc = 4312 960
-Neighbor list info ...
- update every 1 steps, delay 5 steps, check yes
- max neighbors/atom: 2000, page size: 100000
- master list distance cutoff = 12
- ghost atom cutoff = 12
- binsize = 6, bins = 5 5 5
- 1 neighbor lists, perpetual/occasional/extra = 1 0 0
- (1) pair lj/charmm/coul/long, perpetual
- attributes: half, newton on
- pair build: half/bin/newton
- stencil: half/bin/3d/newton
- bin: standard
-Per MPI rank memory usage (min/avg/max) = 16.8394/0.98826/0 Mbytes
-Step Temp E_pair E_mol TotEng Press
- 0 190.0857 -6785.6785 70.391457 -5580.3684 19434.821
- 50 239.46028 -7546.5668 1092.8874 -5023.9668 -24643.891
- 100 242.81819 -7125.5629 416.08082 -5259.7209 15525.244
- 150 235.94928 -7531.9186 932.50658 -5190.6621 -14842.431
- 200 255.85551 -7254.4065 568.8803 -5157.9249 8936.8651
- 250 247.8705 -7607.4583 858.06087 -5269.4711 -9926.0442
- 300 257.64176 -7267.424 618.5573 -5110.6004 5173.3307
- 350 251.65439 -7572.3806 821.15745 -5248.7049 -7092.327
- 400 256.87927 -7414.2145 655.33178 -5225.169 4119.4095
- 450 257.12393 -7576.5541 853.39773 -5187.9819 -5224.8823
- 500 242.42371 -7524.705 705.75357 -5371.5455 2111.3878
- 550 248.97188 -7541.076 792.86994 -5261.7038 -2278.4185
- 600 249.81862 -7592.0499 767.17722 -5333.3149 -1149.4759
- 650 253.31349 -7578.2665 813.75975 -5252.0827 -2915.5706
- 700 256.61152 -7588.1475 761.03356 -5294.9988 -747.88089
- 750 248.3606 -7660.457 837.71615 -5339.8883 -3072.8311
- 800 253.81464 -7638.6089 782.4229 -5340.7698 -1025.909
- 850 245.69185 -7660.9036 795.66792 -5398.3172 -2717.5851
- 900 249.13156 -7589.4769 806.43464 -5295.5867 -761.63361
- 950 251.11482 -7691.4981 869.34937 -5322.852 -3282.3031
- 1000 241.9195 -7630.9899 828.59107 -5358.0033 -95.962685
-Loop time of 45.5507 on 4 procs for 1000 steps with 2004 atoms
-
-Performance: 15.174 ns/day, 1.582 hours/ns, 21.954 timesteps/s
-99.4% CPU use with 4 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section | min time | avg time | max time |%varavg| %total
----------------------------------------------------------------
-Pair | 35.545 | 36.674 | 38.004 | 15.8 | 80.51
-Bond | 0.51302 | 0.67796 | 0.86345 | 18.6 | 1.49
-Kspace | 0.66031 | 0.68459 | 0.70506 | 2.1 | 1.50
-Neigh | 1.5605 | 1.5627 | 1.5649 | 0.1 | 3.43
-Comm | 3.4611 | 4.9841 | 6.294 | 47.2 | 10.94
-Output | 0.00079799 | 0.00086641 | 0.0010369 | 0.0 | 0.00
-Modify | 0.67341 | 0.69059 | 0.71186 | 1.7 | 1.52
-Other | | 0.2762 | | | 0.61
-
-Nlocal: 501 ave 523 max 473 min
-Histogram: 1 0 0 0 0 0 2 0 0 1
-Nghost: 6643.25 ave 6708 max 6566 min
-Histogram: 1 1 0 0 0 0 0 0 0 2
-Neighs: 176977 ave 185765 max 164931 min
-Histogram: 1 0 0 0 1 0 0 0 1 1
-
-Total # of neighbors = 707908
-Ave neighs/atom = 353.248
-Ave special neighs/atom = 2.34032
-Neighbor list builds = 200
-Dangerous builds = 200
-unfix cor
-unfix 1
-
-
-
-
-Please see the log.cite file for references relevant to this simulation
-
-Total wall time: 0:00:45
diff --git a/examples/USER/misc/filter_corotate/log.22Jun2017.bpti.g++.1 b/examples/USER/misc/filter_corotate/log.22Jun2017.bpti.g++.1
new file mode 100644
index 0000000000..1e708a9d39
--- /dev/null
+++ b/examples/USER/misc/filter_corotate/log.22Jun2017.bpti.g++.1
@@ -0,0 +1,241 @@
+LAMMPS (20 Jun 2017)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
+ using 1 OpenMP thread(s) per MPI task
+
+units real
+
+atom_style full
+bond_style harmonic
+angle_style charmm
+dihedral_style charmm
+improper_style harmonic
+
+pair_style lj/charmm/coul/long 8 10
+pair_modify mix arithmetic
+kspace_style pppm 1e-4
+
+read_data data.bpti
+ orthogonal box = (-10 -10 -30) to (50 50 30)
+ 1 by 1 by 1 MPI processor grid
+ reading atoms ...
+ 892 atoms
+ scanning bonds ...
+ 4 = max bonds/atom
+ scanning angles ...
+ 6 = max angles/atom
+ scanning dihedrals ...
+ 18 = max dihedrals/atom
+ scanning impropers ...
+ 2 = max impropers/atom
+ reading bonds ...
+ 906 bonds
+ reading angles ...
+ 1626 angles
+ reading dihedrals ...
+ 2501 dihedrals
+ reading impropers ...
+ 137 impropers
+ 4 = max # of 1-2 neighbors
+ 9 = max # of 1-3 neighbors
+ 19 = max # of 1-4 neighbors
+ 21 = max # of special neighbors
+
+special_bonds charmm
+neigh_modify delay 2 every 1
+
+
+# ------------- MINIMIZE ----------
+
+minimize 1e-4 1e-6 1000 10000
+WARNING: Resetting reneighboring criteria during minimization (../min.cpp:168)
+PPPM initialization ...
+WARNING: System is not charge neutral, net charge = 6 (../kspace.cpp:302)
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+ G vector (1/distance) = 0.203272
+ grid = 16 16 16
+ stencil order = 5
+ estimated absolute RMS force accuracy = 0.0316399
+ estimated relative force accuracy = 9.52826e-05
+ using double precision FFTs
+ 3d grid and FFT values/proc = 9261 4096
+Neighbor list info ...
+ update every 1 steps, delay 0 steps, check yes
+ max neighbors/atom: 2000, page size: 100000
+ master list distance cutoff = 12
+ ghost atom cutoff = 12
+ binsize = 6, bins = 10 10 10
+ 1 neighbor lists, perpetual/occasional/extra = 1 0 0
+ (1) pair lj/charmm/coul/long, perpetual
+ attributes: half, newton on
+ pair build: half/bin/newton
+ stencil: half/bin/3d/newton
+ bin: standard
+Per MPI rank memory allocation (min/avg/max) = 17.86 | 17.86 | 17.86 Mbytes
+Step Temp E_pair E_mol TotEng Press
+ 0 0 -3075.6498 943.91164 -2131.7381 -380.67776
+ 241 0 -4503.313 749.58662 -3753.7264 -29.045104
+Loop time of 7.63279 on 1 procs for 241 steps with 892 atoms
+
+32.0% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+Minimization stats:
+ Stopping criterion = energy tolerance
+ Energy initial, next-to-last, final =
+ -2131.73812515 -3753.43984087 -3753.72636847
+ Force two-norm initial, final = 1086.21 26.3688
+ Force max component initial, final = 310.811 3.92748
+ Final line search alpha, max atom move = 0.00596649 0.0234333
+ Iterations, force evaluations = 241 463
+
+MPI task timing breakdown:
+Section | min time | avg time | max time |%varavg| %total
+---------------------------------------------------------------
+Pair | 5.8395 | 5.8395 | 5.8395 | 0.0 | 76.51
+Bond | 0.46414 | 0.46414 | 0.46414 | 0.0 | 6.08
+Kspace | 1.1535 | 1.1535 | 1.1535 | 0.0 | 15.11
+Neigh | 0.14908 | 0.14908 | 0.14908 | 0.0 | 1.95
+Comm | 0.001932 | 0.001932 | 0.001932 | 0.0 | 0.03
+Output | 0 | 0 | 0 | 0.0 | 0.00
+Modify | 0 | 0 | 0 | 0.0 | 0.00
+Other | | 0.02465 | | | 0.32
+
+Nlocal: 892 ave 892 max 892 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost: 31 ave 31 max 31 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs: 148891 ave 148891 max 148891 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 148891
+Ave neighs/atom = 166.918
+Ave special neighs/atom = 10.9395
+Neighbor list builds = 15
+Dangerous builds = 0
+reset_timestep 0
+
+# ------------- RUN ---------------
+
+thermo 100
+thermo_style multi
+timestep 8
+
+run_style respa 3 2 8 bond 1 dihedral 2 pair 2 kspace 3
+Respa levels:
+ 1 = bond angle
+ 2 = dihedral improper pair
+ 3 = kspace
+
+velocity all create 200.0 12345678 dist uniform
+#dump dump1 all atom 100 4pti.dump
+
+fix 1 all nvt temp 200 300 25
+fix cor all filter/corotate m 1.0
+ 163 = # of size 2 clusters
+ 0 = # of size 3 clusters
+ 25 = # of size 4 clusters
+ 0 = # of size 5 clusters
+ 100 = # of frozen angles
+
+run 1000
+PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+ G vector (1/distance) = 0.203272
+ grid = 16 16 16
+ stencil order = 5
+ estimated absolute RMS force accuracy = 0.0316399
+ estimated relative force accuracy = 9.52826e-05
+ using double precision FFTs
+ 3d grid and FFT values/proc = 9261 4096
+Per MPI rank memory allocation (min/avg/max) = 19.55 | 19.55 | 19.55 Mbytes
+---------------- Step 0 ----- CPU = 0.0000 (sec) ----------------
+TotEng = -3220.3378 KinEng = 531.1804 Temp = 200.0000
+PotEng = -3751.5181 E_bond = 42.2810 E_angle = 345.2592
+E_dihed = 337.8361 E_impro = 24.2103 E_vdwl = -288.5339
+E_coul = -886.3622 E_long = -3326.2088 Press = 83.2283
+---------------- Step 100 ----- CPU = 8.4380 (sec) ----------------
+TotEng = -2718.4258 KinEng = 539.6265 Temp = 203.1802
+PotEng = -3258.0524 E_bond = 203.2307 E_angle = 566.1893
+E_dihed = 397.6759 E_impro = 34.7696 E_vdwl = -248.6577
+E_coul = -874.8466 E_long = -3336.4135 Press = 135.8640
+---------------- Step 200 ----- CPU = 16.9012 (sec) ----------------
+TotEng = -2661.9611 KinEng = 625.0674 Temp = 235.3503
+PotEng = -3287.0285 E_bond = 208.1804 E_angle = 590.8462
+E_dihed = 389.1482 E_impro = 30.5882 E_vdwl = -240.5448
+E_coul = -926.3091 E_long = -3338.9378 Press = 103.4738
+---------------- Step 300 ----- CPU = 25.3046 (sec) ----------------
+TotEng = -2662.4139 KinEng = 622.2647 Temp = 234.2951
+PotEng = -3284.6785 E_bond = 202.4210 E_angle = 573.6793
+E_dihed = 382.8919 E_impro = 41.8973 E_vdwl = -218.9895
+E_coul = -924.8414 E_long = -3341.7372 Press = 40.6746
+---------------- Step 400 ----- CPU = 33.8063 (sec) ----------------
+TotEng = -2604.9431 KinEng = 662.9890 Temp = 249.6286
+PotEng = -3267.9321 E_bond = 195.9116 E_angle = 616.1383
+E_dihed = 407.8502 E_impro = 43.3560 E_vdwl = -219.0377
+E_coul = -966.3118 E_long = -3345.8387 Press = -91.8856
+---------------- Step 500 ----- CPU = 42.3470 (sec) ----------------
+TotEng = -2609.3867 KinEng = 657.0939 Temp = 247.4090
+PotEng = -3266.4806 E_bond = 236.4955 E_angle = 570.6256
+E_dihed = 390.5111 E_impro = 41.9250 E_vdwl = -223.9927
+E_coul = -939.5249 E_long = -3342.5201 Press = 236.7471
+---------------- Step 600 ----- CPU = 50.9590 (sec) ----------------
+TotEng = -2564.7161 KinEng = 701.8494 Temp = 264.2603
+PotEng = -3266.5655 E_bond = 223.5820 E_angle = 582.7722
+E_dihed = 394.6196 E_impro = 43.8581 E_vdwl = -201.7759
+E_coul = -967.4136 E_long = -3342.2079 Press = 26.6595
+---------------- Step 700 ----- CPU = 59.4791 (sec) ----------------
+TotEng = -2510.1142 KinEng = 689.5931 Temp = 259.6455
+PotEng = -3199.7072 E_bond = 254.6476 E_angle = 611.9715
+E_dihed = 403.0624 E_impro = 44.1360 E_vdwl = -205.6377
+E_coul = -964.7455 E_long = -3343.1416 Press = 60.5789
+---------------- Step 800 ----- CPU = 67.9330 (sec) ----------------
+TotEng = -2452.7408 KinEng = 777.5962 Temp = 292.7805
+PotEng = -3230.3370 E_bond = 250.4950 E_angle = 656.6738
+E_dihed = 382.4702 E_impro = 39.5378 E_vdwl = -225.0375
+E_coul = -994.4519 E_long = -3340.0244 Press = -19.6463
+---------------- Step 900 ----- CPU = 76.3690 (sec) ----------------
+TotEng = -2339.9766 KinEng = 808.7116 Temp = 304.4961
+PotEng = -3148.6883 E_bond = 247.7657 E_angle = 679.0658
+E_dihed = 398.2984 E_impro = 43.7890 E_vdwl = -230.2498
+E_coul = -945.8152 E_long = -3341.5422 Press = -64.4343
+---------------- Step 1000 ----- CPU = 84.8757 (sec) ----------------
+TotEng = -2329.1819 KinEng = 822.9820 Temp = 309.8691
+PotEng = -3152.1639 E_bond = 264.9609 E_angle = 691.7104
+E_dihed = 385.9914 E_impro = 40.5525 E_vdwl = -230.5182
+E_coul = -954.6203 E_long = -3350.2405 Press = -146.6649
+Loop time of 84.8758 on 1 procs for 1000 steps with 892 atoms
+
+Performance: 8.144 ns/day, 2.947 hours/ns, 11.782 timesteps/s
+32.0% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section | min time | avg time | max time |%varavg| %total
+---------------------------------------------------------------
+Pair | 68.548 | 68.548 | 68.548 | 0.0 | 80.76
+Bond | 10.263 | 10.263 | 10.263 | 0.0 | 12.09
+Kspace | 2.4528 | 2.4528 | 2.4528 | 0.0 | 2.89
+Neigh | 1.9041 | 1.9041 | 1.9041 | 0.0 | 2.24
+Comm | 0.044126 | 0.044126 | 0.044126 | 0.0 | 0.05
+Output | 0.000983 | 0.000983 | 0.000983 | 0.0 | 0.00
+Modify | 1.4113 | 1.4113 | 1.4113 | 0.0 | 1.66
+Other | | 0.2516 | | | 0.30
+
+Nlocal: 892 ave 892 max 892 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost: 38 ave 38 max 38 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs: 144068 ave 144068 max 144068 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 144068
+Ave neighs/atom = 161.511
+Ave special neighs/atom = 10.9395
+Neighbor list builds = 190
+Dangerous builds = 0
+
+unfix cor
+unfix 1
+
+
+Please see the log.cite file for references relevant to this simulation
+
+Total wall time: 0:01:32
diff --git a/examples/USER/misc/filter_corotate/log.22Jun2017.bpti.g++.4 b/examples/USER/misc/filter_corotate/log.22Jun2017.bpti.g++.4
new file mode 100644
index 0000000000..5367f0e624
--- /dev/null
+++ b/examples/USER/misc/filter_corotate/log.22Jun2017.bpti.g++.4
@@ -0,0 +1,241 @@
+LAMMPS (20 Jun 2017)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
+ using 1 OpenMP thread(s) per MPI task
+
+units real
+
+atom_style full
+bond_style harmonic
+angle_style charmm
+dihedral_style charmm
+improper_style harmonic
+
+pair_style lj/charmm/coul/long 8 10
+pair_modify mix arithmetic
+kspace_style pppm 1e-4
+
+read_data data.bpti
+ orthogonal box = (-10 -10 -30) to (50 50 30)
+ 1 by 2 by 2 MPI processor grid
+ reading atoms ...
+ 892 atoms
+ scanning bonds ...
+ 4 = max bonds/atom
+ scanning angles ...
+ 6 = max angles/atom
+ scanning dihedrals ...
+ 18 = max dihedrals/atom
+ scanning impropers ...
+ 2 = max impropers/atom
+ reading bonds ...
+ 906 bonds
+ reading angles ...
+ 1626 angles
+ reading dihedrals ...
+ 2501 dihedrals
+ reading impropers ...
+ 137 impropers
+ 4 = max # of 1-2 neighbors
+ 9 = max # of 1-3 neighbors
+ 19 = max # of 1-4 neighbors
+ 21 = max # of special neighbors
+
+special_bonds charmm
+neigh_modify delay 2 every 1
+
+
+# ------------- MINIMIZE ----------
+
+minimize 1e-4 1e-6 1000 10000
+WARNING: Resetting reneighboring criteria during minimization (../min.cpp:168)
+PPPM initialization ...
+WARNING: System is not charge neutral, net charge = 6 (../kspace.cpp:302)
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+ G vector (1/distance) = 0.203272
+ grid = 16 16 16
+ stencil order = 5
+ estimated absolute RMS force accuracy = 0.0316399
+ estimated relative force accuracy = 9.52826e-05
+ using double precision FFTs
+ 3d grid and FFT values/proc = 3549 1024
+Neighbor list info ...
+ update every 1 steps, delay 0 steps, check yes
+ max neighbors/atom: 2000, page size: 100000
+ master list distance cutoff = 12
+ ghost atom cutoff = 12
+ binsize = 6, bins = 10 10 10
+ 1 neighbor lists, perpetual/occasional/extra = 1 0 0
+ (1) pair lj/charmm/coul/long, perpetual
+ attributes: half, newton on
+ pair build: half/bin/newton
+ stencil: half/bin/3d/newton
+ bin: standard
+Per MPI rank memory allocation (min/avg/max) = 16.97 | 17.2 | 17.52 Mbytes
+Step Temp E_pair E_mol TotEng Press
+ 0 0 -3075.6498 943.91164 -2131.7381 -380.67776
+ 241 0 -4503.3131 749.58665 -3753.7264 -29.044989
+Loop time of 3.06327 on 4 procs for 241 steps with 892 atoms
+
+31.9% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+Minimization stats:
+ Stopping criterion = energy tolerance
+ Energy initial, next-to-last, final =
+ -2131.73812515 -3753.4398752 -3753.72640446
+ Force two-norm initial, final = 1086.21 26.3687
+ Force max component initial, final = 310.811 3.92765
+ Final line search alpha, max atom move = 0.0059665 0.0234343
+ Iterations, force evaluations = 241 463
+
+MPI task timing breakdown:
+Section | min time | avg time | max time |%varavg| %total
+---------------------------------------------------------------
+Pair | 0.91458 | 1.6235 | 2.2701 | 38.2 | 53.00
+Bond | 0.055164 | 0.13173 | 0.19487 | 15.1 | 4.30
+Kspace | 0.48966 | 1.1993 | 1.9847 | 48.7 | 39.15
+Neigh | 0.053297 | 0.053442 | 0.053576 | 0.0 | 1.74
+Comm | 0.031677 | 0.035006 | 0.038061 | 1.5 | 1.14
+Output | 0 | 0 | 0 | 0.0 | 0.00
+Modify | 0 | 0 | 0 | 0.0 | 0.00
+Other | | 0.02021 | | | 0.66
+
+Nlocal: 223 ave 323 max 89 min
+Histogram: 1 0 0 0 1 0 0 0 1 1
+Nghost: 613 ave 675 max 557 min
+Histogram: 1 0 0 1 0 1 0 0 0 1
+Neighs: 37222.8 ave 50005 max 20830 min
+Histogram: 1 0 0 0 1 0 0 1 0 1
+
+Total # of neighbors = 148891
+Ave neighs/atom = 166.918
+Ave special neighs/atom = 10.9395
+Neighbor list builds = 15
+Dangerous builds = 0
+reset_timestep 0
+
+# ------------- RUN ---------------
+
+thermo 100
+thermo_style multi
+timestep 8
+
+run_style respa 3 2 8 bond 1 dihedral 2 pair 2 kspace 3
+Respa levels:
+ 1 = bond angle
+ 2 = dihedral improper pair
+ 3 = kspace
+
+velocity all create 200.0 12345678 dist uniform
+#dump dump1 all atom 100 4pti.dump
+
+fix 1 all nvt temp 200 300 25
+fix cor all filter/corotate m 1.0
+ 163 = # of size 2 clusters
+ 0 = # of size 3 clusters
+ 25 = # of size 4 clusters
+ 0 = # of size 5 clusters
+ 100 = # of frozen angles
+
+run 1000
+PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+ G vector (1/distance) = 0.203272
+ grid = 16 16 16
+ stencil order = 5
+ estimated absolute RMS force accuracy = 0.0316399
+ estimated relative force accuracy = 9.52826e-05
+ using double precision FFTs
+ 3d grid and FFT values/proc = 3549 1024
+Per MPI rank memory allocation (min/avg/max) = 17.14 | 17.63 | 18.14 Mbytes
+---------------- Step 0 ----- CPU = 0.0000 (sec) ----------------
+TotEng = -3220.3378 KinEng = 531.1804 Temp = 200.0000
+PotEng = -3751.5182 E_bond = 42.2810 E_angle = 345.2593
+E_dihed = 337.8361 E_impro = 24.2103 E_vdwl = -288.5339
+E_coul = -886.3622 E_long = -3326.2088 Press = 83.2284
+---------------- Step 100 ----- CPU = 3.4639 (sec) ----------------
+TotEng = -2718.4266 KinEng = 539.6246 Temp = 203.1794
+PotEng = -3258.0513 E_bond = 203.2306 E_angle = 566.1887
+E_dihed = 397.6756 E_impro = 34.7695 E_vdwl = -248.6577
+E_coul = -874.8446 E_long = -3336.4135 Press = 135.8653
+---------------- Step 200 ----- CPU = 6.8898 (sec) ----------------
+TotEng = -2662.0450 KinEng = 625.0178 Temp = 235.3317
+PotEng = -3287.0628 E_bond = 208.1691 E_angle = 590.8259
+E_dihed = 389.1424 E_impro = 30.5879 E_vdwl = -240.5397
+E_coul = -926.3110 E_long = -3338.9375 Press = 103.4843
+---------------- Step 300 ----- CPU = 10.2791 (sec) ----------------
+TotEng = -2661.8829 KinEng = 623.0352 Temp = 234.5852
+PotEng = -3284.9181 E_bond = 203.0274 E_angle = 573.6583
+E_dihed = 383.0124 E_impro = 41.9015 E_vdwl = -218.0696
+E_coul = -926.5806 E_long = -3341.8675 Press = 45.6868
+---------------- Step 400 ----- CPU = 13.5874 (sec) ----------------
+TotEng = -2594.5220 KinEng = 672.8693 Temp = 253.3487
+PotEng = -3267.3914 E_bond = 201.3378 E_angle = 612.7099
+E_dihed = 410.1920 E_impro = 44.0201 E_vdwl = -217.9714
+E_coul = -971.6203 E_long = -3346.0595 Press = -121.1015
+---------------- Step 500 ----- CPU = 16.9047 (sec) ----------------
+TotEng = -2603.9306 KinEng = 668.2122 Temp = 251.5952
+PotEng = -3272.1428 E_bond = 238.1081 E_angle = 578.3310
+E_dihed = 399.1305 E_impro = 41.4314 E_vdwl = -216.9664
+E_coul = -969.4047 E_long = -3342.7729 Press = 156.7851
+---------------- Step 600 ----- CPU = 20.1970 (sec) ----------------
+TotEng = -2531.1096 KinEng = 728.1698 Temp = 274.1705
+PotEng = -3259.2794 E_bond = 232.8396 E_angle = 621.3323
+E_dihed = 398.1952 E_impro = 37.0914 E_vdwl = -241.6350
+E_coul = -963.1540 E_long = -3343.9488 Press = 58.6784
+---------------- Step 700 ----- CPU = 23.4360 (sec) ----------------
+TotEng = -2499.9495 KinEng = 742.1211 Temp = 279.4234
+PotEng = -3242.0705 E_bond = 240.5622 E_angle = 582.9270
+E_dihed = 396.6246 E_impro = 36.6510 E_vdwl = -228.4925
+E_coul = -926.8734 E_long = -3343.4695 Press = -60.7458
+---------------- Step 800 ----- CPU = 26.6709 (sec) ----------------
+TotEng = -2426.0217 KinEng = 760.1083 Temp = 286.1959
+PotEng = -3186.1300 E_bond = 266.5863 E_angle = 652.3401
+E_dihed = 380.7407 E_impro = 34.6861 E_vdwl = -225.3729
+E_coul = -953.2382 E_long = -3341.8721 Press = -57.9824
+---------------- Step 900 ----- CPU = 29.8152 (sec) ----------------
+TotEng = -2419.4636 KinEng = 780.8361 Temp = 294.0004
+PotEng = -3200.2996 E_bond = 269.3237 E_angle = 665.7171
+E_dihed = 408.3527 E_impro = 43.7811 E_vdwl = -254.0696
+E_coul = -1002.0694 E_long = -3331.3352 Press = -52.0169
+---------------- Step 1000 ----- CPU = 32.8748 (sec) ----------------
+TotEng = -2398.7244 KinEng = 811.9856 Temp = 305.7288
+PotEng = -3210.7099 E_bond = 258.2207 E_angle = 639.3671
+E_dihed = 379.3353 E_impro = 41.7602 E_vdwl = -207.2654
+E_coul = -983.9330 E_long = -3338.1948 Press = 89.4870
+Loop time of 32.8751 on 4 procs for 1000 steps with 892 atoms
+
+Performance: 21.025 ns/day, 1.141 hours/ns, 30.418 timesteps/s
+31.9% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section | min time | avg time | max time |%varavg| %total
+---------------------------------------------------------------
+Pair | 12.449 | 19.023 | 24.612 | 99.6 | 57.86
+Bond | 1.4547 | 2.8768 | 3.9098 | 61.4 | 8.75
+Kspace | 1.0537 | 1.0778 | 1.0992 | 2.1 | 3.28
+Neigh | 0.67542 | 0.67994 | 0.68323 | 0.3 | 2.07
+Comm | 1.8602 | 8.4515 | 16.516 | 182.9 | 25.71
+Output | 0.000839 | 0.00147 | 0.003293 | 2.7 | 0.00
+Modify | 0.56658 | 0.63186 | 0.69304 | 6.8 | 1.92
+Other | | 0.133 | | | 0.40
+
+Nlocal: 223 ave 339 max 136 min
+Histogram: 1 1 0 0 0 1 0 0 0 1
+Nghost: 590 ave 626 max 552 min
+Histogram: 1 0 0 0 1 0 1 0 0 1
+Neighs: 36488.2 ave 41965 max 29054 min
+Histogram: 1 0 0 0 1 0 0 0 1 1
+
+Total # of neighbors = 145953
+Ave neighs/atom = 163.624
+Ave special neighs/atom = 10.9395
+Neighbor list builds = 189
+Dangerous builds = 0
+
+unfix cor
+unfix 1
+
+
+Please see the log.cite file for references relevant to this simulation
+
+Total wall time: 0:00:36
diff --git a/examples/USER/misc/filter_corotate/log.22Jun2017.peptide.g++.1 b/examples/USER/misc/filter_corotate/log.22Jun2017.peptide.g++.1
new file mode 100644
index 0000000000..22c5483c9e
--- /dev/null
+++ b/examples/USER/misc/filter_corotate/log.22Jun2017.peptide.g++.1
@@ -0,0 +1,147 @@
+LAMMPS (20 Jun 2017)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
+ using 1 OpenMP thread(s) per MPI task
+# Solvated 5-mer peptide, run for 8ps in NVT
+
+units real
+atom_style full
+
+pair_style lj/charmm/coul/long 8.0 10.0 10.0
+bond_style harmonic
+angle_style charmm
+dihedral_style charmm
+improper_style harmonic
+kspace_style pppm 0.0001
+
+read_data data.peptide
+ orthogonal box = (36.8402 41.0137 29.7681) to (64.2116 68.3851 57.1395)
+ 1 by 1 by 1 MPI processor grid
+ reading atoms ...
+ 2004 atoms
+ reading velocities ...
+ 2004 velocities
+ scanning bonds ...
+ 3 = max bonds/atom
+ scanning angles ...
+ 6 = max angles/atom
+ scanning dihedrals ...
+ 14 = max dihedrals/atom
+ scanning impropers ...
+ 1 = max impropers/atom
+ reading bonds ...
+ 1365 bonds
+ reading angles ...
+ 786 angles
+ reading dihedrals ...
+ 207 dihedrals
+ reading impropers ...
+ 12 impropers
+ 4 = max # of 1-2 neighbors
+ 7 = max # of 1-3 neighbors
+ 14 = max # of 1-4 neighbors
+ 18 = max # of special neighbors
+
+neighbor 2.0 bin
+neigh_modify delay 5
+
+thermo 50
+#dump dump1 all atom 100 peptide.dump
+
+timestep 8
+
+run_style respa 3 2 8 bond 1 dihedral 2 pair 2 kspace 3
+Respa levels:
+ 1 = bond angle
+ 2 = dihedral improper pair
+ 3 = kspace
+
+fix 1 all nvt temp 250.0 250.0 100.0 tchain 1
+fix cor all filter/corotate m 1.0
+ 19 = # of size 2 clusters
+ 0 = # of size 3 clusters
+ 3 = # of size 4 clusters
+ 0 = # of size 5 clusters
+ 646 = # of frozen angles
+run 1000
+PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+ G vector (1/distance) = 0.268725
+ grid = 15 15 15
+ stencil order = 5
+ estimated absolute RMS force accuracy = 0.0228209
+ estimated relative force accuracy = 6.87243e-05
+ using double precision FFTs
+ 3d grid and FFT values/proc = 10648 3375
+Neighbor list info ...
+ update every 1 steps, delay 5 steps, check yes
+ max neighbors/atom: 2000, page size: 100000
+ master list distance cutoff = 12
+ ghost atom cutoff = 12
+ binsize = 6, bins = 5 5 5
+ 1 neighbor lists, perpetual/occasional/extra = 1 0 0
+ (1) pair lj/charmm/coul/long, perpetual
+ attributes: half, newton on
+ pair build: half/bin/newton
+ stencil: half/bin/3d/newton
+ bin: standard
+Per MPI rank memory allocation (min/avg/max) = 22.72 | 22.72 | 22.72 Mbytes
+Step Temp E_pair E_mol TotEng Press
+ 0 190.0857 -6442.7438 70.391457 -5237.4338 20361.984
+ 50 239.47667 -7205.1006 1092.7664 -4682.5237 -23733.122
+ 100 244.63086 -6788.0793 422.97204 -4904.5234 16458.011
+ 150 240.79042 -7267.0791 966.31411 -4863.1107 -13554.894
+ 200 254.77122 -6868.5713 591.00071 -4756.4431 10532.563
+ 250 241.87417 -7264.9349 856.9357 -4963.8743 -9043.4359
+ 300 251.37775 -6976.8 650.55612 -4825.3773 6986.2021
+ 350 250.81494 -7286.7011 880.11184 -4909.0829 -6392.4665
+ 400 247.55673 -7104.4036 701.89555 -4924.4551 4720.7811
+ 450 258.54988 -7215.3011 832.23692 -4839.3759 -3446.3859
+ 500 246.80928 -7151.2468 715.61007 -4962.0464 2637.5769
+ 550 246.20721 -7159.0464 805.24974 -4883.8011 -2725.227
+ 600 250.62483 -7201.7688 806.10076 -4899.2968 770.22352
+ 650 247.59777 -7260.1607 802.97277 -4978.8899 -430.42309
+ 700 246.86951 -7286.2971 825.99865 -4986.3486 -427.88651
+ 750 252.79268 -7307.8572 833.4822 -4965.0605 -614.74372
+ 800 251.73191 -7315.2457 839.59859 -4972.666 952.56448
+ 850 246.75844 -7303.6221 816.67112 -5013.6642 -2055.2823
+ 900 251.00123 -7317.4219 825.12165 -4993.6817 -356.53166
+ 950 259.20822 -7252.3466 854.62611 -4850.1016 -1719.5267
+ 1000 245.72486 -7347.5547 811.48146 -5068.9576 -717.6136
+Loop time of 357.523 on 1 procs for 1000 steps with 2004 atoms
+
+Performance: 1.933 ns/day, 12.414 hours/ns, 2.797 timesteps/s
+32.0% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section | min time | avg time | max time |%varavg| %total
+---------------------------------------------------------------
+Pair | 328.2 | 328.2 | 328.2 | 0.0 | 91.80
+Bond | 4.4815 | 4.4815 | 4.4815 | 0.0 | 1.25
+Kspace | 3.9448 | 3.9448 | 3.9448 | 0.0 | 1.10
+Neigh | 12.457 | 12.457 | 12.457 | 0.0 | 3.48
+Comm | 3.2147 | 3.2147 | 3.2147 | 0.0 | 0.90
+Output | 0.001689 | 0.001689 | 0.001689 | 0.0 | 0.00
+Modify | 3.937 | 3.937 | 3.937 | 0.0 | 1.10
+Other | | 1.289 | | | 0.36
+
+Nlocal: 2004 ave 2004 max 2004 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost: 11191 ave 11191 max 11191 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs: 708610 ave 708610 max 708610 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 708610
+Ave neighs/atom = 353.598
+Ave special neighs/atom = 2.34032
+Neighbor list builds = 200
+Dangerous builds = 200
+unfix cor
+unfix 1
+
+
+
+
+Please see the log.cite file for references relevant to this simulation
+
+Total wall time: 0:05:57
diff --git a/examples/USER/misc/filter_corotate/log.22Jun2017.peptide.g++.4 b/examples/USER/misc/filter_corotate/log.22Jun2017.peptide.g++.4
new file mode 100644
index 0000000000..eec3843bd0
--- /dev/null
+++ b/examples/USER/misc/filter_corotate/log.22Jun2017.peptide.g++.4
@@ -0,0 +1,147 @@
+LAMMPS (20 Jun 2017)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
+ using 1 OpenMP thread(s) per MPI task
+# Solvated 5-mer peptide, run for 8ps in NVT
+
+units real
+atom_style full
+
+pair_style lj/charmm/coul/long 8.0 10.0 10.0
+bond_style harmonic
+angle_style charmm
+dihedral_style charmm
+improper_style harmonic
+kspace_style pppm 0.0001
+
+read_data data.peptide
+ orthogonal box = (36.8402 41.0137 29.7681) to (64.2116 68.3851 57.1395)
+ 1 by 2 by 2 MPI processor grid
+ reading atoms ...
+ 2004 atoms
+ reading velocities ...
+ 2004 velocities
+ scanning bonds ...
+ 3 = max bonds/atom
+ scanning angles ...
+ 6 = max angles/atom
+ scanning dihedrals ...
+ 14 = max dihedrals/atom
+ scanning impropers ...
+ 1 = max impropers/atom
+ reading bonds ...
+ 1365 bonds
+ reading angles ...
+ 786 angles
+ reading dihedrals ...
+ 207 dihedrals
+ reading impropers ...
+ 12 impropers
+ 4 = max # of 1-2 neighbors
+ 7 = max # of 1-3 neighbors
+ 14 = max # of 1-4 neighbors
+ 18 = max # of special neighbors
+
+neighbor 2.0 bin
+neigh_modify delay 5
+
+thermo 50
+#dump dump1 all atom 100 peptide.dump
+
+timestep 8
+
+run_style respa 3 2 8 bond 1 dihedral 2 pair 2 kspace 3
+Respa levels:
+ 1 = bond angle
+ 2 = dihedral improper pair
+ 3 = kspace
+
+fix 1 all nvt temp 250.0 250.0 100.0 tchain 1
+fix cor all filter/corotate m 1.0
+ 19 = # of size 2 clusters
+ 0 = # of size 3 clusters
+ 3 = # of size 4 clusters
+ 0 = # of size 5 clusters
+ 646 = # of frozen angles
+run 1000
+PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+ G vector (1/distance) = 0.268725
+ grid = 15 15 15
+ stencil order = 5
+ estimated absolute RMS force accuracy = 0.0228209
+ estimated relative force accuracy = 6.87243e-05
+ using double precision FFTs
+ 3d grid and FFT values/proc = 4312 960
+Neighbor list info ...
+ update every 1 steps, delay 5 steps, check yes
+ max neighbors/atom: 2000, page size: 100000
+ master list distance cutoff = 12
+ ghost atom cutoff = 12
+ binsize = 6, bins = 5 5 5
+ 1 neighbor lists, perpetual/occasional/extra = 1 0 0
+ (1) pair lj/charmm/coul/long, perpetual
+ attributes: half, newton on
+ pair build: half/bin/newton
+ stencil: half/bin/3d/newton
+ bin: standard
+Per MPI rank memory allocation (min/avg/max) = 16.87 | 17.05 | 17.26 Mbytes
+Step Temp E_pair E_mol TotEng Press
+ 0 190.0857 -6442.7438 70.391457 -5237.4338 20361.984
+ 50 239.47667 -7205.1005 1092.7664 -4682.5237 -23733.122
+ 100 244.63889 -6788.1152 422.96733 -4904.5161 16457.756
+ 150 239.36917 -7258.7053 967.87775 -4861.6589 -13526.261
+ 200 255.14702 -6864.0525 604.58036 -4736.1009 11013.1
+ 250 252.72919 -7303.0966 898.11178 -4896.0494 -8480.8766
+ 300 250.66477 -6989.2603 652.83649 -4839.8141 6209.3375
+ 350 243.30794 -7218.8575 838.31977 -4927.8525 -5180.4928
+ 400 256.3573 -7090.677 706.24197 -4853.8377 3302.577
+ 450 246.15776 -7274.574 834.31676 -4970.557 -3427.971
+ 500 256.28473 -7082.1447 735.42828 -4816.5524 2846.086
+ 550 251.32327 -7341.739 812.64934 -5028.5484 -1786.9277
+ 600 254.57737 -7152.3448 740.52534 -4891.8494 825.91675
+ 650 244.95305 -7207.1136 790.67659 -4953.9295 -520.79769
+ 700 249.4984 -7204.2699 779.06969 -4935.5544 -940.75384
+ 750 248.46962 -7232.1037 791.6642 -4956.9361 -548.12171
+ 800 260.2974 -7293.1982 793.23282 -4945.8435 -1171.26
+ 850 249.79023 -7258.3759 823.56789 -4943.4198 -499.76275
+ 900 249.97237 -7267.0584 784.57992 -4990.0028 -271.33531
+ 950 251.29018 -7261.0642 823.467 -4937.2534 -538.7168
+ 1000 246.05777 -7285.0948 847.90892 -4968.0826 -2613.1854
+Loop time of 94.6835 on 4 procs for 1000 steps with 2004 atoms
+
+Performance: 7.300 ns/day, 3.288 hours/ns, 10.562 timesteps/s
+37.9% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section | min time | avg time | max time |%varavg| %total
+---------------------------------------------------------------
+Pair | 33.389 | 78.508 | 94.639 | 294.1 | 82.92
+Bond | 0.39957 | 1.104 | 1.4443 | 40.6 | 1.17
+Kspace | 0.53324 | 1.2631 | 1.5137 | 37.5 | 1.33
+Neigh | 1.2668 | 3.011 | 3.5942 | 58.0 | 3.18
+Comm | 3.4563 | 8.8707 | 11.494 | 107.9 | 9.37
+Output | 0.000435 | 0.0017425 | 0.004136 | 3.4 | 0.00
+Modify | 0.59335 | 1.4123 | 1.6921 | 39.8 | 1.49
+Other | | 0.5129 | | | 0.54
+
+Nlocal: 501 ave 515 max 476 min
+Histogram: 1 0 0 0 0 0 0 1 1 1
+Nghost: 6681.5 ave 6740 max 6634 min
+Histogram: 2 0 0 0 0 0 0 1 0 1
+Neighs: 176872 ave 182642 max 168464 min
+Histogram: 1 0 0 0 0 0 1 1 0 1
+
+Total # of neighbors = 707486
+Ave neighs/atom = 353.037
+Ave special neighs/atom = 2.34032
+Neighbor list builds = 200
+Dangerous builds = 200
+unfix cor
+unfix 1
+
+
+
+
+Please see the log.cite file for references relevant to this simulation
+
+Total wall time: 0:01:53
diff --git a/examples/neb/README b/examples/neb/README
index 0993c5fcdb..5ef32f2ba6 100644
--- a/examples/neb/README
+++ b/examples/neb/README
@@ -2,15 +2,13 @@ Run these examples as:
mpirun -np 4 lmp_g++ -partition 4x1 -in in.neb.hop1
mpirun -np 4 lmp_g++ -partition 4x1 -in in.neb.hop2
-mpirun -np 4 lmp_g++ -partition 4x1 -in in.neb.hop1freeend
+mpirun -np 4 lmp_g++ -partition 4x1 -in in.neb.hop1.end
mpirun -np 3 lmp_g++ -partition 3x1 -in in.neb.sivac
mpirun -np 8 lmp_g++ -partition 4x2 -in in.neb.hop1
mpirun -np 8 lmp_g++ -partition 4x2 -in in.neb.hop2
-mpirun -np 8 lmp_g++ -partition 4x2 -in in.neb.hop1freeend
-mpirun -np 6 lmp_g++ -partition 3x2 -in in.neb.sivac
-mpirun -np 9 lmp_g++ -partition 3x3 -in in.neb.sivac
-
+mpirun -np 8 lmp_g++ -partition 4x2 -in in.neb.hop1.end
+mpirun -np 8 lmp_g++ -partition 4x2 -in in.neb.sivac
Note that more than 4 replicas should be used for a precise estimate
of the activation energy corresponding to a transition.
diff --git a/examples/neb/in.neb.hop1 b/examples/neb/in.neb.hop1
index b874d1ba32..f26b52a28a 100644
--- a/examples/neb/in.neb.hop1
+++ b/examples/neb/in.neb.hop1
@@ -51,7 +51,7 @@ set group nebatoms type 3
group nonneb subtract all nebatoms
fix 1 lower setforce 0.0 0.0 0.0
-fix 2 nebatoms neb 1.0 nudg_style idealpos
+fix 2 nebatoms neb 1.0 parallel ideal
fix 3 all enforce2d
thermo 100
diff --git a/examples/neb/in.neb.hop1freeend b/examples/neb/in.neb.hop1.end
similarity index 91%
rename from examples/neb/in.neb.hop1freeend
rename to examples/neb/in.neb.hop1.end
index fa90e9a98c..81e5315306 100644
--- a/examples/neb/in.neb.hop1freeend
+++ b/examples/neb/in.neb.hop1.end
@@ -15,7 +15,7 @@ variable u uloop 20
lattice hex 0.9
region box block 0 20 0 10 -0.25 0.25
-read_data initial.hop1freeend
+read_data initial.hop1.end
# LJ potentials
@@ -41,7 +41,7 @@ set group nebatoms type 3
group nonneb subtract all nebatoms
fix 1 lower setforce 0.0 0.0 0.0
-fix 2 nebatoms neb 1.0 nudg_style idealpos freeend ini
+fix 2 nebatoms neb 1.0 parallel ideal end first 1.0
fix 3 all enforce2d
thermo 100
diff --git a/examples/neb/in.neb.hop2 b/examples/neb/in.neb.hop2
index 242de759fa..e69fb338cd 100644
--- a/examples/neb/in.neb.hop2
+++ b/examples/neb/in.neb.hop2
@@ -65,4 +65,4 @@ thermo 100
min_style fire
-neb 0.0 0.01 1000 1000 100 final final.hop2
+neb 0.0 0.05 1000 1000 100 final final.hop2
diff --git a/examples/neb/initial.hop1freeend b/examples/neb/initial.hop1.end
similarity index 100%
rename from examples/neb/initial.hop1freeend
rename to examples/neb/initial.hop1.end
diff --git a/examples/neb/log.19Jun17.neb.hop1.end.g++.4 b/examples/neb/log.19Jun17.neb.hop1.end.g++.4
new file mode 100644
index 0000000000..4878b86566
--- /dev/null
+++ b/examples/neb/log.19Jun17.neb.hop1.end.g++.4
@@ -0,0 +1,11 @@
+LAMMPS (19 May 2017)
+Running on 4 partitions of processors
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+0 229.26196 146.68251 2.9774577 4.4127369 233.11559 0.023301843 0.0224626 1.4763579 0 -3.048332 0.33333333 -3.0250302 0.66666667 -3.0291888 1 -3.0474928
+100 0.11027532 0.085410308 3.0967938 0.024201563 0.38551033 0.0017583261 0.0021866943 1.7710358 0 -3.0483469 0.31192818 -3.0465886 0.61093022 -3.0466143 1 -3.0487752
+130 0.09954083 0.075481108 3.0927626 0.015664388 0.37491833 0.0017573704 0.0021913201 1.7713726 0 -3.048342 0.31428487 -3.0465846 0.61762817 -3.0466296 1 -3.048776
+Climbing replica = 2
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+130 0.37838747 0.3502435 3.0927626 0.015664388 0.37491833 0.0017573704 0.0021913201 1.7713726 0 -3.048342 0.31428487 -3.0465846 0.61762817 -3.0466296 1 -3.048776
+230 0.22757286 0.12027481 3.1250243 0.0081260569 0.14019507 0.0018364585 0.002278918 1.76926 0 -3.0483347 0.39730698 -3.0464983 0.64450769 -3.0466973 1 -3.0487772
+278 0.096184498 0.085088496 3.1405655 0.0068164307 0.093861113 0.0018426056 0.002286256 1.7684765 0 -3.0483338 0.41277997 -3.0464912 0.65562984 -3.0467294 1 -3.0487775
diff --git a/examples/neb/log.19Jun17.neb.hop1.end.g++.8 b/examples/neb/log.19Jun17.neb.hop1.end.g++.8
new file mode 100644
index 0000000000..62344b3da5
--- /dev/null
+++ b/examples/neb/log.19Jun17.neb.hop1.end.g++.8
@@ -0,0 +1,11 @@
+LAMMPS (19 May 2017)
+Running on 4 partitions of processors
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+0 229.26196 146.68251 2.9774577 4.4127369 233.11559 0.023301843 0.0224626 1.4763579 0 -3.048332 0.33333333 -3.0250302 0.66666667 -3.0291888 1 -3.0474928
+100 0.11375359 0.085350745 3.0966418 0.0236765 0.38531777 0.0017582606 0.0021868783 1.7710738 0 -3.0483467 0.31201141 -3.0465884 0.61117406 -3.0466149 1 -3.0487753
+119 0.09996986 0.078639268 3.0937691 0.017444108 0.3780308 0.0017574935 0.0021899317 1.7713574 0 -3.0483433 0.31354192 -3.0465858 0.61555533 -3.0466249 1 -3.0487758
+Climbing replica = 2
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+119 0.3793192 0.35281863 3.0937691 0.017444108 0.3780308 0.0017574935 0.0021899317 1.7713574 0 -3.0483433 0.31354192 -3.0465858 0.61555533 -3.0466249 1 -3.0487758
+219 0.20159133 0.12247026 3.1244061 0.0085896057 0.13938632 0.0018362816 0.0022783681 1.7693295 0 -3.048335 0.39646633 -3.0464988 0.64277703 -3.0466925 1 -3.0487771
+266 0.099868725 0.086180598 3.1401661 0.0070922949 0.095128081 0.001842608 0.002286044 1.7685191 0 -3.048334 0.41231024 -3.0464914 0.65425179 -3.0467252 1 -3.0487774
diff --git a/examples/neb/log.19Jun17.neb.hop1.g++.4 b/examples/neb/log.19Jun17.neb.hop1.g++.4
new file mode 100644
index 0000000000..e2984c031c
--- /dev/null
+++ b/examples/neb/log.19Jun17.neb.hop1.g++.4
@@ -0,0 +1,9 @@
+LAMMPS (19 May 2017)
+Running on 4 partitions of processors
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+0 4327.2753 2746.3378 0.082169072 4.9967651 4514.5424 0.42933428 0.42323635 1.8941131 0 -3.0535948 0.33333333 -2.6242605 0.66666667 -2.7623811 1 -3.0474969
+87 0.095951502 0.052720903 0.005588927 0.065110105 0.12467831 0.0071014928 0.0022798007 2.3003372 0 -3.0535967 0.32435271 -3.0473127 0.62805027 -3.0464952 1 -3.048775
+Climbing replica = 3
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+87 0.14137277 0.11108954 0.005588927 0.065110105 0.12467831 0.0071014928 0.0022798007 2.3003372 0 -3.0535967 0.32435271 -3.0473127 0.62805027 -3.0464952 1 -3.048775
+124 0.099583263 0.085936899 0.0044220372 0.023873795 0.091308308 0.0071061754 0.0022863931 2.308121 0 -3.0535968 0.32223905 -3.0473329 0.61673898 -3.0464906 1 -3.048777
diff --git a/examples/neb/log.19Jun17.neb.hop1.g++.8 b/examples/neb/log.19Jun17.neb.hop1.g++.8
new file mode 100644
index 0000000000..d1be1284fa
--- /dev/null
+++ b/examples/neb/log.19Jun17.neb.hop1.g++.8
@@ -0,0 +1,9 @@
+LAMMPS (19 May 2017)
+Running on 4 partitions of processors
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+0 4327.2753 2746.3378 0.082169072 4.9967651 4514.5424 0.42933428 0.42323635 1.8941131 0 -3.0535948 0.33333333 -2.6242605 0.66666667 -2.7623811 1 -3.0474969
+87 0.095951792 0.052720902 0.0055889267 0.065110091 0.12467831 0.0071014928 0.0022798007 2.3003372 0 -3.0535967 0.32435271 -3.0473127 0.62805027 -3.0464952 1 -3.048775
+Climbing replica = 3
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+87 0.14137297 0.11108954 0.0055889267 0.065110091 0.12467831 0.0071014928 0.0022798007 2.3003372 0 -3.0535967 0.32435271 -3.0473127 0.62805027 -3.0464952 1 -3.048775
+124 0.099582186 0.08593683 0.0044220345 0.023873731 0.091308197 0.0071061754 0.0022863931 2.3081211 0 -3.0535968 0.32223904 -3.0473329 0.61673896 -3.0464906 1 -3.048777
diff --git a/examples/neb/log.19Jun17.neb.hop2.g++.4 b/examples/neb/log.19Jun17.neb.hop2.g++.4
new file mode 100644
index 0000000000..c6b6cbe2ce
--- /dev/null
+++ b/examples/neb/log.19Jun17.neb.hop2.g++.4
@@ -0,0 +1,12 @@
+LAMMPS (19 May 2017)
+Running on 4 partitions of processors
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+0 14.104748 10.419633 0.1227071 4.999238 8.2087606 0.0018276223 0.00064050211 0.98401186 0 -3.0514921 0.33333333 -3.0496673 0.66666667 -3.0496645 1 -3.050305
+100 0.24646695 0.10792196 0.0077146918 0.058733261 0.63504706 0.001516756 0.0015151635 1.165391 0 -3.0514939 0.2890334 -3.0503533 0.59718494 -3.0499771 1 -3.0514923
+200 0.061777741 0.050288749 0.0047486883 0.0095236035 0.88698597 0.0014465772 0.0014462528 1.1692938 0 -3.0514941 0.29975094 -3.0503052 0.62768286 -3.0500476 1 -3.0514938
+261 0.048699591 0.038138604 0.0040083594 0.0074854409 0.95722712 0.0014243579 0.0014241377 1.1696848 0 -3.0514942 0.30525481 -3.0502812 0.6357998 -3.0500698 1 -3.051494
+Climbing replica = 3
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+261 0.95753855 0.94297239 0.0040083594 0.0074854409 0.95722712 0.0014243579 0.0014241377 1.1696848 0 -3.0514942 0.30525481 -3.0502812 0.6357998 -3.0500698 1 -3.051494
+361 0.072509627 0.06580631 0.0027545765 0.0044749366 0.016746483 0.0016018879 0.0016017805 1.1704611 0 -3.0514943 0.28176307 -3.0503855 0.50355454 -3.0498924 1 -3.0514942
+381 0.04884836 0.040787876 0.0023445904 0.0035162935 0.017959209 0.0016017716 0.0016016898 1.1713862 0 -3.0514943 0.27120138 -3.0504399 0.50428218 -3.0498925 1 -3.0514942
diff --git a/examples/neb/log.19Jun17.neb.hop2.g++.8 b/examples/neb/log.19Jun17.neb.hop2.g++.8
new file mode 100644
index 0000000000..c6b6cbe2ce
--- /dev/null
+++ b/examples/neb/log.19Jun17.neb.hop2.g++.8
@@ -0,0 +1,12 @@
+LAMMPS (19 May 2017)
+Running on 4 partitions of processors
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+0 14.104748 10.419633 0.1227071 4.999238 8.2087606 0.0018276223 0.00064050211 0.98401186 0 -3.0514921 0.33333333 -3.0496673 0.66666667 -3.0496645 1 -3.050305
+100 0.24646695 0.10792196 0.0077146918 0.058733261 0.63504706 0.001516756 0.0015151635 1.165391 0 -3.0514939 0.2890334 -3.0503533 0.59718494 -3.0499771 1 -3.0514923
+200 0.061777741 0.050288749 0.0047486883 0.0095236035 0.88698597 0.0014465772 0.0014462528 1.1692938 0 -3.0514941 0.29975094 -3.0503052 0.62768286 -3.0500476 1 -3.0514938
+261 0.048699591 0.038138604 0.0040083594 0.0074854409 0.95722712 0.0014243579 0.0014241377 1.1696848 0 -3.0514942 0.30525481 -3.0502812 0.6357998 -3.0500698 1 -3.051494
+Climbing replica = 3
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+261 0.95753855 0.94297239 0.0040083594 0.0074854409 0.95722712 0.0014243579 0.0014241377 1.1696848 0 -3.0514942 0.30525481 -3.0502812 0.6357998 -3.0500698 1 -3.051494
+361 0.072509627 0.06580631 0.0027545765 0.0044749366 0.016746483 0.0016018879 0.0016017805 1.1704611 0 -3.0514943 0.28176307 -3.0503855 0.50355454 -3.0498924 1 -3.0514942
+381 0.04884836 0.040787876 0.0023445904 0.0035162935 0.017959209 0.0016017716 0.0016016898 1.1713862 0 -3.0514943 0.27120138 -3.0504399 0.50428218 -3.0498925 1 -3.0514942
diff --git a/examples/neb/log.19Jun17.neb.sivac.g++.4 b/examples/neb/log.19Jun17.neb.sivac.g++.4
new file mode 100644
index 0000000000..0d9880ca81
--- /dev/null
+++ b/examples/neb/log.19Jun17.neb.sivac.g++.4
@@ -0,0 +1,17 @@
+LAMMPS (19 May 2017)
+Running on 4 partitions of processors
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+0 7.5525391 1.6345605 0.16683659 7.5525391 7.5525391 1.5383951 0 1.6207355 0 -2213.3343 0.33333333 -2212.7428 0.66666667 -2212.2247 1 -2211.7959
+10 0.24005275 0.036502104 0.036483049 0.24005275 0.68351722 0.42916118 0.41794425 1.6989349 0 -2213.3365 0.32909183 -2212.9587 0.65386736 -2212.9073 1 -2213.3253
+20 0.07940898 0.016398055 0.024706844 0.07940898 0.71637784 0.41387872 0.41157886 1.7343662 0 -2213.3369 0.32478734 -2212.9621 0.65348766 -2212.923 1 -2213.3346
+30 0.094973707 0.0083631681 0.015145947 0.035267404 0.7535772 0.40072717 0.40024605 1.7504612 0 -2213.3372 0.32705584 -2212.9584 0.65894506 -2212.9365 1 -2213.3367
+40 0.027727472 0.0044528145 0.011618173 0.022562656 0.76133752 0.39614635 0.39591731 1.7547519 0 -2213.3373 0.32873163 -2212.9562 0.66124255 -2212.9411 1 -2213.337
+50 0.019429348 0.0030110281 0.0087135563 0.015391975 0.76952681 0.39274846 0.3926388 1.7578616 0 -2213.3373 0.33022595 -2212.9543 0.66307279 -2212.9446 1 -2213.3372
+60 0.019009471 0.0016234562 0.0053426307 0.0086166186 0.77759617 0.38936861 0.38933364 1.7610433 0 -2213.3374 0.33187548 -2212.9523 0.66497617 -2212.948 1 -2213.3373
+63 0.0097365134 0.0012734598 0.004777604 0.0076121987 0.77865149 0.38888778 0.38886047 1.7615294 0 -2213.3374 0.33212107 -2212.952 0.66525385 -2212.9485 1 -2213.3373
+Climbing replica = 3
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+63 0.77865149 0.31085821 0.004777604 0.0076121987 0.77865149 0.38888778 0.38886047 1.7615294 0 -2213.3374 0.33212107 -2212.952 0.66525385 -2212.9485 1 -2213.3373
+73 0.098175496 0.033609035 0.0027886955 0.0042742148 0.036594003 0.51024838 0.51023983 1.7607181 0 -2213.3374 0.27574151 -2213.0416 0.50432348 -2212.8271 1 -2213.3374
+83 0.03341862 0.012760857 0.0020868177 0.0031625649 0.010189924 0.51014634 0.51014168 1.7602562 0 -2213.3374 0.26045338 -2213.0672 0.50355193 -2212.8272 1 -2213.3374
+93 0.0097374358 0.0028416114 0.0014003718 0.0020986584 0.0053485291 0.51011052 0.51010848 1.7601202 0 -2213.3374 0.25397887 -2213.0783 0.50388111 -2212.8273 1 -2213.3374
diff --git a/examples/neb/log.19Jun17.neb.sivac.g++.8 b/examples/neb/log.19Jun17.neb.sivac.g++.8
new file mode 100644
index 0000000000..260eb9e18b
--- /dev/null
+++ b/examples/neb/log.19Jun17.neb.sivac.g++.8
@@ -0,0 +1,18 @@
+LAMMPS (19 May 2017)
+Running on 4 partitions of processors
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+0 7.5525391 1.6345605 0.16683659 7.5525391 7.5525391 1.5383951 0 1.6207355 0 -2213.3343 0.33333333 -2212.7428 0.66666667 -2212.2247 1 -2211.7959
+10 0.24005275 0.036502104 0.036483049 0.24005275 0.68351722 0.42916118 0.41794425 1.6989349 0 -2213.3365 0.32909183 -2212.9587 0.65386736 -2212.9073 1 -2213.3253
+20 0.07940898 0.016398055 0.024706844 0.07940898 0.71637784 0.41387872 0.41157886 1.7343662 0 -2213.3369 0.32478734 -2212.9621 0.65348766 -2212.923 1 -2213.3346
+30 0.094973708 0.0083631681 0.015145947 0.035267404 0.7535772 0.40072717 0.40024605 1.7504612 0 -2213.3372 0.32705584 -2212.9584 0.65894506 -2212.9365 1 -2213.3367
+40 0.027727472 0.0044528144 0.011618173 0.022562656 0.76133752 0.39614635 0.39591731 1.7547519 0 -2213.3373 0.32873163 -2212.9562 0.66124255 -2212.9411 1 -2213.337
+50 0.019429341 0.0030110281 0.0087135565 0.015391975 0.7695268 0.39274846 0.3926388 1.7578616 0 -2213.3373 0.33022595 -2212.9543 0.66307279 -2212.9446 1 -2213.3372
+60 0.019048963 0.0016262345 0.0053426844 0.0086167196 0.77759655 0.38936867 0.3893337 1.7610433 0 -2213.3374 0.33187545 -2212.9523 0.66497615 -2212.948 1 -2213.3373
+63 0.0097037048 0.0012761841 0.0047749367 0.0076075138 0.77865545 0.38888554 0.38885827 1.7615318 0 -2213.3374 0.33212221 -2212.952 0.66525512 -2212.9485 1 -2213.3373
+Climbing replica = 3
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+63 0.77865545 0.3108551 0.0047749367 0.0076075138 0.77865545 0.38888554 0.38885827 1.7615318 0 -2213.3374 0.33212221 -2212.952 0.66525512 -2212.9485 1 -2213.3373
+73 0.098595989 0.033659485 0.0027927196 0.0042813387 0.038224344 0.51024759 0.51023901 1.7607156 0 -2213.3374 0.27595612 -2213.0413 0.50453988 -2212.8271 1 -2213.3374
+83 0.033344977 0.012868685 0.0020880608 0.0031645847 0.010250413 0.51014677 0.5101421 1.7602601 0 -2213.3374 0.26053624 -2213.067 0.50358775 -2212.8272 1 -2213.3374
+93 0.013254873 0.0038176141 0.0014928226 0.0022407967 0.0058577818 0.51011371 0.51011138 1.7601272 0 -2213.3374 0.25452741 -2213.0774 0.50382161 -2212.8273 1 -2213.3374
+95 0.0099964951 0.0031053214 0.0014131665 0.0021184362 0.0053683638 0.51011105 0.51010897 1.7601232 0 -2213.3374 0.2540975 -2213.0781 0.50387313 -2212.8273 1 -2213.3374
diff --git a/examples/neb/log.5Oct16.neb.hop1.g++.4 b/examples/neb/log.5Oct16.neb.hop1.g++.4
deleted file mode 100644
index c678e69493..0000000000
--- a/examples/neb/log.5Oct16.neb.hop1.g++.4
+++ /dev/null
@@ -1,10 +0,0 @@
-LAMMPS (5 Oct 2016)
-Running on 4 partitions of processors
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-0 4327.2753 2746.3378 0.3387091 5.0075576 4514.5424 0.42933428 0.42323635 1.8941131 0 -3.0535948 0.33333333 -2.6242605 0.66666667 -2.7623811 1 -3.0474969
-100 0.10482184 0.085218486 0.014588241 0.066178594 0.19602237 0.0070900402 0.0022691875 2.3031875 0 -3.0535967 0.31839181 -3.0473647 0.63987598 -3.0465067 1 -3.0487759
-111 0.096708467 0.07803707 0.013922973 0.05417562 0.2023467 0.0070871172 0.0022668002 2.3052945 0 -3.0535968 0.31853431 -3.0473633 0.64178871 -3.0465096 1 -3.0487764
-Climbing replica = 3
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-111 0.2023467 0.1777038 0.013922973 0.05417562 0.2023467 0.0070871172 0.0022668002 2.3052945 0 -3.0535968 0.31853431 -3.0473633 0.64178871 -3.0465096 1 -3.0487764
-179 0.096874474 0.090676856 0.01040177 0.023364005 0.096874474 0.0071047642 0.0022856172 2.3122768 0 -3.0535969 0.31577311 -3.0473955 0.61798541 -3.0464922 1 -3.0487778
diff --git a/examples/neb/log.5Oct16.neb.hop1.g++.8 b/examples/neb/log.5Oct16.neb.hop1.g++.8
deleted file mode 100644
index d70f02bd16..0000000000
--- a/examples/neb/log.5Oct16.neb.hop1.g++.8
+++ /dev/null
@@ -1,10 +0,0 @@
-LAMMPS (5 Oct 2016)
-Running on 4 partitions of processors
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-0 4327.2753 2746.3378 0.3387091 5.0075576 4514.5424 0.42933428 0.42323635 1.8941131 0 -3.0535948 0.33333333 -2.6242605 0.66666667 -2.7623811 1 -3.0474969
-100 0.10482171 0.085218406 0.014588234 0.066178435 0.19602242 0.0070900401 0.0022691875 2.3031875 0 -3.0535967 0.31839181 -3.0473647 0.639876 -3.0465067 1 -3.0487759
-111 0.096708718 0.078036984 0.013922966 0.054175505 0.20234693 0.0070871172 0.0022668002 2.3052946 0 -3.0535968 0.31853431 -3.0473633 0.64178873 -3.0465096 1 -3.0487764
-Climbing replica = 3
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-111 0.20234693 0.17770387 0.013922966 0.054175505 0.20234693 0.0070871172 0.0022668002 2.3052946 0 -3.0535968 0.31853431 -3.0473633 0.64178873 -3.0465096 1 -3.0487764
-178 0.09975409 0.093814031 0.010577358 0.024247224 0.09975409 0.0071042931 0.0022851195 2.312004 0 -3.0535969 0.31607934 -3.0473923 0.618931 -3.0464926 1 -3.0487777
diff --git a/examples/neb/log.5Oct16.neb.hop2.g++.4 b/examples/neb/log.5Oct16.neb.hop2.g++.4
deleted file mode 100644
index 9977287303..0000000000
--- a/examples/neb/log.5Oct16.neb.hop2.g++.4
+++ /dev/null
@@ -1,18 +0,0 @@
-LAMMPS (5 Oct 2016)
-Running on 4 partitions of processors
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-0 14.104748 10.419633 0.24852044 5.0039071 8.2116049 0.0018276223 0.00064050211 0.98401186 0 -3.0514921 0.33333333 -3.0496673 0.66666667 -3.0496645 1 -3.050305
-100 0.24646695 0.10792196 0.01781018 0.098854684 0.63725646 0.001516756 0.0015151635 1.165391 0 -3.0514939 0.2890334 -3.0503533 0.59718494 -3.0499771 1 -3.0514923
-200 0.061777741 0.050288749 0.012466513 0.020420207 0.88741041 0.0014465772 0.0014462528 1.1692938 0 -3.0514941 0.29975094 -3.0503052 0.62768286 -3.0500476 1 -3.0514938
-300 0.056346766 0.030000618 0.0093152917 0.013765031 1.0101529 0.0014069751 0.0014068154 1.1699608 0 -3.0514942 0.30992449 -3.0502613 0.64174291 -3.0500873 1 -3.0514941
-400 0.025589489 0.015671005 0.0061287063 0.008588518 1.1136424 0.001370987 0.0013709154 1.1704204 0 -3.0514943 0.32016645 -3.0502198 0.65324019 -3.0501233 1 -3.0514943
-500 0.014778626 0.0092108366 0.0042668521 0.0059963914 1.1636579 0.0013527466 0.0013527072 1.1706283 0 -3.0514944 0.32550275 -3.0501993 0.65875414 -3.0501416 1 -3.0514943
-600 0.08786211 0.020876327 0.0031421548 0.0051657363 1.1898894 0.0013430848 0.0013430599 1.1707681 0 -3.0514944 0.32831927 -3.0501889 0.66160681 -3.0501513 1 -3.0514944
-633 0.0098132678 0.0055392541 0.0030063464 0.0043091323 1.1924486 0.0013420127 0.0013419893 1.1707818 0 -3.0514944 0.32862625 -3.0501878 0.66191769 -3.0501524 1 -3.0514944
-Climbing replica = 3
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-633 1.1924486 1.1648685 0.0030063464 0.0043091323 1.1924486 0.0013420127 0.0013419893 1.1707818 0 -3.0514944 0.32862625 -3.0501878 0.66191769 -3.0501524 1 -3.0514944
-733 0.095331134 0.089136608 0.0021551441 0.0031844438 0.043042998 0.0016022317 0.0016022168 1.170789 0 -3.0514944 0.29157063 -3.0503375 0.50358402 -3.0498922 1 -3.0514944
-833 0.10539135 0.030724373 0.0013749699 0.002221013 0.10539135 0.0016019798 0.001601971 1.1732118 0 -3.0514944 0.26249002 -3.0504848 0.50415223 -3.0498924 1 -3.0514944
-933 0.01883894 0.011496399 0.0011058925 0.0018178041 0.014621806 0.0016018934 0.0016018865 1.173866 0 -3.0514944 0.25788763 -3.0505113 0.50466375 -3.0498925 1 -3.0514944
-996 0.0082457876 0.0036336551 0.00077325986 0.0013910671 0.0068823708 0.0016018293 0.0016018244 1.174511 0 -3.0514944 0.2544553 -3.0505324 0.50520462 -3.0498926 1 -3.0514944
diff --git a/examples/neb/log.5Oct16.neb.hop2.g++.8 b/examples/neb/log.5Oct16.neb.hop2.g++.8
deleted file mode 100644
index 9977287303..0000000000
--- a/examples/neb/log.5Oct16.neb.hop2.g++.8
+++ /dev/null
@@ -1,18 +0,0 @@
-LAMMPS (5 Oct 2016)
-Running on 4 partitions of processors
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-0 14.104748 10.419633 0.24852044 5.0039071 8.2116049 0.0018276223 0.00064050211 0.98401186 0 -3.0514921 0.33333333 -3.0496673 0.66666667 -3.0496645 1 -3.050305
-100 0.24646695 0.10792196 0.01781018 0.098854684 0.63725646 0.001516756 0.0015151635 1.165391 0 -3.0514939 0.2890334 -3.0503533 0.59718494 -3.0499771 1 -3.0514923
-200 0.061777741 0.050288749 0.012466513 0.020420207 0.88741041 0.0014465772 0.0014462528 1.1692938 0 -3.0514941 0.29975094 -3.0503052 0.62768286 -3.0500476 1 -3.0514938
-300 0.056346766 0.030000618 0.0093152917 0.013765031 1.0101529 0.0014069751 0.0014068154 1.1699608 0 -3.0514942 0.30992449 -3.0502613 0.64174291 -3.0500873 1 -3.0514941
-400 0.025589489 0.015671005 0.0061287063 0.008588518 1.1136424 0.001370987 0.0013709154 1.1704204 0 -3.0514943 0.32016645 -3.0502198 0.65324019 -3.0501233 1 -3.0514943
-500 0.014778626 0.0092108366 0.0042668521 0.0059963914 1.1636579 0.0013527466 0.0013527072 1.1706283 0 -3.0514944 0.32550275 -3.0501993 0.65875414 -3.0501416 1 -3.0514943
-600 0.08786211 0.020876327 0.0031421548 0.0051657363 1.1898894 0.0013430848 0.0013430599 1.1707681 0 -3.0514944 0.32831927 -3.0501889 0.66160681 -3.0501513 1 -3.0514944
-633 0.0098132678 0.0055392541 0.0030063464 0.0043091323 1.1924486 0.0013420127 0.0013419893 1.1707818 0 -3.0514944 0.32862625 -3.0501878 0.66191769 -3.0501524 1 -3.0514944
-Climbing replica = 3
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-633 1.1924486 1.1648685 0.0030063464 0.0043091323 1.1924486 0.0013420127 0.0013419893 1.1707818 0 -3.0514944 0.32862625 -3.0501878 0.66191769 -3.0501524 1 -3.0514944
-733 0.095331134 0.089136608 0.0021551441 0.0031844438 0.043042998 0.0016022317 0.0016022168 1.170789 0 -3.0514944 0.29157063 -3.0503375 0.50358402 -3.0498922 1 -3.0514944
-833 0.10539135 0.030724373 0.0013749699 0.002221013 0.10539135 0.0016019798 0.001601971 1.1732118 0 -3.0514944 0.26249002 -3.0504848 0.50415223 -3.0498924 1 -3.0514944
-933 0.01883894 0.011496399 0.0011058925 0.0018178041 0.014621806 0.0016018934 0.0016018865 1.173866 0 -3.0514944 0.25788763 -3.0505113 0.50466375 -3.0498925 1 -3.0514944
-996 0.0082457876 0.0036336551 0.00077325986 0.0013910671 0.0068823708 0.0016018293 0.0016018244 1.174511 0 -3.0514944 0.2544553 -3.0505324 0.50520462 -3.0498926 1 -3.0514944
diff --git a/examples/neb/log.5Oct16.neb.sivac.g++.3 b/examples/neb/log.5Oct16.neb.sivac.g++.3
deleted file mode 100644
index f6adae4a18..0000000000
--- a/examples/neb/log.5Oct16.neb.sivac.g++.3
+++ /dev/null
@@ -1,14 +0,0 @@
-LAMMPS (5 Oct 2016)
-Running on 3 partitions of processors
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-0 7.5525391 1.6345605 0.16683659 7.5525391 7.5525391 1.5383951 0 1.6207355 0 -2213.3343 0.5 -2212.4096 1 -2211.7959
-10 0.27332818 0.040944923 0.039164338 0.27332818 0.17804882 0.51235911 0.497084 1.6790474 0 -2213.3364 0.49024121 -2212.824 1 -2213.3211
-20 0.1820396 0.018049916 0.024428411 0.1820396 0.08601739 0.51038174 0.5080746 1.7224961 0 -2213.337 0.49199582 -2212.8266 1 -2213.3347
-30 0.043288796 0.0068108825 0.017372479 0.043288796 0.049466709 0.51032316 0.5095943 1.7304745 0 -2213.3371 0.49553568 -2212.8268 1 -2213.3364
-40 0.0421393 0.0037035761 0.01173707 0.0421393 0.026104735 0.51022733 0.5100163 1.7366752 0 -2213.3373 0.49838067 -2212.8271 1 -2213.3371
-50 0.025897844 0.0022804241 0.0081056535 0.025897844 0.016908913 0.5101712 0.51008591 1.739143 0 -2213.3373 0.49923344 -2212.8272 1 -2213.3373
-59 0.00962839 0.0012946076 0.005657505 0.009365729 0.012040803 0.51014185 0.51010207 1.7404554 0 -2213.3374 0.49955698 -2212.8272 1 -2213.3373
-Climbing replica = 2
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-59 0.012040803 0.0031505502 0.005657505 0.009365729 0.012040803 0.51014185 0.51010207 1.7404554 0 -2213.3374 0.49955698 -2212.8272 1 -2213.3373
-63 0.009152118 0.0016692472 0.0049645771 0.0081967836 0.009152118 0.51013743 0.51010776 1.7409028 0 -2213.3374 0.50022239 -2212.8272 1 -2213.3373
diff --git a/examples/neb/log.5Oct16.neb.sivac.g++.6 b/examples/neb/log.5Oct16.neb.sivac.g++.6
deleted file mode 100644
index e00069d052..0000000000
--- a/examples/neb/log.5Oct16.neb.sivac.g++.6
+++ /dev/null
@@ -1,14 +0,0 @@
-LAMMPS (5 Oct 2016)
-Running on 3 partitions of processors
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-0 7.5525391 1.6345605 0.16683659 7.5525391 7.5525391 1.5383951 0 1.6207355 0 -2213.3343 0.5 -2212.4096 1 -2211.7959
-10 0.27332818 0.040944923 0.039164338 0.27332818 0.17804882 0.51235911 0.497084 1.6790474 0 -2213.3364 0.49024121 -2212.824 1 -2213.3211
-20 0.1820396 0.018049916 0.024428411 0.1820396 0.08601739 0.51038174 0.5080746 1.7224961 0 -2213.337 0.49199582 -2212.8266 1 -2213.3347
-30 0.043288796 0.0068108825 0.017372479 0.043288796 0.049466709 0.51032316 0.5095943 1.7304745 0 -2213.3371 0.49553568 -2212.8268 1 -2213.3364
-40 0.042139305 0.0037035764 0.01173707 0.042139305 0.026104735 0.51022733 0.5100163 1.7366752 0 -2213.3373 0.49838067 -2212.8271 1 -2213.3371
-50 0.025899631 0.0022805513 0.0081057075 0.025899631 0.016908929 0.5101712 0.51008591 1.739143 0 -2213.3373 0.49923345 -2212.8272 1 -2213.3373
-59 0.0096285044 0.0012946258 0.0056576061 0.0093678253 0.012040919 0.51014185 0.51010207 1.7404554 0 -2213.3374 0.49955698 -2212.8272 1 -2213.3373
-Climbing replica = 2
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-59 0.012040919 0.0031505771 0.0056576061 0.0093678253 0.012040919 0.51014185 0.51010207 1.7404554 0 -2213.3374 0.49955698 -2212.8272 1 -2213.3373
-63 0.0091523813 0.0016692845 0.0049647607 0.0081998372 0.0091523813 0.51013743 0.51010775 1.7409028 0 -2213.3374 0.50022236 -2212.8272 1 -2213.3373
diff --git a/examples/neb/log.5Oct16.neb.sivac.g++.9 b/examples/neb/log.5Oct16.neb.sivac.g++.9
deleted file mode 100644
index 31ab7c9ac6..0000000000
--- a/examples/neb/log.5Oct16.neb.sivac.g++.9
+++ /dev/null
@@ -1,14 +0,0 @@
-LAMMPS (5 Oct 2016)
-Running on 3 partitions of processors
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-0 7.5525391 1.6345605 0.16683659 7.5525391 7.5525391 1.5383951 0 1.6207355 0 -2213.3343 0.5 -2212.4096 1 -2211.7959
-10 0.27332818 0.040944923 0.039164338 0.27332818 0.17804882 0.51235911 0.497084 1.6790474 0 -2213.3364 0.49024121 -2212.824 1 -2213.3211
-20 0.1820396 0.018049916 0.024428411 0.1820396 0.08601739 0.51038174 0.5080746 1.7224961 0 -2213.337 0.49199582 -2212.8266 1 -2213.3347
-30 0.043288796 0.0068108825 0.017372479 0.043288796 0.049466709 0.51032316 0.5095943 1.7304745 0 -2213.3371 0.49553568 -2212.8268 1 -2213.3364
-40 0.042139318 0.0037035773 0.011737071 0.042139318 0.026104737 0.51022733 0.5100163 1.7366752 0 -2213.3373 0.49838067 -2212.8271 1 -2213.3371
-50 0.025904121 0.0022808707 0.0081058431 0.025904121 0.016908969 0.5101712 0.51008591 1.7391431 0 -2213.3373 0.49923346 -2212.8272 1 -2213.3373
-59 0.0096287928 0.0012946716 0.005657861 0.0093731008 0.01204121 0.51014185 0.51010207 1.7404554 0 -2213.3374 0.49955696 -2212.8272 1 -2213.3373
-Climbing replica = 2
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-59 0.01204121 0.0031506449 0.005657861 0.0093731008 0.01204121 0.51014185 0.51010207 1.7404554 0 -2213.3374 0.49955696 -2212.8272 1 -2213.3373
-63 0.0091530442 0.0016693787 0.0049652227 0.0082075097 0.0091530442 0.51013743 0.51010775 1.7409027 0 -2213.3374 0.50022228 -2212.8272 1 -2213.3373
diff --git a/src/CORESHELL/compute_temp_cs.h b/src/CORESHELL/compute_temp_cs.h
index 5a1d1434c3..3e93e4a68c 100644
--- a/src/CORESHELL/compute_temp_cs.h
+++ b/src/CORESHELL/compute_temp_cs.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/CORESHELL/pair_born_coul_long_cs.h b/src/CORESHELL/pair_born_coul_long_cs.h
index d2c8c04849..68c29e4fc2 100644
--- a/src/CORESHELL/pair_born_coul_long_cs.h
+++ b/src/CORESHELL/pair_born_coul_long_cs.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/CORESHELL/pair_buck_coul_long_cs.h b/src/CORESHELL/pair_buck_coul_long_cs.h
index 7f0bc149c1..d6b117d677 100644
--- a/src/CORESHELL/pair_buck_coul_long_cs.h
+++ b/src/CORESHELL/pair_buck_coul_long_cs.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/GPU/pair_lj_cubic_gpu.h b/src/GPU/pair_lj_cubic_gpu.h
index 1591eb8b9e..cdfc157e8e 100644
--- a/src/GPU/pair_lj_cubic_gpu.h
+++ b/src/GPU/pair_lj_cubic_gpu.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/GPU/pair_tersoff_gpu.h b/src/GPU/pair_tersoff_gpu.h
index 4fa358a6b1..ed3dadef5d 100644
--- a/src/GPU/pair_tersoff_gpu.h
+++ b/src/GPU/pair_tersoff_gpu.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/GPU/pair_tersoff_mod_gpu.h b/src/GPU/pair_tersoff_mod_gpu.h
index 6d3017669a..3967e90a70 100644
--- a/src/GPU/pair_tersoff_mod_gpu.h
+++ b/src/GPU/pair_tersoff_mod_gpu.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/GPU/pair_tersoff_zbl_gpu.h b/src/GPU/pair_tersoff_zbl_gpu.h
index 003e037bba..ba923ffd2f 100644
--- a/src/GPU/pair_tersoff_zbl_gpu.h
+++ b/src/GPU/pair_tersoff_zbl_gpu.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/GPU/pair_zbl_gpu.h b/src/GPU/pair_zbl_gpu.h
index 950fe952dd..3e6ac37394 100644
--- a/src/GPU/pair_zbl_gpu.h
+++ b/src/GPU/pair_zbl_gpu.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/KOKKOS/fix_reaxc_species_kokkos.cpp b/src/KOKKOS/fix_reaxc_species_kokkos.cpp
index ce84de30cb..8b778ecf65 100644
--- a/src/KOKKOS/fix_reaxc_species_kokkos.cpp
+++ b/src/KOKKOS/fix_reaxc_species_kokkos.cpp
@@ -48,7 +48,7 @@ FixReaxCSpeciesKokkos::FixReaxCSpeciesKokkos(LAMMPS *lmp, int narg, char **arg)
{
kokkosable = 1;
atomKK = (AtomKokkos *) atom;
-
+
// NOTE: Could improve performance if a Kokkos version of ComputeSpecAtom is added
datamask_read = X_MASK | V_MASK | Q_MASK | MASK_MASK;
@@ -116,35 +116,30 @@ void FixReaxCSpeciesKokkos::FindMolecule()
done = 1;
for (ii = 0; ii < inum; ii++) {
- i = ilist[ii];
- if (!(mask[i] & groupbit)) continue;
+ i = ilist[ii];
+ if (!(mask[i] & groupbit)) continue;
- itype = atom->type[i];
+ itype = atom->type[i];
for (jj = 0; jj < MAXSPECBOND; jj++) {
- j = reaxc->tmpid[i][jj];
+ j = reaxc->tmpid[i][jj];
- if (j < i) continue;
- if (!(mask[j] & groupbit)) continue;
+ if ((j == 0) && (j < i)) continue;
+ if (!(mask[j] & groupbit)) continue;
- if (clusterID[i] == clusterID[j] && PBCconnected[i] == PBCconnected[j]
- && x0[i].x == x0[j].x && x0[i].y == x0[j].y && x0[i].z == x0[j].z) continue;
+ if (clusterID[i] == clusterID[j]
+ && x0[i].x == x0[j].x && x0[i].y == x0[j].y && x0[i].z == x0[j].z) continue;
jtype = atom->type[j];
- bo_cut = BOCut[itype][jtype];
- bo_tmp = spec_atom[i][jj+7];
+ bo_cut = BOCut[itype][jtype];
+ bo_tmp = spec_atom[i][jj+7];
- if (bo_tmp > bo_cut) {
+ if (bo_tmp > bo_cut) {
clusterID[i] = clusterID[j] = MIN(clusterID[i], clusterID[j]);
- PBCconnected[i] = PBCconnected[j] = MAX(PBCconnected[i], PBCconnected[j]);
x0[i] = x0[j] = chAnchor(x0[i], x0[j]);
- if ((fabs(spec_atom[i][1] - spec_atom[j][1]) > reaxc->control->bond_cut)
- || (fabs(spec_atom[i][2] - spec_atom[j][2]) > reaxc->control->bond_cut)
- || (fabs(spec_atom[i][3] - spec_atom[j][3]) > reaxc->control->bond_cut))
- PBCconnected[i] = PBCconnected[j] = 1;
- done = 0;
- }
- }
+ done = 0;
+ }
+ }
}
if (!done) change = 1;
if (done) break;
diff --git a/src/KOKKOS/pair_buck_kokkos.h b/src/KOKKOS/pair_buck_kokkos.h
index d57e320e99..2691f10929 100644
--- a/src/KOKKOS/pair_buck_kokkos.h
+++ b/src/KOKKOS/pair_buck_kokkos.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/KOKKOS/pair_sw_kokkos.h b/src/KOKKOS/pair_sw_kokkos.h
index d899edfc1b..b94e39335f 100644
--- a/src/KOKKOS/pair_sw_kokkos.h
+++ b/src/KOKKOS/pair_sw_kokkos.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/KOKKOS/pair_vashishta_kokkos.h b/src/KOKKOS/pair_vashishta_kokkos.h
index 49c936185d..174db2cb94 100644
--- a/src/KOKKOS/pair_vashishta_kokkos.h
+++ b/src/KOKKOS/pair_vashishta_kokkos.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp b/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp
index 6e17a9bbd7..30d8ab64b6 100644
--- a/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp
+++ b/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp
@@ -25,6 +25,7 @@
#include
#include "pair_lj_charmmfsw_coul_long.h"
#include "atom.h"
+#include "update.h"
#include "comm.h"
#include "force.h"
#include "kspace.h"
@@ -61,6 +62,15 @@ PairLJCharmmfswCoulLong::PairLJCharmmfswCoulLong(LAMMPS *lmp) : Pair(lmp)
// short-range/long-range flag accessed by DihedralCharmmfsw
dihedflag = 1;
+
+ // switch qqr2e from LAMMPS value to CHARMM value
+
+ if (strcmp(update->unit_style,"real") == 0) {
+ if ((comm->me == 0) && (force->qqr2e != force->qqr2e_charmm_real))
+ error->message(FLERR,"Switching to CHARMM coulomb energy"
+ " conversion constant");
+ force->qqr2e = force->qqr2e_charmm_real;
+ }
}
/* ---------------------------------------------------------------------- */
@@ -87,6 +97,15 @@ PairLJCharmmfswCoulLong::~PairLJCharmmfswCoulLong()
}
if (ftable) free_tables();
}
+
+ // switch qqr2e back from CHARMM value to LAMMPS value
+
+ if (update && strcmp(update->unit_style,"real") == 0) {
+ if ((comm->me == 0) && (force->qqr2e == force->qqr2e_charmm_real))
+ error->message(FLERR,"Restoring original LAMMPS coulomb energy"
+ " conversion constant");
+ force->qqr2e = force->qqr2e_lammps_real;
+ }
}
/* ---------------------------------------------------------------------- */
diff --git a/src/KSPACE/pair_lj_long_tip4p_long.cpp b/src/KSPACE/pair_lj_long_tip4p_long.cpp
index d2a6b801fc..1dc1ca1cb4 100644
--- a/src/KSPACE/pair_lj_long_tip4p_long.cpp
+++ b/src/KSPACE/pair_lj_long_tip4p_long.cpp
@@ -1337,8 +1337,8 @@ void PairLJLongTIP4PLong::compute_outer(int eflag, int vflag)
fH[1] = 0.5 * alpha * fd[1];
fH[2] = 0.5 * alpha * fd[2];
- xH1 = x[jH1];
- xH2 = x[jH2];
+ xH1 = x[iH1];
+ xH2 = x[iH2];
v[0] = x[i][0]*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0];
v[1] = x[i][1]*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1];
v[2] = x[i][2]*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2];
diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi
index 2cb37ed9fe..ac8279949a 100644
--- a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi
+++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi
@@ -8,7 +8,7 @@ SHELL = /bin/sh
CC = mpiicpc
OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
-CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \
+CCFLAGS = -qopenmp -DLAMMPS_MEMALIGN=64 -qno-offload \
-fno-alias -ansi-alias -restrict $(OPTFLAGS)
SHFLAGS = -fPIC
DEPFLAGS = -M
diff --git a/src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor b/src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor
index b7f3cd6846..db5de83a06 100644
--- a/src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor
+++ b/src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor
@@ -8,7 +8,7 @@ SHELL = /bin/sh
CC = mpiicpc
MIC_OPT = -qoffload-arch=mic-avx512 -fp-model fast=2
-CCFLAGS = -g -O3 -qopenmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64 \
+CCFLAGS = -O3 -qopenmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64 \
-xHost -fno-alias -ansi-alias -restrict \
-qoverride-limits $(MIC_OPT)
SHFLAGS = -fPIC
diff --git a/src/MAKE/OPTIONS/Makefile.knl b/src/MAKE/OPTIONS/Makefile.knl
index 3bc777592e..881c51f0e4 100644
--- a/src/MAKE/OPTIONS/Makefile.knl
+++ b/src/MAKE/OPTIONS/Makefile.knl
@@ -8,7 +8,7 @@ SHELL = /bin/sh
CC = mpiicpc
OPTFLAGS = -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
-CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \
+CCFLAGS = -qopenmp -DLAMMPS_MEMALIGN=64 -qno-offload \
-fno-alias -ansi-alias -restrict $(OPTFLAGS)
SHFLAGS = -fPIC
DEPFLAGS = -M
diff --git a/src/MANYBODY/pair_airebo.cpp b/src/MANYBODY/pair_airebo.cpp
index d83f5a39a8..0ca80c6b76 100644
--- a/src/MANYBODY/pair_airebo.cpp
+++ b/src/MANYBODY/pair_airebo.cpp
@@ -1271,7 +1271,7 @@ double PairAIREBO::bondorder(int i, int j, double rij[3],
double w21,dw21,r34[3],r34mag,cos234,w34,dw34;
double cross321[3],cross234[3],prefactor,SpN;
double fcijpc,fcikpc,fcjlpc,fcjkpc,fcilpc;
- double dt2dik[3],dt2djl[3],dt2dij[3],aa,aaa1,aaa2,at2,cw,cwnum,cwnom;
+ double dt2dik[3],dt2djl[3],dt2dij[3],aa,aaa2,at2,cw,cwnum,cwnom;
double sin321,sin234,rr,rijrik,rijrjl,rjk2,rik2,ril2,rjl2;
double dctik,dctjk,dctjl,dctij,dctji,dctil,rik2i,rjl2i,sink2i,sinl2i;
double rjk[3],ril[3],dt1dik,dt1djk,dt1djl,dt1dil,dt1dij;
@@ -1856,8 +1856,6 @@ double PairAIREBO::bondorder(int i, int j, double rij[3],
aa = (prefactor*2.0*cw/cwnom)*w21*w34 *
(1.0-tspjik)*(1.0-tspijl);
- aaa1 = -prefactor*(1.0-square(om1234)) *
- (1.0-tspjik)*(1.0-tspijl);
aaa2 = -prefactor*(1.0-square(om1234)) * w21*w34;
at2 = aa*cwnum;
@@ -2107,7 +2105,7 @@ double PairAIREBO::bondorderLJ(int i, int j, double rij[3], double rijmag,
double w21,dw21,r34[3],r34mag,cos234,w34,dw34;
double cross321[3],cross234[3],prefactor,SpN;
double fcikpc,fcjlpc,fcjkpc,fcilpc;
- double dt2dik[3],dt2djl[3],aa,aaa1,aaa2,at2,cw,cwnum,cwnom;
+ double dt2dik[3],dt2djl[3],aa,aaa2,at2,cw,cwnum,cwnom;
double sin321,sin234,rr,rijrik,rijrjl,rjk2,rik2,ril2,rjl2;
double dctik,dctjk,dctjl,dctil,rik2i,rjl2i,sink2i,sinl2i;
double rjk[3],ril[3],dt1dik,dt1djk,dt1djl,dt1dil;
@@ -2800,8 +2798,6 @@ double PairAIREBO::bondorderLJ(int i, int j, double rij[3], double rijmag,
aa = (prefactor*2.0*cw/cwnom)*w21*w34 *
(1.0-tspjik)*(1.0-tspijl);
- aaa1 = -prefactor*(1.0-square(om1234)) *
- (1.0-tspjik)*(1.0-tspijl);
aaa2 = -prefactor*(1.0-square(om1234)) * w21*w34;
at2 = aa*cwnum;
diff --git a/src/MANYBODY/pair_bop.h b/src/MANYBODY/pair_bop.h
index d55d9a79a4..f50c5edd00 100644
--- a/src/MANYBODY/pair_bop.h
+++ b/src/MANYBODY/pair_bop.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/MANYBODY/pair_polymorphic.h b/src/MANYBODY/pair_polymorphic.h
index 9b7fe761bb..9917bcd96d 100644
--- a/src/MANYBODY/pair_polymorphic.h
+++ b/src/MANYBODY/pair_polymorphic.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/MANYBODY/pair_vashishta_table.h b/src/MANYBODY/pair_vashishta_table.h
index a45cac5ae1..8c52f967cb 100644
--- a/src/MANYBODY/pair_vashishta_table.h
+++ b/src/MANYBODY/pair_vashishta_table.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/MC/fix_atom_swap.h b/src/MC/fix_atom_swap.h
index 25208a2b5a..74720d6222 100644
--- a/src/MC/fix_atom_swap.h
+++ b/src/MC/fix_atom_swap.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/MC/fix_gcmc.h b/src/MC/fix_gcmc.h
index 8a5375eed7..3656a1df58 100644
--- a/src/MC/fix_gcmc.h
+++ b/src/MC/fix_gcmc.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/MC/fix_tfmc.h b/src/MC/fix_tfmc.h
index fee3a944cd..d4f121eb90 100644
--- a/src/MC/fix_tfmc.h
+++ b/src/MC/fix_tfmc.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/MOLECULE/dihedral_charmm.cpp b/src/MOLECULE/dihedral_charmm.cpp
index b9d1c440d4..35953a6ac4 100644
--- a/src/MOLECULE/dihedral_charmm.cpp
+++ b/src/MOLECULE/dihedral_charmm.cpp
@@ -18,6 +18,7 @@
#include <mpi.h>
#include <math.h>
#include <stdlib.h>
+#include <string.h>
#include "dihedral_charmm.h"
#include "atom.h"
#include "comm.h"
@@ -26,6 +27,7 @@
#include "force.h"
#include "pair.h"
#include "update.h"
+#include "respa.h"
#include "math_const.h"
#include "memory.h"
#include "error.h"
@@ -368,10 +370,26 @@ void DihedralCharmm::coeff(int narg, char **arg)
void DihedralCharmm::init_style()
{
+ if (strstr(update->integrate_style,"respa")) {
+ Respa *r = (Respa *) update->integrate;
+ if (r->level_pair >= 0 && (r->level_pair != r->level_dihedral))
+ error->all(FLERR,"Dihedral style charmm must be set to same"
+ " r-RESPA level as 'pair'");
+ if (r->level_outer >= 0 && (r->level_outer != r->level_dihedral))
+ error->all(FLERR,"Dihedral style charmm must be set to same"
+ " r-RESPA level as 'outer'");
+ }
+
// insure use of CHARMM pair_style if any weight factors are non-zero
// set local ptrs to LJ 14 arrays setup by Pair
+ // also verify that the correct 1-4 scaling is set
if (weightflag) {
+
+ if ((force->special_lj[3] != 0.0) || (force->special_coul[3] != 0.0))
+ error->all(FLERR,"Must use 'special_bonds charmm' with"
+ " dihedral style charmm for use with CHARMM pair styles");
+
int itmp;
if (force->pair == NULL)
error->all(FLERR,"Dihedral charmm is incompatible with Pair style");
diff --git a/src/MOLECULE/dihedral_charmmfsw.cpp b/src/MOLECULE/dihedral_charmmfsw.cpp
index 613170bbfa..feb3e02bd4 100644
--- a/src/MOLECULE/dihedral_charmmfsw.cpp
+++ b/src/MOLECULE/dihedral_charmmfsw.cpp
@@ -21,6 +21,7 @@
#include <mpi.h>
#include <math.h>
#include <stdlib.h>
+#include <string.h>
#include "dihedral_charmmfsw.h"
#include "atom.h"
#include "comm.h"
@@ -29,6 +30,7 @@
#include "force.h"
#include "pair.h"
#include "update.h"
+#include "respa.h"
#include "math_const.h"
#include "memory.h"
#include "error.h"
@@ -386,10 +388,26 @@ void DihedralCharmmfsw::coeff(int narg, char **arg)
void DihedralCharmmfsw::init_style()
{
+ if (strstr(update->integrate_style,"respa")) {
+ Respa *r = (Respa *) update->integrate;
+ if (r->level_pair >= 0 && (r->level_pair != r->level_dihedral))
+ error->all(FLERR,"Dihedral style charmmfsw must be set to same"
+ " r-RESPA level as 'pair'");
+ if (r->level_outer >= 0 && (r->level_outer != r->level_dihedral))
+ error->all(FLERR,"Dihedral style charmmfsw must be set to same"
+ " r-RESPA level as 'outer'");
+ }
+
// insure use of CHARMM pair_style if any weight factors are non-zero
// set local ptrs to LJ 14 arrays setup by Pair
+ // also verify that the correct 1-4 scaling is set
if (weightflag) {
+
+ if ((force->special_lj[3] != 0.0) || (force->special_coul[3] != 0.0))
+ error->all(FLERR,"Must use 'special_bonds charmm' with"
+ " dihedral style charmm for use with CHARMM pair styles");
+
int itmp;
if (force->pair == NULL)
error->all(FLERR,"Dihedral charmmfsw is incompatible with Pair style");
diff --git a/src/MOLECULE/pair_lj_charmmfsw_coul_charmmfsh.cpp b/src/MOLECULE/pair_lj_charmmfsw_coul_charmmfsh.cpp
index 1e34b06478..0d2159b671 100644
--- a/src/MOLECULE/pair_lj_charmmfsw_coul_charmmfsh.cpp
+++ b/src/MOLECULE/pair_lj_charmmfsw_coul_charmmfsh.cpp
@@ -25,6 +25,7 @@
#include <string.h>
#include "pair_lj_charmmfsw_coul_charmmfsh.h"
#include "atom.h"
+#include "update.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
@@ -46,6 +47,15 @@ PairLJCharmmfswCoulCharmmfsh::PairLJCharmmfswCoulCharmmfsh(LAMMPS *lmp) :
// short-range/long-range flag accessed by DihedralCharmmfsw
dihedflag = 0;
+
+ // switch qqr2e from LAMMPS value to CHARMM value
+
+ if (strcmp(update->unit_style,"real") == 0) {
+ if ((comm->me == 0) && (force->qqr2e != force->qqr2e_charmm_real))
+ error->message(FLERR,"Switching to CHARMM coulomb energy"
+ " conversion constant");
+ force->qqr2e = force->qqr2e_charmm_real;
+ }
}
/* ---------------------------------------------------------------------- */
@@ -71,6 +81,15 @@ PairLJCharmmfswCoulCharmmfsh::~PairLJCharmmfswCoulCharmmfsh()
memory->destroy(lj14_4);
}
}
+
+ // switch qqr2e back from CHARMM value to LAMMPS value
+
+ if (update && strcmp(update->unit_style,"real") == 0) {
+ if ((comm->me == 0) && (force->qqr2e == force->qqr2e_charmm_real))
+ error->message(FLERR,"Restoring original LAMMPS coulomb energy"
+ " conversion constant");
+ force->qqr2e = force->qqr2e_lammps_real;
+ }
}
/* ---------------------------------------------------------------------- */
diff --git a/src/REPLICA/fix_neb.cpp b/src/REPLICA/fix_neb.cpp
index b17315ca0d..6daaf94710 100644
--- a/src/REPLICA/fix_neb.cpp
+++ b/src/REPLICA/fix_neb.cpp
@@ -34,6 +34,9 @@ using namespace FixConst;
using namespace MathConst;
enum{SINGLE_PROC_DIRECT,SINGLE_PROC_MAP,MULTI_PROC};
+
+#define BUFSIZE 8
+
/* ---------------------------------------------------------------------- */
FixNEB::FixNEB(LAMMPS *lmp, int narg, char **arg) :
@@ -46,55 +49,67 @@ FixNEB::FixNEB(LAMMPS *lmp, int narg, char **arg) :
displacements(NULL)
{
- NEBLongRange=false;
- StandardNEB=true;
- PerpSpring=FreeEndIni=FreeEndFinal=false;
- FreeEndFinalWithRespToEIni=FinalAndInterWithRespToEIni=false;
-
- kspringPerp=0.0;
- kspring2=1.0;
- if (narg < 4)
- error->all(FLERR,"Illegal fix neb command, argument missing");
+ if (narg < 4) error->all(FLERR,"Illegal fix neb command");
kspring = force->numeric(FLERR,arg[3]);
- if (kspring <= 0.0)
- error->all(FLERR,"Illegal fix neb command."
- " The spring force was not provided properly");
+ if (kspring <= 0.0) error->all(FLERR,"Illegal fix neb command");
- int iarg =4;
+ // optional params
+
+ NEBLongRange = false;
+ StandardNEB = true;
+ PerpSpring = FreeEndIni = FreeEndFinal = false;
+ FreeEndFinalWithRespToEIni = FinalAndInterWithRespToEIni = false;
+ kspringPerp = 0.0;
+ kspringIni = 1.0;
+ kspringFinal = 1.0;
+
+ int iarg = 4;
while (iarg < narg) {
- if (strcmp (arg[iarg],"nudg_style")==0) {
- if (strcmp (arg[iarg+1],"idealpos")==0) {
- NEBLongRange = true;
- iarg+=2;}
- else if (strcmp (arg[iarg+1],"neigh")==0) {
- NEBLongRange = false;
- StandardNEB = true;
- iarg+=2;}
- else error->all(FLERR,"Illegal fix neb command. Unknown keyword");}
- else if (strcmp (arg[iarg],"perp")==0) {
- PerpSpring=true;
+ if (strcmp(arg[iarg],"parallel") == 0) {
+ if (iarg+2 > narg) error->all(FLERR,"Illegal fix neb command");
+ if (strcmp(arg[iarg+1],"ideal") == 0) {
+ NEBLongRange = true;
+ StandardNEB = false;
+ } else if (strcmp(arg[iarg+1],"neigh") == 0) {
+ NEBLongRange = false;
+ StandardNEB = true;
+ } else error->all(FLERR,"Illegal fix neb command");
+ iarg += 2;
+
+ } else if (strcmp(arg[iarg],"perp") == 0) {
+ if (iarg+2 > narg) error->all(FLERR,"Illegal fix neb command");
+ PerpSpring = true;
kspringPerp = force->numeric(FLERR,arg[iarg+1]);
- if (kspringPerp < 0.0)
- error->all(FLERR,"Illegal fix neb command. "
- "The perpendicular spring force was not provided properly");
- iarg+=2;}
- else if (strcmp (arg[iarg],"freeend")==0) {
- if (strcmp (arg[iarg+1],"ini")==0)
- FreeEndIni=true;
- else if (strcmp (arg[iarg+1],"final")==0)
- FreeEndFinal=true;
- else if (strcmp (arg[iarg+1],"finaleini")==0)
- FreeEndFinalWithRespToEIni=true;
- else if (strcmp (arg[iarg+1],"final2eini")==0) {
- FinalAndInterWithRespToEIni=true;
- FreeEndFinalWithRespToEIni=true;}
- else if (strcmp (arg[iarg+1],"none")!=0) error->all(FLERR,"Illegal fix neb command. Unknown keyword");
- iarg+=2;}
- else if (strcmp (arg[iarg],"freeend_kspring")==0) {
- kspring2=force->numeric(FLERR,arg[iarg+1]);
- iarg+=2; }
- else error->all(FLERR,"Illegal fix neb command. Unknown keyword");
+ if (kspringPerp == 0.0) PerpSpring = false;
+ if (kspringPerp < 0.0) error->all(FLERR,"Illegal fix neb command");
+ iarg += 2;
+
+ } else if (strcmp (arg[iarg],"end") == 0) {
+ if (iarg+3 > narg) error->all(FLERR,"Illegal fix neb command");
+ if (strcmp(arg[iarg+1],"first") == 0) {
+ FreeEndIni = true;
+ kspringIni = force->numeric(FLERR,arg[iarg+2]);
+ } else if (strcmp(arg[iarg+1],"last") == 0) {
+ FreeEndFinal = true;
+ FinalAndInterWithRespToEIni = false;
+ FreeEndFinalWithRespToEIni = false;
+ kspringFinal = force->numeric(FLERR,arg[iarg+2]);
+ } else if (strcmp(arg[iarg+1],"last/efirst") == 0) {
+ FreeEndFinal = false;
+ FinalAndInterWithRespToEIni = false;
+ FreeEndFinalWithRespToEIni = true;
+ kspringFinal = force->numeric(FLERR,arg[iarg+2]);
+ } else if (strcmp(arg[iarg+1],"last/efirst/middle") == 0) {
+ FreeEndFinal = false;
+ FinalAndInterWithRespToEIni = true;
+ FreeEndFinalWithRespToEIni = true;
+ kspringFinal = force->numeric(FLERR,arg[iarg+2]);
+ } else error->all(FLERR,"Illegal fix neb command");
+
+ iarg += 3;
+
+ } else error->all(FLERR,"Illegal fix neb command");
}
// nreplica = number of partitions
@@ -119,12 +134,12 @@ FixNEB::FixNEB(LAMMPS *lmp, int narg, char **arg) :
MPI_Group uworldgroup,rootgroup;
if (NEBLongRange) {
      for (int i=0; i<nreplica; i++)
-      iroots[i]=universe->root_proc[i];
+      iroots[i] = universe->root_proc[i];
MPI_Comm_group(uworld, &uworldgroup);
MPI_Group_incl(uworldgroup, nreplica, iroots, &rootgroup);
MPI_Comm_create(uworld, rootgroup, &rootworld);
}
- delete[] iroots;
+ delete [] iroots;
// create a new compute pe style
// id = fix-ID + pe, compute group = all
@@ -256,11 +271,11 @@ void FixNEB::min_post_force(int vflag)
double delxp,delyp,delzp,delxn,delyn,delzn;
double vIni=0.0;
- vprev=vnext=veng=pe->compute_scalar();
+ vprev = vnext = veng = pe->compute_scalar();
- if (ireplica < nreplica-1 && me ==0)
+ if (ireplica < nreplica-1 && me == 0)
MPI_Send(&veng,1,MPI_DOUBLE,procnext,0,uworld);
- if (ireplica > 0 && me ==0)
+ if (ireplica > 0 && me == 0)
MPI_Recv(&vprev,1,MPI_DOUBLE,procprev,0,uworld,MPI_STATUS_IGNORE);
if (ireplica > 0 && me == 0)
@@ -273,7 +288,7 @@ void FixNEB::min_post_force(int vflag)
MPI_Bcast(&vnext,1,MPI_DOUBLE,0,world);
}
- if (FreeEndFinal && (update->ntimestep == 0)) EFinalIni = veng;
+ if (FreeEndFinal && ireplica == nreplica-1 && (update->ntimestep == 0)) EFinalIni = veng;
if (ireplica == 0) vIni=veng;
@@ -287,16 +302,19 @@ void FixNEB::min_post_force(int vflag)
MPI_Bcast(&vIni,1,MPI_DOUBLE,0,world);
}
}
- if (FreeEndIni && ireplica == 0) {
- if (me == 0 )
+
+ if (FreeEndIni && ireplica == 0 && (update->ntimestep == 0)) EIniIni = veng;
+ /* if (FreeEndIni && ireplica == 0) {
+ // if (me == 0 )
if (update->ntimestep == 0) {
EIniIni = veng;
- if (cmode == MULTI_PROC)
- MPI_Bcast(&EIniIni,1,MPI_DOUBLE,0,world);
+ // if (cmode == MULTI_PROC)
+ // MPI_Bcast(&EIniIni,1,MPI_DOUBLE,0,world);
}
- }
+ }*/
// communicate atoms to/from adjacent replicas to fill xprev,xnext
+
inter_replica_comm();
// trigger potential energy computation on next timestep
@@ -335,10 +353,10 @@ void FixNEB::min_post_force(int vflag)
tangent[i][0]=delxp;
tangent[i][1]=delyp;
tangent[i][2]=delzp;
- tlen += tangent[i][0]*tangent[i][0]
- + tangent[i][1]*tangent[i][1] + tangent[i][2]*tangent[i][2];
- dot += f[i][0]*tangent[i][0]
- + f[i][1]*tangent[i][1] + f[i][2]*tangent[i][2];
+ tlen += tangent[i][0]*tangent[i][0] +
+ tangent[i][1]*tangent[i][1] + tangent[i][2]*tangent[i][2];
+ dot += f[i][0]*tangent[i][0] + f[i][1]*tangent[i][1] +
+ f[i][2]*tangent[i][2];
}
}
@@ -360,10 +378,10 @@ void FixNEB::min_post_force(int vflag)
tangent[i][0]=delxn;
tangent[i][1]=delyn;
tangent[i][2]=delzn;
- tlen += tangent[i][0]*tangent[i][0]
- + tangent[i][1]*tangent[i][1] + tangent[i][2]*tangent[i][2];
- dot += f[i][0]*tangent[i][0]
- + f[i][1]*tangent[i][1] + f[i][2]*tangent[i][2];
+ tlen += tangent[i][0]*tangent[i][0] +
+ tangent[i][1]*tangent[i][1] + tangent[i][2]*tangent[i][2];
+ dot += f[i][0]*tangent[i][0] + f[i][1]*tangent[i][1] +
+ f[i][2]*tangent[i][2];
}
}
} else {
@@ -388,13 +406,13 @@ void FixNEB::min_post_force(int vflag)
domain->minimum_image(delxn,delyn,delzn);
if (vnext > veng && veng > vprev) {
- tangent[i][0]=delxn;
- tangent[i][1]=delyn;
- tangent[i][2]=delzn;
+ tangent[i][0] = delxn;
+ tangent[i][1] = delyn;
+ tangent[i][2] = delzn;
} else if (vnext < veng && veng < vprev) {
- tangent[i][0]=delxp;
- tangent[i][1]=delyp;
- tangent[i][2]=delzp;
+ tangent[i][0] = delxp;
+ tangent[i][1] = delyp;
+ tangent[i][2] = delzp;
} else {
if (vnext > vprev) {
tangent[i][0] = vmax*delxn + vmin*delxp;
@@ -408,24 +426,23 @@ void FixNEB::min_post_force(int vflag)
}
nlen += delxn*delxn + delyn*delyn + delzn*delzn;
- tlen += tangent[i][0]*tangent[i][0]
- + tangent[i][1]*tangent[i][1] + tangent[i][2]*tangent[i][2];
+ tlen += tangent[i][0]*tangent[i][0] +
+ tangent[i][1]*tangent[i][1] + tangent[i][2]*tangent[i][2];
gradlen += f[i][0]*f[i][0] + f[i][1]*f[i][1] + f[i][2]*f[i][2];
dotpath += delxp*delxn + delyp*delyn + delzp*delzn;
- dottangrad += tangent[i][0]* f[i][0]
- + tangent[i][1]*f[i][1] + tangent[i][2]*f[i][2];
- gradnextlen += fnext[i][0]*fnext[i][0]
- + fnext[i][1]*fnext[i][1] +fnext[i][2] * fnext[i][2];
- dotgrad += f[i][0]*fnext[i][0]
- + f[i][1]*fnext[i][1] + f[i][2]*fnext[i][2];
+ dottangrad += tangent[i][0]*f[i][0] +
+ tangent[i][1]*f[i][1] + tangent[i][2]*f[i][2];
+ gradnextlen += fnext[i][0]*fnext[i][0] +
+ fnext[i][1]*fnext[i][1] +fnext[i][2] * fnext[i][2];
+ dotgrad += f[i][0]*fnext[i][0] + f[i][1]*fnext[i][1] +
+ f[i][2]*fnext[i][2];
- springF[i][0]=kspringPerp*(delxn-delxp);
- springF[i][1]=kspringPerp*(delyn-delyp);
- springF[i][2]=kspringPerp*(delzn-delzp);
+ springF[i][0] = kspringPerp*(delxn-delxp);
+ springF[i][1] = kspringPerp*(delyn-delyp);
+ springF[i][2] = kspringPerp*(delzn-delzp);
}
}
-#define BUFSIZE 8
double bufin[BUFSIZE], bufout[BUFSIZE];
bufin[0] = nlen;
bufin[1] = plen;
@@ -459,7 +476,7 @@ void FixNEB::min_post_force(int vflag)
// first or last replica has no change to forces, just return
-  if(ireplica>0 && ireplica<nreplica-1)
+  if (ireplica > 0 && ireplica < nreplica-1)
dottangrad = dottangrad/(tlen*gradlen);
if (ireplica == 0)
dottangrad = dottangrad/(nlen*gradlen);
@@ -468,15 +485,14 @@ void FixNEB::min_post_force(int vflag)
if (ireplica < nreplica-1)
dotgrad = dotgrad /(gradlen*gradnextlen);
-
if (FreeEndIni && ireplica == 0) {
if (tlen > 0.0) {
double dotall;
MPI_Allreduce(&dot,&dotall,1,MPI_DOUBLE,MPI_SUM,world);
dot=dotall/tlen;
- if (dot<0) prefactor = -dot - kspring2*(veng-EIniIni);
- else prefactor = -dot + kspring2*(veng-EIniIni);
+ if (dot<0) prefactor = -dot - kspringIni*(veng-EIniIni);
+ else prefactor = -dot + kspringIni*(veng-EIniIni);
for (int i = 0; i < nlocal; i++)
if (mask[i] & groupbit) {
@@ -493,8 +509,8 @@ void FixNEB::min_post_force(int vflag)
MPI_Allreduce(&dot,&dotall,1,MPI_DOUBLE,MPI_SUM,world);
dot=dotall/tlen;
- if (dot<0) prefactor = -dot - kspring2*(veng-EFinalIni);
- else prefactor = -dot + kspring2*(veng-EFinalIni);
+ if (dot<0) prefactor = -dot - kspringFinal*(veng-EFinalIni);
+ else prefactor = -dot + kspringFinal*(veng-EFinalIni);
for (int i = 0; i < nlocal; i++)
if (mask[i] & groupbit) {
@@ -511,8 +527,8 @@ void FixNEB::min_post_force(int vflag)
MPI_Allreduce(&dot,&dotall,1,MPI_DOUBLE,MPI_SUM,world);
dot=dotall/tlen;
- if (dot<0) prefactor = -dot - kspring2*(veng-vIni);
- else prefactor = -dot + kspring2*(veng-vIni);
+ if (dot<0) prefactor = -dot - kspringFinal*(veng-vIni);
+ else prefactor = -dot + kspringFinal*(veng-vIni);
for (int i = 0; i < nlocal; i++)
if (mask[i] & groupbit) {
@@ -568,14 +584,15 @@ void FixNEB::min_post_force(int vflag)
for (int i = 0; i < nlocal; i++) {
if (mask[i] & groupbit) {
- dot += f[i][0]*tangent[i][0]
- + f[i][1]*tangent[i][1] + f[i][2]*tangent[i][2];
- dotSpringTangent += springF[i][0]*tangent[i][0]
- +springF[i][1]*tangent[i][1]+springF[i][2]*tangent[i][2];}
+ dot += f[i][0]*tangent[i][0] + f[i][1]*tangent[i][1] +
+ f[i][2]*tangent[i][2];
+ dotSpringTangent += springF[i][0]*tangent[i][0] +
+ springF[i][1]*tangent[i][1] + springF[i][2]*tangent[i][2];}
}
double dotSpringTangentall;
- MPI_Allreduce(&dotSpringTangent,&dotSpringTangentall,1,MPI_DOUBLE,MPI_SUM,world);
+ MPI_Allreduce(&dotSpringTangent,&dotSpringTangentall,1,
+ MPI_DOUBLE,MPI_SUM,world);
dotSpringTangent=dotSpringTangentall;
double dotall;
MPI_Allreduce(&dot,&dotall,1,MPI_DOUBLE,MPI_SUM,world);
@@ -603,12 +620,12 @@ void FixNEB::min_post_force(int vflag)
for (int i = 0; i < nlocal; i++)
if (mask[i] & groupbit) {
- f[i][0] += prefactor*tangent[i][0]
- +AngularContr*(springF[i][0] -dotSpringTangent*tangent[i][0]);
- f[i][1] += prefactor*tangent[i][1]
- + AngularContr*(springF[i][1] - dotSpringTangent*tangent[i][1]);
- f[i][2] += prefactor*tangent[i][2]
- + AngularContr*(springF[i][2] - dotSpringTangent*tangent[i][2]);
+ f[i][0] += prefactor*tangent[i][0] +
+ AngularContr*(springF[i][0] - dotSpringTangent*tangent[i][0]);
+ f[i][1] += prefactor*tangent[i][1] +
+ AngularContr*(springF[i][1] - dotSpringTangent*tangent[i][1]);
+ f[i][2] += prefactor*tangent[i][2] +
+ AngularContr*(springF[i][2] - dotSpringTangent*tangent[i][2]);
}
}
@@ -827,7 +844,6 @@ void FixNEB::inter_replica_comm()
}
}
-
/* ----------------------------------------------------------------------
reallocate xprev,xnext,tangent arrays if necessary
reallocate communication arrays if necessary
diff --git a/src/REPLICA/fix_neb.h b/src/REPLICA/fix_neb.h
index 7e9e6db865..232790a1f0 100644
--- a/src/REPLICA/fix_neb.h
+++ b/src/REPLICA/fix_neb.h
@@ -38,7 +38,7 @@ class FixNEB : public Fix {
private:
int me,nprocs,nprocs_universe;
- double kspring,kspring2,kspringPerp,EIniIni,EFinalIni;
+ double kspring,kspringIni,kspringFinal,kspringPerp,EIniIni,EFinalIni;
bool StandardNEB,NEBLongRange,PerpSpring,FreeEndIni,FreeEndFinal;
bool FreeEndFinalWithRespToEIni,FinalAndInterWithRespToEIni;
int ireplica,nreplica;
diff --git a/src/RIGID/fix_ehex.h b/src/RIGID/fix_ehex.h
index 3220b77195..02f83df1af 100644
--- a/src/RIGID/fix_ehex.h
+++ b/src/RIGID/fix_ehex.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/USER-CGDNA/mf_oxdna.h b/src/USER-CGDNA/mf_oxdna.h
index 642c325af9..56055d5fac 100644
--- a/src/USER-CGDNA/mf_oxdna.h
+++ b/src/USER-CGDNA/mf_oxdna.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/USER-CGDNA/pair_oxdna2_coaxstk.h b/src/USER-CGDNA/pair_oxdna2_coaxstk.h
index 477b35ee13..be8d6d6b37 100644
--- a/src/USER-CGDNA/pair_oxdna2_coaxstk.h
+++ b/src/USER-CGDNA/pair_oxdna2_coaxstk.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/USER-CGDNA/pair_oxdna2_dh.h b/src/USER-CGDNA/pair_oxdna2_dh.h
index 3af355d503..b40346e1cf 100644
--- a/src/USER-CGDNA/pair_oxdna2_dh.h
+++ b/src/USER-CGDNA/pair_oxdna2_dh.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/USER-CGDNA/pair_oxdna2_excv.h b/src/USER-CGDNA/pair_oxdna2_excv.h
index 94e39a0fa2..f59daf8361 100644
--- a/src/USER-CGDNA/pair_oxdna2_excv.h
+++ b/src/USER-CGDNA/pair_oxdna2_excv.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/USER-CGDNA/pair_oxdna2_stk.h b/src/USER-CGDNA/pair_oxdna2_stk.h
index b78fc89d5e..7654e5db2f 100644
--- a/src/USER-CGDNA/pair_oxdna2_stk.h
+++ b/src/USER-CGDNA/pair_oxdna2_stk.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/USER-CGDNA/pair_oxdna_coaxstk.h b/src/USER-CGDNA/pair_oxdna_coaxstk.h
index b12ef6e77b..f9228c94a2 100644
--- a/src/USER-CGDNA/pair_oxdna_coaxstk.h
+++ b/src/USER-CGDNA/pair_oxdna_coaxstk.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/USER-CGDNA/pair_oxdna_excv.h b/src/USER-CGDNA/pair_oxdna_excv.h
index 0308c1f48e..ec9ddee3ec 100644
--- a/src/USER-CGDNA/pair_oxdna_excv.h
+++ b/src/USER-CGDNA/pair_oxdna_excv.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/USER-CGDNA/pair_oxdna_hbond.h b/src/USER-CGDNA/pair_oxdna_hbond.h
index 409241710b..1c9f37bf50 100644
--- a/src/USER-CGDNA/pair_oxdna_hbond.h
+++ b/src/USER-CGDNA/pair_oxdna_hbond.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/USER-CGDNA/pair_oxdna_stk.h b/src/USER-CGDNA/pair_oxdna_stk.h
index fd0c27d38c..950c276228 100644
--- a/src/USER-CGDNA/pair_oxdna_stk.h
+++ b/src/USER-CGDNA/pair_oxdna_stk.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/USER-CGDNA/pair_oxdna_xstk.h b/src/USER-CGDNA/pair_oxdna_xstk.h
index c71962ab52..5c443a4dac 100644
--- a/src/USER-CGDNA/pair_oxdna_xstk.h
+++ b/src/USER-CGDNA/pair_oxdna_xstk.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/USER-DIFFRACTION/compute_saed.h b/src/USER-DIFFRACTION/compute_saed.h
index 89e57f5097..87785c4936 100644
--- a/src/USER-DIFFRACTION/compute_saed.h
+++ b/src/USER-DIFFRACTION/compute_saed.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/USER-DIFFRACTION/compute_saed_consts.h b/src/USER-DIFFRACTION/compute_saed_consts.h
index 0cce0abfc2..0c07ae13ad 100644
--- a/src/USER-DIFFRACTION/compute_saed_consts.h
+++ b/src/USER-DIFFRACTION/compute_saed_consts.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/USER-DIFFRACTION/compute_xrd.h b/src/USER-DIFFRACTION/compute_xrd.h
index 92a59fcf23..61e1dae1bd 100644
--- a/src/USER-DIFFRACTION/compute_xrd.h
+++ b/src/USER-DIFFRACTION/compute_xrd.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/USER-DIFFRACTION/compute_xrd_consts.h b/src/USER-DIFFRACTION/compute_xrd_consts.h
index 1ca0d6bd66..582cecae01 100644
--- a/src/USER-DIFFRACTION/compute_xrd_consts.h
+++ b/src/USER-DIFFRACTION/compute_xrd_consts.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/USER-DIFFRACTION/fix_saed_vtk.h b/src/USER-DIFFRACTION/fix_saed_vtk.h
index 294b003b0c..fa379e7216 100644
--- a/src/USER-DIFFRACTION/fix_saed_vtk.h
+++ b/src/USER-DIFFRACTION/fix_saed_vtk.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/USER-DPD/fix_dpd_energy.h b/src/USER-DPD/fix_dpd_energy.h
index 9be41c3b9a..89ba84c08b 100644
--- a/src/USER-DPD/fix_dpd_energy.h
+++ b/src/USER-DPD/fix_dpd_energy.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/USER-DPD/fix_rx.h b/src/USER-DPD/fix_rx.h
index 5e226aec73..ca87fc51fd 100644
--- a/src/USER-DPD/fix_rx.h
+++ b/src/USER-DPD/fix_rx.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/USER-DPD/pair_exp6_rx.h b/src/USER-DPD/pair_exp6_rx.h
index 31d4ffb20b..45c046cc07 100644
--- a/src/USER-DPD/pair_exp6_rx.h
+++ b/src/USER-DPD/pair_exp6_rx.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/USER-DPD/pair_multi_lucy_rx.h b/src/USER-DPD/pair_multi_lucy_rx.h
index 5975bd6ccd..2bfa5d20e3 100644
--- a/src/USER-DPD/pair_multi_lucy_rx.h
+++ b/src/USER-DPD/pair_multi_lucy_rx.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/USER-INTEL/README b/src/USER-INTEL/README
index e32a09c45c..c02014d0ce 100644
--- a/src/USER-INTEL/README
+++ b/src/USER-INTEL/README
@@ -4,6 +4,7 @@
--------------------------------
W. Michael Brown (Intel) michael.w.brown at intel.com
+ William McDoniel (RWTH Aachen University)
Rodrigo Canales (RWTH Aachen University)
 Markus Höhnerbach (RWTH Aachen University)
Stan Moore (Sandia)
@@ -14,15 +15,25 @@
-----------------------------------------------------------------------------
-This package is based on the USER-OMP package and provides LAMMPS styles that:
+This package provides LAMMPS styles that:
1. include support for single and mixed precision in addition to double.
2. include modifications to support vectorization for key routines
+ 3. include modifications for data layouts to improve cache efficiency
-  3. include modifications to support offload to Intel(R) Xeon Phi(TM)
+  4. include modifications to support offload to Intel(R) Xeon Phi(TM)
coprocessors
-----------------------------------------------------------------------------
+For Intel server processors codenamed "Skylake", the following flags should
+be added or changed in the Makefile depending on the version:
+
+2017 update 2 - No changes needed
+2017 updates 3 or 4 - Use -xCOMMON-AVX512 and not -xHost or -xCORE-AVX512
+2018 or newer - Use -xHost or -xCORE-AVX512 and -qopt-zmm-usage=high
+
+-----------------------------------------------------------------------------
+
When using the suffix command with "intel", intel styles will be used if they
exist. If the suffix command is used with "hybrid intel omp" and the USER-OMP
USER-OMP styles will be used whenever USER-INTEL styles are not available. This
diff --git a/src/USER-INTEL/TEST/README b/src/USER-INTEL/TEST/README
index cf14fb3237..758c37bf56 100644
--- a/src/USER-INTEL/TEST/README
+++ b/src/USER-INTEL/TEST/README
@@ -4,6 +4,7 @@
# in.intel.lj - Atomic fluid (LJ Benchmark)
# in.intel.rhodo - Protein (Rhodopsin Benchmark)
# in.intel.lc - Liquid Crystal w/ Gay-Berne potential
+# in.intel.eam - Copper benchmark with Embedded Atom Method
# in.intel.sw - Silicon benchmark with Stillinger-Weber
# in.intel.tersoff - Silicon benchmark with Tersoff
# in.intel.water - Coarse-grain water benchmark using Stillinger-Weber
@@ -11,19 +12,26 @@
#############################################################################
#############################################################################
-# Expected Timesteps/second with turbo on and HT enabled, LAMMPS 18-Jun-2016
+# Expected Timesteps/second with turbo on and HT enabled, LAMMPS June-2017
+# - Compiled w/ Intel Parallel Studio 2017u2 and Makefile.intel_cpu_intelmpi
#
# Xeon E5-2697v4 Xeon Phi 7250
#
-# in.intel.lj - 162.764 179.148
-# in.intel.rhodo - 11.633 13.668
-# in.intel.lc - 19.136 24.863
-# in.intel.sw - 139.048 152.026
-# in.intel.tersoff - 82.663 92.985
-# in.intel.water - 59.838 85.704
+# in.intel.lj - 199.5 282.3
+# in.intel.rhodo - 12.4 17.5
+# in.intel.lc - 19.0 25.7
+# in.intel.eam - 59.4 92.8
+# in.intel.sw - 132.4 161.9
+# in.intel.tersoff - 83.3 101.1
+# in.intel.water - 53.4 90.3
#
#############################################################################
+#############################################################################
+# For Skylake server (Xeon) architectures, see notes in the USER-INTEL/README
+# for build flags that should be used.
+#############################################################################
+
#############################################################################
# For Haswell (Xeon v3) architectures, depending on the compiler version,
# it may give better performance to compile for an AVX target (with -xAVX
@@ -42,7 +50,18 @@
# -v m 0.5 # Run for half as long
#############################################################################
-# Example for running benchmarks:
+#############################################################################
+# The LAMMPS newton setting can be controlled from the commandline for the
+# benchmarks with the N variable:
+#
+# -v N on # newton on
+# -v N off # newton off
+#
+# The default is on for all of the benchmarks except for LJ where the off
+# setting performs best with the USER-INTEL package
+#############################################################################
+
+# Example for running benchmarks (see run_benchmarks.sh for script):
# Number of physical cores per node not including hyperthreads
export LMP_CORES=28
@@ -57,26 +76,35 @@ export LMP_BIN=../../lmp_intel_cpu
# LAMMPS root directory
export LMP_ROOT=../../../
-source /opt/intel/parallel_studio_xe_2016.2.062/psxevars.sh
+source /opt/intel/parallel_studio_xe_2017.2.050/psxevars.sh
+export KMP_BLOCKTIME=0
export I_MPI_PIN_DOMAIN=core
export I_MPI_FABRICS=shm # For single node
+# ONLY FOR INTEL XEON PHI x200 SERIES PROCESSORS
+export I_MPI_SHM_LMT=shm
+
# Generate the restart file for use with liquid crystal benchmark
mpirun -np $LMP_CORES $LMP_BIN -in in.lc_generate_restart -log none
# Benchmark to run
export bench=in.intel.lj
+#############################################################################
+# For Intel Xeon Phi x200 series processors best performance is achieved by
+# using MCDRAM. In flat mode, this can be achieved with numactl,
+# MPI environment variables, or other options provided by batch schedulers
+#############################################################################
#############################################################################
# To run without a optimization package
#############################################################################
-mpirun -np $LMP_CORES $LMP_BIN -in $bench -log none
+mpirun -np $LMP_CORES $LMP_BIN -in $bench -log none -v N on
#############################################################################
# To run with USER-OMP package
#############################################################################
-mpirun -np $LMP_CORES $LMP_BIN -in $bench -log none -pk omp 0 -sf omp
+mpirun -np $LMP_CORES $LMP_BIN -in $bench -log none -pk omp 0 -sf omp -v N on
#############################################################################
# To run with USER-INTEL package and no coprocessor
@@ -89,6 +117,9 @@ mpirun -np $LMP_CORES $LMP_BIN -in $bench -log none -pk intel 0 -sf intel
mpirun -np $LMP_CORES $LMP_BIN -in $bench -log none -pk intel 1 -sf intel
#############################################################################
-# If using PPPM (in.intel.rhodo) on Intel Xeon Phi x200 series processors
+# If using PPPM (e.g. in.intel.rhodo) on Intel Xeon Phi x200 series
+# or Skylake processors
#############################################################################
-mpirun -np $LMP_CORES $LMP_BIN -in $bench -log none -pk intel 0 omp 3 lrt yes -sf intel
+export KMP_AFFINITY=none
+rthreads=$((OMP_NUM_THREADS-1))
+mpirun -np $LMP_CORES $LMP_BIN -in $bench -log none -pk intel 0 omp $rthreads lrt yes -sf intel
diff --git a/src/USER-INTEL/TEST/in.intel.eam b/src/USER-INTEL/TEST/in.intel.eam
index e9523a5dd1..5a3b3064af 100644
--- a/src/USER-INTEL/TEST/in.intel.eam
+++ b/src/USER-INTEL/TEST/in.intel.eam
@@ -1,4 +1,6 @@
# bulk Cu lattice
+
+variable N index on # Newton Setting
variable w index 10 # Warmup Timesteps
variable t index 3100 # Main Run Timesteps
variable m index 1 # Main Run Timestep Multiplier
@@ -13,6 +15,7 @@ variable z index 2
variable rr equal floor($t*$m)
variable root getenv LMP_ROOT
+newton $N
if "$n > 0" then "processors * * * grid numa"
variable xx equal 20*$x
diff --git a/src/USER-INTEL/TEST/in.intel.lc b/src/USER-INTEL/TEST/in.intel.lc
index 0172ba3b4d..411f5d830d 100644
--- a/src/USER-INTEL/TEST/in.intel.lc
+++ b/src/USER-INTEL/TEST/in.intel.lc
@@ -3,6 +3,7 @@
# shape: 2 1.5 1
# cutoff 4.0 with skin 0.8
+variable N index on # Newton Setting
variable w index 10 # Warmup Timesteps
variable t index 840 # Main Run Timesteps
variable m index 1 # Main Run Timestep Multiplier
@@ -15,6 +16,7 @@ variable z index 2
variable rr equal floor($t*$m)
+newton $N
if "$n > 0" then "processors * * * grid numa"
units lj
diff --git a/src/USER-INTEL/TEST/in.intel.lj b/src/USER-INTEL/TEST/in.intel.lj
index 8931ca24bc..2b724f6014 100644
--- a/src/USER-INTEL/TEST/in.intel.lj
+++ b/src/USER-INTEL/TEST/in.intel.lj
@@ -1,5 +1,6 @@
# 3d Lennard-Jones melt
+variable N index off # Newton Setting
variable w index 10 # Warmup Timesteps
variable t index 7900 # Main Run Timesteps
variable m index 1 # Main Run Timestep Multiplier
@@ -15,6 +16,7 @@ variable yy equal 20*$y
variable zz equal 20*$z
variable rr equal floor($t*$m)
+newton $N
if "$n > 0" then "processors * * * grid numa"
units lj
diff --git a/src/USER-INTEL/TEST/in.intel.rhodo b/src/USER-INTEL/TEST/in.intel.rhodo
index 7b3b092607..05145d79c0 100644
--- a/src/USER-INTEL/TEST/in.intel.rhodo
+++ b/src/USER-INTEL/TEST/in.intel.rhodo
@@ -1,5 +1,6 @@
# Rhodopsin model
+variable N index on # Newton Setting
variable w index 10 # Warmup Timesteps
variable t index 520 # Main Run Timesteps
variable m index 1 # Main Run Timestep Multiplier
@@ -16,10 +17,11 @@ variable z index 2
variable rr equal floor($t*$m)
variable root getenv LMP_ROOT
+newton $N
if "$n > 0" then "processors * * * grid numa"
units real
-neigh_modify delay 5 every 1 binsize $b
+neigh_modify delay 5 every 1
atom_style full
bond_style harmonic
diff --git a/src/USER-INTEL/TEST/in.intel.sw b/src/USER-INTEL/TEST/in.intel.sw
index 077c9bb4fb..494f58dea3 100644
--- a/src/USER-INTEL/TEST/in.intel.sw
+++ b/src/USER-INTEL/TEST/in.intel.sw
@@ -1,5 +1,6 @@
# bulk Si via Stillinger-Weber
+variable N index on # Newton Setting
variable w index 10 # Warmup Timesteps
variable t index 6200 # Main Run Timesteps
variable m index 1 # Main Run Timestep Multiplier
@@ -16,6 +17,7 @@ variable zz equal 10*$z
variable rr equal floor($t*$m)
variable root getenv LMP_ROOT
+newton $N
if "$n > 0" then "processors * * * grid numa"
units metal
diff --git a/src/USER-INTEL/TEST/in.intel.tersoff b/src/USER-INTEL/TEST/in.intel.tersoff
index f0c6a88f75..574b29f674 100644
--- a/src/USER-INTEL/TEST/in.intel.tersoff
+++ b/src/USER-INTEL/TEST/in.intel.tersoff
@@ -1,5 +1,6 @@
# bulk Si via Tersoff
+variable N index on # Newton Setting
variable w index 10 # Warmup Timesteps
variable t index 2420 # Main Run Timesteps
variable m index 1 # Main Run Timestep Multiplier
@@ -16,6 +17,7 @@ variable zz equal 10*$z
variable rr equal floor($t*$m)
variable root getenv LMP_ROOT
+newton $N
if "$n > 0" then "processors * * * grid numa"
units metal
diff --git a/src/USER-INTEL/TEST/in.intel.water b/src/USER-INTEL/TEST/in.intel.water
index 1c1fca311f..0643def19e 100644
--- a/src/USER-INTEL/TEST/in.intel.water
+++ b/src/USER-INTEL/TEST/in.intel.water
@@ -1,5 +1,6 @@
# Coarse-grain water simulation using Stillinger-Weber
+variable N index on # Newton Setting
variable w index 10 # Warmup Timesteps
variable t index 2600 # Main Run Timesteps
variable m index 1 # Main Run Timestep Multiplier
@@ -11,6 +12,7 @@ variable y index 2
variable z index 2
variable rr equal floor($t*$m)
+newton $N
if "$n > 0" then "processors * * * grid numa"
units real
diff --git a/src/USER-INTEL/TEST/in.lc_generate_restart b/src/USER-INTEL/TEST/in.lc_generate_restart
index 8ae53c5c8e..30d593f2cd 100644
--- a/src/USER-INTEL/TEST/in.lc_generate_restart
+++ b/src/USER-INTEL/TEST/in.lc_generate_restart
@@ -4,13 +4,13 @@
# cutoff 4.0 with skin 0.8
# NPT, T=2.4, P=8.0
-variable x index 1
-variable y index 1
-variable z index 1
+variable xt index 1
+variable yt index 1
+variable zt index 1
-variable i equal $x*32
-variable j equal $y*32
-variable k equal $z*32
+variable i equal ${xt}*32
+variable j equal ${yt}*32
+variable k equal ${zt}*32
units lj
atom_style ellipsoid
diff --git a/src/USER-INTEL/TEST/run_benchmarks.sh b/src/USER-INTEL/TEST/run_benchmarks.sh
new file mode 100755
index 0000000000..10bd79e0d1
--- /dev/null
+++ b/src/USER-INTEL/TEST/run_benchmarks.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+#########################################################################
+# Adjust settings below for your system
+#########################################################################
+
+# --------------------- MPI Launch Command
+
+export MPI="mpirun"
+#export MPI="numactl -p 1 mpirun" # -- Systems w/ MCDRAM in flat mode
+
+# ------------- Name and location of the LAMMPS binary
+
+export LMP_BIN=../../lmp_intel_cpu_intelmpi
+#export LMP_BIN=../../lmp_knl
+
+# ------------- Directory containing the LAMMPS installation
+
+export LMP_ROOT=../../../
+
+# ------------- Number of physical cores (not HW threads)
+
+export LMP_CORES=36 # -- For Intel Xeon E5-2697v4 SKU
+#export LMP_CORES=68 # -- For Intel Xeon Phi x200 7250 SKU
+
+# ------------- Number of HW threads to use in tests
+
+export LMP_THREAD_LIST="2" # -- For 2 threads per core w/ HT enabled
+#export LMP_THREAD_LIST="2 4" # -- For 2 threads per core w/ HT enabled
+
+# ------------- MPI Tuning Parameters
+
+#export I_MPI_SHM_LMT=shm # -- Uncomment for Xeon Phi x200 series
+
+# ------------- Library locations for build
+
+#source /opt/intel/parallel_studio_xe_2017.2.050/psxevars.sh
+
+#########################################################################
+# End settings for your system
+#########################################################################
+
+# NOTE: the LRT variant of rhodo is generated automatically below; it is
+# not a separate workload (there is no in.intel.rhodo_lrt input file).
+export WORKLOADS="lj rhodo lc sw water eam"
+export LMP_ARGS="-pk intel 0 -sf intel -screen none -v d 1"
+export RLMP_ARGS="-pk intel 0 lrt yes -sf intel -screen none -v d 1"
+
+export LOG_DIR_HEADER=`echo $LMP_BIN | sed 's/\.\.\///g' | sed 's/\.\///g'`
+export LOG_DIR_HOST=`hostname`
+export DATE_STRING=`date +%s`
+export LOG_DIR=$LOG_DIR_HOST"_"$LOG_DIR_HEADER"_"$DATE_STRING
+mkdir $LOG_DIR
+
+export I_MPI_PIN_DOMAIN=core
+export I_MPI_FABRICS=shm
+export KMP_BLOCKTIME=0
+
+echo -n "Creating restart file...."
+$MPI -np $LMP_CORES $LMP_BIN -in in.lc_generate_restart -log none $LMP_ARGS
+echo "Done."
+for threads in $LMP_THREAD_LIST
+do
+ export OMP_NUM_THREADS=$threads
+ for workload in $WORKLOADS
+ do
+ export LOGFILE=$LOG_DIR/$workload.$LMP_CORES"c"$threads"t".log
+ echo "Running $LOGFILE"
+ cmd="$MPI -np $LMP_CORES $LMP_BIN -in in.intel.$workload -log $LOGFILE $LMP_ARGS";
+ rthreads=$threads
+ unset KMP_AFFINITY
+ $cmd
+
+ # - For benchmarks with PPPM, also try LRT mode
+ if [ $workload = "rhodo" ]; then
+ export LOGFILE=$LOG_DIR/$workload"_lrt".$LMP_CORES"c"$threads"t".log
+ cmd="$MPI -np $LMP_CORES $LMP_BIN -in in.intel.$workload -log $LOGFILE $RLMP_ARGS";
+ rthreads=$((threads-1))
+ export KMP_AFFINITY=none
+ export OMP_NUM_THREADS=$rthreads
+ echo " $cmd" >> $LOG_DIR/commands.info
+ $cmd
+ export OMP_NUM_THREADS=$threads # restore; LRT run reduced the count
+ fi
+ done
+done
+
+# Performance reported by LAMMPS (Timesteps/second ignoring warm-up run)
+grep Perf $LOG_DIR/*.log | awk 'BEGIN{n=1}n%2==0{print $0}{n++}' | sed 's/\/day//g' | sed 's/steps\/s/steps_s/g' | sed 's/hours\/ns//g' | sed 's/.*\///g' | sed 's/\.log:Performance://g' | awk '{c=NF-1; print $1,$c}'
diff --git a/src/USER-INTEL/angle_charmm_intel.cpp b/src/USER-INTEL/angle_charmm_intel.cpp
index aafc765c6b..d55afd4742 100644
--- a/src/USER-INTEL/angle_charmm_intel.cpp
+++ b/src/USER-INTEL/angle_charmm_intel.cpp
@@ -37,7 +37,7 @@ typedef struct { int a,b,c,t; } int4_t;
/* ---------------------------------------------------------------------- */
-AngleCharmmIntel::AngleCharmmIntel(LAMMPS *lmp) : AngleCharmm(lmp)
+AngleCharmmIntel::AngleCharmmIntel(LAMMPS *lmp) : AngleCharmm(lmp)
{
suffix_flag |= Suffix::INTEL;
}
@@ -74,23 +74,23 @@ void AngleCharmmIntel::compute(int eflag, int vflag)
template
void AngleCharmmIntel::compute(int eflag, int vflag,
- IntelBuffers *buffers,
- const ForceConst &fc)
+ IntelBuffers *buffers,
+ const ForceConst &fc)
{
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = 0;
if (evflag) {
- if (eflag) {
+ if (vflag && !eflag) {
if (force->newton_bond)
- eval<1,1,1>(vflag, buffers, fc);
+ eval<0,1,1>(vflag, buffers, fc);
else
- eval<1,1,0>(vflag, buffers, fc);
+ eval<0,1,0>(vflag, buffers, fc);
} else {
if (force->newton_bond)
- eval<1,0,1>(vflag, buffers, fc);
+ eval<1,1,1>(vflag, buffers, fc);
else
- eval<1,0,0>(vflag, buffers, fc);
+ eval<1,1,0>(vflag, buffers, fc);
}
} else {
if (force->newton_bond)
@@ -102,10 +102,10 @@ void AngleCharmmIntel::compute(int eflag, int vflag,
/* ---------------------------------------------------------------------- */
-template
-void AngleCharmmIntel::eval(const int vflag,
- IntelBuffers *buffers,
- const ForceConst &fc)
+template
+void AngleCharmmIntel::eval(const int vflag,
+ IntelBuffers *buffers,
+ const ForceConst &fc)
{
const int inum = neighbor->nanglelist;
@@ -126,31 +126,42 @@ void AngleCharmmIntel::eval(const int vflag,
const int nthreads = tc;
acc_t oeangle, ov0, ov1, ov2, ov3, ov4, ov5;
- if (EVFLAG) {
- if (EFLAG)
- oeangle = (acc_t)0.0;
- if (vflag) {
- ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
- }
+ if (EFLAG) oeangle = (acc_t)0.0;
+ if (VFLAG && vflag) {
+ ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
}
#if defined(_OPENMP)
#pragma omp parallel default(none) \
- shared(f_start,f_stride,fc) \
+ shared(f_start,f_stride,fc) \
reduction(+:oeangle,ov0,ov1,ov2,ov3,ov4,ov5)
#endif
{
- int nfrom, nto, tid;
+ int nfrom, npl, nto, tid;
+ #ifdef LMP_INTEL_USE_SIMDOFF
IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads);
+ #else
+ IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads);
+ #endif
FORCE_T * _noalias const f = f_start + (tid * f_stride);
if (fix->need_zero(tid))
memset(f, 0, f_stride * sizeof(FORCE_T));
- const int4_t * _noalias const anglelist =
+ const int4_t * _noalias const anglelist =
(int4_t *) neighbor->anglelist[0];
- for (int n = nfrom; n < nto; n++) {
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ acc_t seangle, sv0, sv1, sv2, sv3, sv4, sv5;
+ if (EFLAG) seangle = (acc_t)0.0;
+ if (VFLAG && vflag) {
+ sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
+ }
+ #pragma simd reduction(+:seangle, sv0, sv1, sv2, sv3, sv4, sv5)
+ for (int n = nfrom; n < nto; n ++) {
+ #else
+ for (int n = nfrom; n < nto; n += npl) {
+ #endif
const int i1 = anglelist[n].a;
const int i2 = anglelist[n].b;
const int i3 = anglelist[n].c;
@@ -229,40 +240,58 @@ void AngleCharmmIntel::eval(const int vflag,
// apply force to each of 3 atoms
- if (NEWTON_BOND || i1 < nlocal) {
- f[i1].x += f1x;
- f[i1].y += f1y;
- f[i1].z += f1z;
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ #pragma simdoff
+ #endif
+ {
+ if (NEWTON_BOND || i1 < nlocal) {
+ f[i1].x += f1x;
+ f[i1].y += f1y;
+ f[i1].z += f1z;
+ }
+
+ if (NEWTON_BOND || i2 < nlocal) {
+ f[i2].x -= f1x + f3x;
+ f[i2].y -= f1y + f3y;
+ f[i2].z -= f1z + f3z;
+ }
+
+ if (NEWTON_BOND || i3 < nlocal) {
+ f[i3].x += f3x;
+ f[i3].y += f3y;
+ f[i3].z += f3z;
+ }
}
- if (NEWTON_BOND || i2 < nlocal) {
- f[i2].x -= f1x + f3x;
- f[i2].y -= f1y + f3y;
- f[i2].z -= f1z + f3z;
- }
-
- if (NEWTON_BOND || i3 < nlocal) {
- f[i3].x += f3x;
- f[i3].y += f3y;
- f[i3].z += f3z;
- }
-
- if (EVFLAG) {
- IP_PRE_ev_tally_angle(EFLAG, eatom, vflag, eangle, i1, i2, i3,f1x,
- f1y, f1z, f3x, f3y, f3z, delx1, dely1, delz1,
- delx2, dely2, delz2, oeangle, f, NEWTON_BOND,
- nlocal, ov0, ov1, ov2, ov3, ov4, ov5);
+ if (EFLAG || VFLAG) {
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ IP_PRE_ev_tally_angle(EFLAG, VFLAG, eatom, vflag, eangle, i1, i2,
+ i3, f1x, f1y, f1z, f3x, f3y, f3z, delx1,
+ dely1, delz1, delx2, dely2, delz2, seangle,
+ f, NEWTON_BOND, nlocal, sv0, sv1, sv2, sv3,
+ sv4, sv5);
+ #else
+ IP_PRE_ev_tally_angle(EFLAG, VFLAG, eatom, vflag, eangle, i1, i2,
+ i3, f1x, f1y, f1z, f3x, f3y, f3z, delx1,
+ dely1, delz1, delx2, dely2, delz2, oeangle,
+ f, NEWTON_BOND, nlocal, ov0, ov1, ov2, ov3,
+ ov4, ov5);
+ #endif
}
} // for n
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ if (EFLAG) oeangle += seangle;
+ if (VFLAG && vflag) {
+ ov0 += sv0; ov1 += sv1; ov2 += sv2;
+ ov3 += sv3; ov4 += sv4; ov5 += sv5;
+ }
+ #endif
} // omp parallel
- if (EVFLAG) {
- if (EFLAG)
- energy += oeangle;
- if (vflag) {
- virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
- virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
- }
+ if (EFLAG) energy += oeangle;
+ if (VFLAG && vflag) {
+ virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
+ virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
}
fix->set_reduce_flag();
@@ -319,11 +348,11 @@ void AngleCharmmIntel::pack_force_const(ForceConst &fc,
template
void AngleCharmmIntel::ForceConst::set_ntypes(const int nangletypes,
- Memory *memory) {
+ Memory *memory) {
if (nangletypes != _nangletypes) {
if (_nangletypes > 0)
_memory->destroy(fc);
-
+
if (nangletypes > 0)
_memory->create(fc,nangletypes,"anglecharmmintel.fc");
}
diff --git a/src/USER-INTEL/angle_charmm_intel.h b/src/USER-INTEL/angle_charmm_intel.h
index a98007b3ef..342af31b8c 100644
--- a/src/USER-INTEL/angle_charmm_intel.h
+++ b/src/USER-INTEL/angle_charmm_intel.h
@@ -45,8 +45,8 @@ class AngleCharmmIntel : public AngleCharmm {
void compute(int eflag, int vflag, IntelBuffers *buffers,
const ForceConst &fc);
template
- void eval(const int vflag, IntelBuffers * buffers,
- const ForceConst &fc);
+ void eval(const int vflag, IntelBuffers * buffers,
+ const ForceConst &fc);
template
void pack_force_const(ForceConst &fc,
IntelBuffers *buffers);
diff --git a/src/USER-INTEL/angle_harmonic_intel.cpp b/src/USER-INTEL/angle_harmonic_intel.cpp
index f101fd9e1f..47e0add690 100644
--- a/src/USER-INTEL/angle_harmonic_intel.cpp
+++ b/src/USER-INTEL/angle_harmonic_intel.cpp
@@ -37,7 +37,7 @@ typedef struct { int a,b,c,t; } int4_t;
/* ---------------------------------------------------------------------- */
-AngleHarmonicIntel::AngleHarmonicIntel(LAMMPS *lmp) : AngleHarmonic(lmp)
+AngleHarmonicIntel::AngleHarmonicIntel(LAMMPS *lmp) : AngleHarmonic(lmp)
{
suffix_flag |= Suffix::INTEL;
}
@@ -74,23 +74,23 @@ void AngleHarmonicIntel::compute(int eflag, int vflag)
template
void AngleHarmonicIntel::compute(int eflag, int vflag,
- IntelBuffers *buffers,
- const ForceConst &fc)
+ IntelBuffers *buffers,
+ const ForceConst &fc)
{
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = 0;
if (evflag) {
- if (eflag) {
+ if (vflag && !eflag) {
if (force->newton_bond)
- eval<1,1,1>(vflag, buffers, fc);
+ eval<0,1,1>(vflag, buffers, fc);
else
- eval<1,1,0>(vflag, buffers, fc);
+ eval<0,1,0>(vflag, buffers, fc);
} else {
if (force->newton_bond)
- eval<1,0,1>(vflag, buffers, fc);
+ eval<1,1,1>(vflag, buffers, fc);
else
- eval<1,0,0>(vflag, buffers, fc);
+ eval<1,1,0>(vflag, buffers, fc);
}
} else {
if (force->newton_bond)
@@ -102,10 +102,10 @@ void AngleHarmonicIntel::compute(int eflag, int vflag,
/* ---------------------------------------------------------------------- */
-template
-void AngleHarmonicIntel::eval(const int vflag,
- IntelBuffers *buffers,
- const ForceConst &fc)
+template
+void AngleHarmonicIntel::eval(const int vflag,
+ IntelBuffers *buffers,
+ const ForceConst &fc)
{
const int inum = neighbor->nanglelist;
@@ -126,31 +126,42 @@ void AngleHarmonicIntel::eval(const int vflag,
const int nthreads = tc;
acc_t oeangle, ov0, ov1, ov2, ov3, ov4, ov5;
- if (EVFLAG) {
- if (EFLAG)
- oeangle = (acc_t)0.0;
- if (vflag) {
- ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
- }
+ if (EFLAG) oeangle = (acc_t)0.0;
+ if (VFLAG && vflag) {
+ ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
}
#if defined(_OPENMP)
#pragma omp parallel default(none) \
- shared(f_start,f_stride,fc) \
+ shared(f_start,f_stride,fc) \
reduction(+:oeangle,ov0,ov1,ov2,ov3,ov4,ov5)
#endif
{
- int nfrom, nto, tid;
+ int nfrom, npl, nto, tid;
+ #ifdef LMP_INTEL_USE_SIMDOFF
IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads);
+ #else
+ IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads);
+ #endif
FORCE_T * _noalias const f = f_start + (tid * f_stride);
if (fix->need_zero(tid))
memset(f, 0, f_stride * sizeof(FORCE_T));
- const int4_t * _noalias const anglelist =
+ const int4_t * _noalias const anglelist =
(int4_t *) neighbor->anglelist[0];
- for (int n = nfrom; n < nto; n++) {
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ acc_t seangle, sv0, sv1, sv2, sv3, sv4, sv5;
+ if (EFLAG) seangle = (acc_t)0.0;
+ if (VFLAG && vflag) {
+ sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
+ }
+ #pragma simd reduction(+:seangle, sv0, sv1, sv2, sv3, sv4, sv5)
+ for (int n = nfrom; n < nto; n ++) {
+ #else
+ for (int n = nfrom; n < nto; n += npl) {
+ #endif
const int i1 = anglelist[n].a;
const int i2 = anglelist[n].b;
const int i3 = anglelist[n].c;
@@ -211,40 +222,58 @@ void AngleHarmonicIntel::eval(const int vflag,
// apply force to each of 3 atoms
- if (NEWTON_BOND || i1 < nlocal) {
- f[i1].x += f1x;
- f[i1].y += f1y;
- f[i1].z += f1z;
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ #pragma simdoff
+ #endif
+ {
+ if (NEWTON_BOND || i1 < nlocal) {
+ f[i1].x += f1x;
+ f[i1].y += f1y;
+ f[i1].z += f1z;
+ }
+
+ if (NEWTON_BOND || i2 < nlocal) {
+ f[i2].x -= f1x + f3x;
+ f[i2].y -= f1y + f3y;
+ f[i2].z -= f1z + f3z;
+ }
+
+ if (NEWTON_BOND || i3 < nlocal) {
+ f[i3].x += f3x;
+ f[i3].y += f3y;
+ f[i3].z += f3z;
+ }
}
- if (NEWTON_BOND || i2 < nlocal) {
- f[i2].x -= f1x + f3x;
- f[i2].y -= f1y + f3y;
- f[i2].z -= f1z + f3z;
- }
-
- if (NEWTON_BOND || i3 < nlocal) {
- f[i3].x += f3x;
- f[i3].y += f3y;
- f[i3].z += f3z;
- }
-
- if (EVFLAG) {
- IP_PRE_ev_tally_angle(EFLAG, eatom, vflag, eangle, i1, i2, i3,f1x,
- f1y, f1z, f3x, f3y, f3z, delx1, dely1, delz1,
- delx2, dely2, delz2, oeangle, f, NEWTON_BOND,
- nlocal, ov0, ov1, ov2, ov3, ov4, ov5);
+ if (EFLAG || VFLAG) {
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ IP_PRE_ev_tally_angle(EFLAG, VFLAG, eatom, vflag, eangle, i1, i2, i3,
+ f1x, f1y, f1z, f3x, f3y, f3z, delx1, dely1,
+ delz1, delx2, dely2, delz2, seangle, f,
+ NEWTON_BOND, nlocal, sv0, sv1, sv2, sv3, sv4,
+ sv5);
+ #else
+ IP_PRE_ev_tally_angle(EFLAG, VFLAG, eatom, vflag, eangle, i1, i2, i3,
+ f1x, f1y, f1z, f3x, f3y, f3z, delx1, dely1,
+ delz1, delx2, dely2, delz2, oeangle, f,
+ NEWTON_BOND, nlocal, ov0, ov1, ov2, ov3, ov4,
+ ov5);
+ #endif
}
} // for n
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ if (EFLAG) oeangle += seangle;
+ if (VFLAG && vflag) {
+ ov0 += sv0; ov1 += sv1; ov2 += sv2;
+ ov3 += sv3; ov4 += sv4; ov5 += sv5;
+ }
+ #endif
} // omp parallel
- if (EVFLAG) {
- if (EFLAG)
- energy += oeangle;
- if (vflag) {
- virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
- virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
- }
+ if (EFLAG) energy += oeangle;
+ if (VFLAG && vflag) {
+ virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
+ virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
}
fix->set_reduce_flag();
@@ -299,11 +328,11 @@ void AngleHarmonicIntel::pack_force_const(ForceConst &fc,
template
void AngleHarmonicIntel::ForceConst::set_ntypes(const int nangletypes,
- Memory *memory) {
+ Memory *memory) {
if (nangletypes != _nangletypes) {
if (_nangletypes > 0)
_memory->destroy(fc);
-
+
if (nangletypes > 0)
_memory->create(fc,nangletypes,"anglecharmmintel.fc");
}
diff --git a/src/USER-INTEL/angle_harmonic_intel.h b/src/USER-INTEL/angle_harmonic_intel.h
index 340ea4b974..301fc7cc06 100644
--- a/src/USER-INTEL/angle_harmonic_intel.h
+++ b/src/USER-INTEL/angle_harmonic_intel.h
@@ -45,8 +45,8 @@ class AngleHarmonicIntel : public AngleHarmonic {
void compute(int eflag, int vflag, IntelBuffers *buffers,
const ForceConst &fc);
template
- void eval(const int vflag, IntelBuffers * buffers,
- const ForceConst &fc);
+ void eval(const int vflag, IntelBuffers * buffers,
+ const ForceConst &fc);
template
void pack_force_const(ForceConst &fc,
IntelBuffers *buffers);
diff --git a/src/USER-INTEL/bond_fene_intel.cpp b/src/USER-INTEL/bond_fene_intel.cpp
index e61ab9be84..bb96135b2d 100644
--- a/src/USER-INTEL/bond_fene_intel.cpp
+++ b/src/USER-INTEL/bond_fene_intel.cpp
@@ -33,7 +33,7 @@ typedef struct { int a,b,t; } int3_t;
/* ---------------------------------------------------------------------- */
-BondFENEIntel::BondFENEIntel(LAMMPS *lmp) : BondFENE(lmp)
+BondFENEIntel::BondFENEIntel(LAMMPS *lmp) : BondFENE(lmp)
{
suffix_flag |= Suffix::INTEL;
}
@@ -70,23 +70,23 @@ void BondFENEIntel::compute(int eflag, int vflag)
template
void BondFENEIntel::compute(int eflag, int vflag,
- IntelBuffers *buffers,
- const ForceConst &fc)
+ IntelBuffers *buffers,
+ const ForceConst &fc)
{
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = 0;
if (evflag) {
- if (eflag) {
+ if (vflag && !eflag) {
if (force->newton_bond)
- eval<1,1,1>(vflag, buffers, fc);
+ eval<0,1,1>(vflag, buffers, fc);
else
- eval<1,1,0>(vflag, buffers, fc);
+ eval<0,1,0>(vflag, buffers, fc);
} else {
if (force->newton_bond)
- eval<1,0,1>(vflag, buffers, fc);
+ eval<1,1,1>(vflag, buffers, fc);
else
- eval<1,0,0>(vflag, buffers, fc);
+ eval<1,1,0>(vflag, buffers, fc);
}
} else {
if (force->newton_bond)
@@ -96,10 +96,10 @@ void BondFENEIntel::compute(int eflag, int vflag,
}
}
-template
-void BondFENEIntel::eval(const int vflag,
- IntelBuffers *buffers,
- const ForceConst &fc)
+template
+void BondFENEIntel::eval(const int vflag,
+ IntelBuffers *buffers,
+ const ForceConst &fc)
{
const int inum = neighbor->nbondlist;
if (inum == 0) return;
@@ -119,32 +119,42 @@ void BondFENEIntel::eval(const int vflag,
const int nthreads = tc;
acc_t oebond, ov0, ov1, ov2, ov3, ov4, ov5;
- if (EVFLAG) {
- if (EFLAG)
- oebond = (acc_t)0.0;
- if (vflag) {
- ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
- }
+ if (EFLAG) oebond = (acc_t)0.0;
+ if (VFLAG && vflag) {
+ ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
}
-
#if defined(_OPENMP)
#pragma omp parallel default(none) \
- shared(f_start,f_stride,fc) \
+ shared(f_start,f_stride,fc) \
reduction(+:oebond,ov0,ov1,ov2,ov3,ov4,ov5)
#endif
{
- int nfrom, nto, tid;
+ int nfrom, npl, nto, tid;
+ #ifdef LMP_INTEL_USE_SIMDOFF
IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads);
+ #else
+ IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads);
+ #endif
FORCE_T * _noalias const f = f_start + (tid * f_stride);
if (fix->need_zero(tid))
memset(f, 0, f_stride * sizeof(FORCE_T));
- const int3_t * _noalias const bondlist =
+ const int3_t * _noalias const bondlist =
(int3_t *) neighbor->bondlist[0];
- for (int n = nfrom; n < nto; n++) {
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ acc_t sebond, sv0, sv1, sv2, sv3, sv4, sv5;
+ if (EFLAG) sebond = (acc_t)0.0;
+ if (VFLAG && vflag) {
+ sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
+ }
+ #pragma simd reduction(+:sebond, sv0, sv1, sv2, sv3, sv4, sv5)
+ for (int n = nfrom; n < nto; n ++) {
+ #else
+ for (int n = nfrom; n < nto; n += npl) {
+ #endif
const int i1 = bondlist[n].a;
const int i2 = bondlist[n].b;
const int type = bondlist[n].t;
@@ -166,7 +176,7 @@ void BondFENEIntel::eval(const int vflag,
// if r -> r0, then rlogarg < 0.0 which is an error
// issue a warning and reset rlogarg = epsilon
// if r > 2*r0 something serious is wrong, abort
-
+
if (rlogarg < (flt_t)0.1) {
char str[128];
sprintf(str,"FENE bond too long: " BIGINT_FORMAT " "
@@ -176,18 +186,18 @@ void BondFENEIntel::eval(const int vflag,
if (rlogarg <= (flt_t)-3.0) error->one(FLERR,"Bad FENE bond");
rlogarg = (flt_t)0.1;
}
-
+
flt_t fbond = -k/rlogarg;
-
+
// force from LJ term
-
+
flt_t sr2,sr6;
if (rsq < (flt_t)TWO_1_3*sigmasq) {
- sr2 = sigmasq * irsq;
+ sr2 = sigmasq * irsq;
sr6 = sr2 * sr2 * sr2;
fbond += (flt_t)48.0 * epsilon * sr6 * (sr6 - (flt_t)0.5) * irsq;
}
-
+
// energy
flt_t ebond;
@@ -199,33 +209,48 @@ void BondFENEIntel::eval(const int vflag,
// apply force to each of 2 atoms
- if (NEWTON_BOND || i1 < nlocal) {
- f[i1].x += delx*fbond;
- f[i1].y += dely*fbond;
- f[i1].z += delz*fbond;
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ #pragma simdoff
+ #endif
+ {
+ if (NEWTON_BOND || i1 < nlocal) {
+ f[i1].x += delx*fbond;
+ f[i1].y += dely*fbond;
+ f[i1].z += delz*fbond;
+ }
+
+ if (NEWTON_BOND || i2 < nlocal) {
+ f[i2].x -= delx*fbond;
+ f[i2].y -= dely*fbond;
+ f[i2].z -= delz*fbond;
+ }
}
- if (NEWTON_BOND || i2 < nlocal) {
- f[i2].x -= delx*fbond;
- f[i2].y -= dely*fbond;
- f[i2].z -= delz*fbond;
- }
-
- if (EVFLAG) {
- IP_PRE_ev_tally_bond(EFLAG, eatom, vflag, ebond, i1, i2, fbond,
- delx, dely, delz, oebond, f, NEWTON_BOND,
+ if (EFLAG || VFLAG) {
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ IP_PRE_ev_tally_bond(EFLAG, VFLAG, eatom, vflag, ebond, i1, i2, fbond,
+ delx, dely, delz, sebond, f, NEWTON_BOND,
+ nlocal, sv0, sv1, sv2, sv3, sv4, sv5);
+ #else
+ IP_PRE_ev_tally_bond(EFLAG, VFLAG, eatom, vflag, ebond, i1, i2, fbond,
+ delx, dely, delz, oebond, f, NEWTON_BOND,
nlocal, ov0, ov1, ov2, ov3, ov4, ov5);
+ #endif
}
} // for n
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ if (EFLAG) oebond += sebond;
+ if (VFLAG && vflag) {
+ ov0 += sv0; ov1 += sv1; ov2 += sv2;
+ ov3 += sv3; ov4 += sv4; ov5 += sv5;
+ }
+ #endif
} // omp parallel
- if (EVFLAG) {
- if (EFLAG)
- energy += oebond;
- if (vflag) {
- virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
- virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
- }
+ if (EFLAG) energy += oebond;
+ if (VFLAG && vflag) {
+ virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
+ virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
}
fix->set_reduce_flag();
@@ -282,11 +307,11 @@ void BondFENEIntel::pack_force_const(ForceConst &fc,
template
void BondFENEIntel::ForceConst::set_ntypes(const int nbondtypes,
- Memory *memory) {
+ Memory *memory) {
if (nbondtypes != _nbondtypes) {
if (_nbondtypes > 0)
_memory->destroy(fc);
-
+
if (nbondtypes > 0)
_memory->create(fc,nbondtypes,"bondfeneintel.fc");
}
diff --git a/src/USER-INTEL/bond_fene_intel.h b/src/USER-INTEL/bond_fene_intel.h
index d64f1e7254..89c3033096 100644
--- a/src/USER-INTEL/bond_fene_intel.h
+++ b/src/USER-INTEL/bond_fene_intel.h
@@ -45,8 +45,8 @@ class BondFENEIntel : public BondFENE {
void compute(int eflag, int vflag, IntelBuffers *buffers,
const ForceConst &fc);
template
- void eval(const int vflag, IntelBuffers * buffers,
- const ForceConst &fc);
+ void eval(const int vflag, IntelBuffers * buffers,
+ const ForceConst &fc);
template
void pack_force_const(ForceConst &fc,
IntelBuffers *buffers);
diff --git a/src/USER-INTEL/bond_harmonic_intel.cpp b/src/USER-INTEL/bond_harmonic_intel.cpp
index 51a33b1cc3..beb0ebcdda 100644
--- a/src/USER-INTEL/bond_harmonic_intel.cpp
+++ b/src/USER-INTEL/bond_harmonic_intel.cpp
@@ -33,7 +33,7 @@ typedef struct { int a,b,t; } int3_t;
/* ---------------------------------------------------------------------- */
-BondHarmonicIntel::BondHarmonicIntel(LAMMPS *lmp) : BondHarmonic(lmp)
+BondHarmonicIntel::BondHarmonicIntel(LAMMPS *lmp) : BondHarmonic(lmp)
{
suffix_flag |= Suffix::INTEL;
}
@@ -70,23 +70,23 @@ void BondHarmonicIntel::compute(int eflag, int vflag)
template
void BondHarmonicIntel::compute(int eflag, int vflag,
- IntelBuffers *buffers,
- const ForceConst &fc)
+ IntelBuffers *buffers,
+ const ForceConst &fc)
{
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = 0;
if (evflag) {
- if (eflag) {
+ if (vflag && !eflag) {
if (force->newton_bond)
- eval<1,1,1>(vflag, buffers, fc);
+ eval<0,1,1>(vflag, buffers, fc);
else
- eval<1,1,0>(vflag, buffers, fc);
+ eval<0,1,0>(vflag, buffers, fc);
} else {
if (force->newton_bond)
- eval<1,0,1>(vflag, buffers, fc);
+ eval<1,1,1>(vflag, buffers, fc);
else
- eval<1,0,0>(vflag, buffers, fc);
+ eval<1,1,0>(vflag, buffers, fc);
}
} else {
if (force->newton_bond)
@@ -96,10 +96,10 @@ void BondHarmonicIntel::compute(int eflag, int vflag,
}
}
-template
-void BondHarmonicIntel::eval(const int vflag,
- IntelBuffers *buffers,
- const ForceConst &fc)
+template
+void BondHarmonicIntel::eval(const int vflag,
+ IntelBuffers *buffers,
+ const ForceConst &fc)
{
const int inum = neighbor->nbondlist;
if (inum == 0) return;
@@ -119,31 +119,42 @@ void BondHarmonicIntel::eval(const int vflag,
const int nthreads = tc;
acc_t oebond, ov0, ov1, ov2, ov3, ov4, ov5;
- if (EVFLAG) {
- if (EFLAG)
- oebond = (acc_t)0.0;
- if (vflag) {
- ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
- }
+ if (EFLAG) oebond = (acc_t)0.0;
+ if (VFLAG && vflag) {
+ ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
}
#if defined(_OPENMP)
#pragma omp parallel default(none) \
- shared(f_start,f_stride,fc) \
+ shared(f_start,f_stride,fc) \
reduction(+:oebond,ov0,ov1,ov2,ov3,ov4,ov5)
#endif
{
- int nfrom, nto, tid;
+ int nfrom, npl, nto, tid;
+ #ifdef LMP_INTEL_USE_SIMDOFF
IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads);
+ #else
+ IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads);
+ #endif
FORCE_T * _noalias const f = f_start + (tid * f_stride);
if (fix->need_zero(tid))
memset(f, 0, f_stride * sizeof(FORCE_T));
- const int3_t * _noalias const bondlist =
+ const int3_t * _noalias const bondlist =
(int3_t *) neighbor->bondlist[0];
- for (int n = nfrom; n < nto; n++) {
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ acc_t sebond, sv0, sv1, sv2, sv3, sv4, sv5;
+ if (EFLAG) sebond = (acc_t)0.0;
+ if (VFLAG && vflag) {
+ sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
+ }
+ #pragma simd reduction(+:sebond, sv0, sv1, sv2, sv3, sv4, sv5)
+ for (int n = nfrom; n < nto; n ++) {
+ #else
+ for (int n = nfrom; n < nto; n += npl) {
+ #endif
const int i1 = bondlist[n].a;
const int i2 = bondlist[n].b;
const int type = bondlist[n].t;
@@ -167,33 +178,50 @@ void BondHarmonicIntel::eval(const int vflag,
if (EFLAG) ebond = rk*dr;
// apply force to each of 2 atoms
- if (NEWTON_BOND || i1 < nlocal) {
- f[i1].x += delx*fbond;
- f[i1].y += dely*fbond;
- f[i1].z += delz*fbond;
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ #pragma simdoff
+ #endif
+ {
+ if (NEWTON_BOND || i1 < nlocal) {
+ f[i1].x += delx*fbond;
+ f[i1].y += dely*fbond;
+ f[i1].z += delz*fbond;
+ }
+
+ if (NEWTON_BOND || i2 < nlocal) {
+ f[i2].x -= delx*fbond;
+ f[i2].y -= dely*fbond;
+ f[i2].z -= delz*fbond;
+ }
}
- if (NEWTON_BOND || i2 < nlocal) {
- f[i2].x -= delx*fbond;
- f[i2].y -= dely*fbond;
- f[i2].z -= delz*fbond;
- }
-
- if (EVFLAG) {
- IP_PRE_ev_tally_bond(EFLAG, eatom, vflag, ebond, i1, i2, fbond,
- delx, dely, delz, oebond, f, NEWTON_BOND,
- nlocal, ov0, ov1, ov2, ov3, ov4, ov5);
+ if (EFLAG || VFLAG) {
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ IP_PRE_ev_tally_bond(EFLAG, VFLAG, eatom, vflag, ebond, i1, i2,
+ fbond, delx, dely, delz, sebond, f,
+ NEWTON_BOND, nlocal, sv0, sv1, sv2, sv3,
+ sv4, sv5);
+ #else
+ IP_PRE_ev_tally_bond(EFLAG, VFLAG, eatom, vflag, ebond, i1, i2,
+ fbond, delx, dely, delz, oebond, f,
+ NEWTON_BOND, nlocal, ov0, ov1, ov2, ov3,
+ ov4, ov5);
+ #endif
}
} // for n
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ if (EFLAG) oebond += sebond;
+ if (VFLAG && vflag) {
+ ov0 += sv0; ov1 += sv1; ov2 += sv2;
+ ov3 += sv3; ov4 += sv4; ov5 += sv5;
+ }
+ #endif
} // omp parallel
- if (EVFLAG) {
- if (EFLAG)
- energy += oebond;
- if (vflag) {
- virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
- virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
- }
+ if (EFLAG) energy += oebond;
+ if (VFLAG && vflag) {
+ virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
+ virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
}
fix->set_reduce_flag();
@@ -248,11 +276,11 @@ void BondHarmonicIntel::pack_force_const(ForceConst &fc,
template
void BondHarmonicIntel::ForceConst::set_ntypes(const int nbondtypes,
- Memory *memory) {
+ Memory *memory) {
if (nbondtypes != _nbondtypes) {
if (_nbondtypes > 0)
_memory->destroy(fc);
-
+
if (nbondtypes > 0)
_memory->create(fc,nbondtypes,"bondharmonicintel.fc");
}
diff --git a/src/USER-INTEL/bond_harmonic_intel.h b/src/USER-INTEL/bond_harmonic_intel.h
index 0de844cddf..8fc04f432a 100644
--- a/src/USER-INTEL/bond_harmonic_intel.h
+++ b/src/USER-INTEL/bond_harmonic_intel.h
@@ -45,8 +45,8 @@ class BondHarmonicIntel : public BondHarmonic {
void compute(int eflag, int vflag, IntelBuffers *buffers,
const ForceConst &fc);
template
- void eval(const int vflag, IntelBuffers * buffers,
- const ForceConst &fc);
+ void eval(const int vflag, IntelBuffers * buffers,
+ const ForceConst &fc);
template
void pack_force_const(ForceConst &fc,
IntelBuffers *buffers);
diff --git a/src/USER-INTEL/dihedral_charmm_intel.cpp b/src/USER-INTEL/dihedral_charmm_intel.cpp
index c07c226611..715cef4d37 100644
--- a/src/USER-INTEL/dihedral_charmm_intel.cpp
+++ b/src/USER-INTEL/dihedral_charmm_intel.cpp
@@ -80,8 +80,8 @@ void DihedralCharmmIntel::compute(int eflag, int vflag)
template
void DihedralCharmmIntel::compute(int eflag, int vflag,
- IntelBuffers *buffers,
- const ForceConst &fc)
+ IntelBuffers *buffers,
+ const ForceConst &fc)
{
if (eflag || vflag) {
ev_setup(eflag,vflag);
@@ -93,16 +93,16 @@ void DihedralCharmmIntel::compute(int eflag, int vflag,
force->pair->vflag_either = force->pair->vflag_global = 1;
if (evflag) {
- if (eflag) {
+ if (vflag && !eflag) {
if (force->newton_bond)
- eval<1,1,1>(vflag, buffers, fc);
+ eval<0,1,1>(vflag, buffers, fc);
else
- eval<1,1,0>(vflag, buffers, fc);
+ eval<0,1,0>(vflag, buffers, fc);
} else {
if (force->newton_bond)
- eval<1,0,1>(vflag, buffers, fc);
+ eval<1,1,1>(vflag, buffers, fc);
else
- eval<1,0,0>(vflag, buffers, fc);
+ eval<1,1,0>(vflag, buffers, fc);
}
} else {
if (force->newton_bond)
@@ -114,10 +114,10 @@ void DihedralCharmmIntel::compute(int eflag, int vflag,
#ifndef LMP_USE_AVXCD_DHC
-template
-void DihedralCharmmIntel::eval(const int vflag,
- IntelBuffers *buffers,
- const ForceConst &fc)
+template
+void DihedralCharmmIntel::eval(const int vflag,
+ IntelBuffers *buffers,
+ const ForceConst &fc)
{
const int inum = neighbor->ndihedrallist;
@@ -140,50 +140,50 @@ void DihedralCharmmIntel::eval(const int vflag,
acc_t oedihedral, ov0, ov1, ov2, ov3, ov4, ov5;
acc_t oevdwl, oecoul, opv0, opv1, opv2, opv3, opv4, opv5;
- if (EVFLAG) {
- if (EFLAG)
- oevdwl = oecoul = oedihedral = (acc_t)0.0;
- if (vflag) {
- ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
- opv0 = opv1 = opv2 = opv3 = opv4 = opv5 = (acc_t)0.0;
- }
+ if (EFLAG) oevdwl = oecoul = oedihedral = (acc_t)0.0;
+ if (VFLAG && vflag) {
+ ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
+ opv0 = opv1 = opv2 = opv3 = opv4 = opv5 = (acc_t)0.0;
}
#if defined(_OPENMP)
#pragma omp parallel default(none) \
- shared(f_start,f_stride,fc) \
+ shared(f_start,f_stride,fc) \
reduction(+:oevdwl,oecoul,oedihedral,ov0,ov1,ov2,ov3,ov4,ov5, \
- opv0,opv1,opv2,opv3,opv4,opv5)
+ opv0,opv1,opv2,opv3,opv4,opv5)
#endif
{
+ #if defined(LMP_SIMD_COMPILER_TEST)
int nfrom, nto, tid;
IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads);
+ #else
+ int nfrom, npl, nto, tid;
+ IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads);
+ #endif
FORCE_T * _noalias const f = f_start + (tid * f_stride);
if (fix->need_zero(tid))
memset(f, 0, f_stride * sizeof(FORCE_T));
- const int5_t * _noalias const dihedrallist =
+ const int5_t * _noalias const dihedrallist =
(int5_t *) neighbor->dihedrallist[0];
const flt_t qqrd2e = force->qqrd2e;
acc_t sedihedral, sv0, sv1, sv2, sv3, sv4, sv5;
acc_t sevdwl, secoul, spv0, spv1, spv2, spv3, spv4, spv5;
- if (EVFLAG) {
- if (EFLAG)
- sevdwl = secoul = sedihedral = (acc_t)0.0;
- if (vflag) {
- sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
- spv0 = spv1 = spv2 = spv3 = spv4 = spv5 = (acc_t)0.0;
- }
+ if (EFLAG) sevdwl = secoul = sedihedral = (acc_t)0.0;
+ if (VFLAG && vflag) {
+ sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
+ spv0 = spv1 = spv2 = spv3 = spv4 = spv5 = (acc_t)0.0;
}
#if defined(LMP_SIMD_COMPILER_TEST)
#pragma vector aligned
#pragma simd reduction(+:sedihedral, sevdwl, secoul, sv0, sv1, sv2, \
- sv3, sv4, sv5, spv0, spv1, spv2, spv3, spv4, spv5)
- #endif
+ sv3, sv4, sv5, spv0, spv1, spv2, spv3, spv4, spv5)
for (int n = nfrom; n < nto; n++) {
+ #endif
+ for (int n = nfrom; n < nto; n += npl) {
const int i1 = dihedrallist[n].a;
const int i2 = dihedrallist[n].b;
const int i3 = dihedrallist[n].c;
@@ -204,7 +204,7 @@ void DihedralCharmmIntel::eval(const int vflag,
const flt_t vb2zm = x[i2].z - x[i3].z;
// 3rd bond
-
+
const flt_t vb3x = x[i4].x - x[i3].x;
const flt_t vb3y = x[i4].y - x[i3].y;
const flt_t vb3z = x[i4].z - x[i3].z;
@@ -244,25 +244,25 @@ void DihedralCharmmIntel::eval(const int vflag,
// error check
#ifndef LMP_SIMD_COMPILER_TEST
if (c > PTOLERANCE || c < MTOLERANCE) {
- int me = comm->me;
+ int me = comm->me;
- if (screen) {
- char str[128];
- sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " "
- TAGINT_FORMAT " " TAGINT_FORMAT " "
- TAGINT_FORMAT " " TAGINT_FORMAT,
- me,tid,update->ntimestep,
- atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
- error->warning(FLERR,str,0);
- fprintf(screen," 1st atom: %d %g %g %g\n",
- me,x[i1].x,x[i1].y,x[i1].z);
- fprintf(screen," 2nd atom: %d %g %g %g\n",
- me,x[i2].x,x[i2].y,x[i2].z);
- fprintf(screen," 3rd atom: %d %g %g %g\n",
- me,x[i3].x,x[i3].y,x[i3].z);
- fprintf(screen," 4th atom: %d %g %g %g\n",
- me,x[i4].x,x[i4].y,x[i4].z);
- }
+ if (screen) {
+ char str[128];
+ sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " "
+ TAGINT_FORMAT " " TAGINT_FORMAT " "
+ TAGINT_FORMAT " " TAGINT_FORMAT,
+ me,tid,update->ntimestep,
+ atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+ error->warning(FLERR,str,0);
+ fprintf(screen," 1st atom: %d %g %g %g\n",
+ me,x[i1].x,x[i1].y,x[i1].z);
+ fprintf(screen," 2nd atom: %d %g %g %g\n",
+ me,x[i2].x,x[i2].y,x[i2].z);
+ fprintf(screen," 3rd atom: %d %g %g %g\n",
+ me,x[i3].x,x[i3].y,x[i3].z);
+ fprintf(screen," 4th atom: %d %g %g %g\n",
+ me,x[i4].x,x[i4].y,x[i4].z);
+ }
}
#endif
@@ -279,19 +279,19 @@ void DihedralCharmmIntel::eval(const int vflag,
ddf1 = df1 = (flt_t)0.0;
for (int i = 0; i < m; i++) {
- ddf1 = p*c - df1*s;
- df1 = p*s + df1*c;
- p = ddf1;
+ ddf1 = p*c - df1*s;
+ df1 = p*s + df1*c;
+ p = ddf1;
}
p = p*tcos_shift + df1*tsin_shift;
df1 = df1*tcos_shift - ddf1*tsin_shift;
df1 *= -m;
p += (flt_t)1.0;
-
+
if (m == 0) {
- p = (flt_t)1.0 + tcos_shift;
- df1 = (flt_t)0.0;
+ p = (flt_t)1.0 + tcos_shift;
+ df1 = (flt_t)0.0;
}
const flt_t fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm;
@@ -333,14 +333,14 @@ void DihedralCharmmIntel::eval(const int vflag,
const flt_t f3y = -sy2 - f4y;
const flt_t f3z = -sz2 - f4z;
- if (EVFLAG) {
- flt_t deng;
- if (EFLAG) deng = tk * p;
- IP_PRE_ev_tally_dihed(EFLAG, eatom, vflag, deng, i1, i2, i3, i4, f1x,
- f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z, vb1x,
- vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x, vb3y,
- vb3z, sedihedral, f, NEWTON_BOND, nlocal,
- sv0, sv1, sv2, sv3, sv4, sv5);
+ if (EFLAG || VFLAG) {
+ flt_t deng;
+ if (EFLAG) deng = tk * p;
+ IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, deng, i1, i2, i3,
+ i4, f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y,
+ f4z, vb1x, vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm,
+ vb3x, vb3y, vb3z, sedihedral, f, NEWTON_BOND,
+ nlocal, sv0, sv1, sv2, sv3, sv4, sv5);
}
@@ -349,15 +349,15 @@ void DihedralCharmmIntel::eval(const int vflag,
#endif
{
if (NEWTON_BOND || i2 < nlocal) {
- f[i2].x += f2x;
- f[i2].y += f2y;
- f[i2].z += f2z;
+ f[i2].x += f2x;
+ f[i2].y += f2y;
+ f[i2].z += f2z;
}
if (NEWTON_BOND || i3 < nlocal) {
- f[i3].x += f3x;
- f[i3].y += f3y;
- f[i3].z += f3z;
+ f[i3].x += f3x;
+ f[i3].y += f3y;
+ f[i3].z += f3z;
}
}
@@ -372,54 +372,54 @@ void DihedralCharmmIntel::eval(const int vflag,
flt_t forcecoul;
if (implicit) forcecoul = qqrd2e * q[i1]*q[i4]*r2inv;
else forcecoul = qqrd2e * q[i1]*q[i4]*sqrt(r2inv);
- const flt_t forcelj = r6inv * (fc.ljp[itype][jtype].lj1*r6inv -
- fc.ljp[itype][jtype].lj2);
+ const flt_t forcelj = r6inv * (fc.ljp[itype][jtype].lj1*r6inv -
+ fc.ljp[itype][jtype].lj2);
const flt_t fpair = tweight * (forcelj+forcecoul)*r2inv;
if (NEWTON_BOND || i1 < nlocal) {
- f1x += delx*fpair;
- f1y += dely*fpair;
- f1z += delz*fpair;
+ f1x += delx*fpair;
+ f1y += dely*fpair;
+ f1z += delz*fpair;
}
if (NEWTON_BOND || i4 < nlocal) {
- f4x -= delx*fpair;
- f4y -= dely*fpair;
- f4z -= delz*fpair;
+ f4x -= delx*fpair;
+ f4y -= dely*fpair;
+ f4z -= delz*fpair;
}
- if (EVFLAG) {
- flt_t ev_pre = (flt_t)0;
- if (NEWTON_BOND || i1 < nlocal)
- ev_pre += (flt_t)0.5;
- if (NEWTON_BOND || i4 < nlocal)
- ev_pre += (flt_t)0.5;
+ if (EFLAG || VFLAG) {
+ flt_t ev_pre = (flt_t)0;
+ if (NEWTON_BOND || i1 < nlocal)
+ ev_pre += (flt_t)0.5;
+ if (NEWTON_BOND || i4 < nlocal)
+ ev_pre += (flt_t)0.5;
- if (EFLAG) {
- flt_t ecoul, evdwl;
- ecoul = tweight * forcecoul;
- evdwl = tweight * r6inv * (fc.ljp[itype][jtype].lj3*r6inv -
- fc.ljp[itype][jtype].lj4);
- secoul += ev_pre * ecoul;
- sevdwl += ev_pre * evdwl;
- if (eatom) {
- evdwl *= (flt_t)0.5;
- evdwl += (flt_t)0.5 * ecoul;
- if (NEWTON_BOND || i1 < nlocal)
- f[i1].w += evdwl;
- if (NEWTON_BOND || i4 < nlocal)
- f[i4].w += evdwl;
- }
- }
- // IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair,
- // delx, dely, delz);
- if (vflag) {
- spv0 += ev_pre * delx * delx * fpair;
- spv1 += ev_pre * dely * dely * fpair;
- spv2 += ev_pre * delz * delz * fpair;
- spv3 += ev_pre * delx * dely * fpair;
- spv4 += ev_pre * delx * delz * fpair;
- spv5 += ev_pre * dely * delz * fpair;
- }
+ if (EFLAG) {
+ flt_t ecoul, evdwl;
+ ecoul = tweight * forcecoul;
+ evdwl = tweight * r6inv * (fc.ljp[itype][jtype].lj3*r6inv -
+ fc.ljp[itype][jtype].lj4);
+ secoul += ev_pre * ecoul;
+ sevdwl += ev_pre * evdwl;
+ if (eatom) {
+ evdwl *= (flt_t)0.5;
+ evdwl += (flt_t)0.5 * ecoul;
+ if (NEWTON_BOND || i1 < nlocal)
+ f[i1].w += evdwl;
+ if (NEWTON_BOND || i4 < nlocal)
+ f[i4].w += evdwl;
+ }
+ }
+ // IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair,
+ // delx, dely, delz);
+ if (VFLAG && vflag) {
+ spv0 += ev_pre * delx * delx * fpair;
+ spv1 += ev_pre * dely * dely * fpair;
+ spv2 += ev_pre * delz * delz * fpair;
+ spv3 += ev_pre * delx * dely * fpair;
+ spv4 += ev_pre * delx * delz * fpair;
+ spv5 += ev_pre * dely * delz * fpair;
+ }
}
// apply force to each of 4 atoms
@@ -428,48 +428,44 @@ void DihedralCharmmIntel::eval(const int vflag,
#endif
{
if (NEWTON_BOND || i1 < nlocal) {
- f[i1].x += f1x;
- f[i1].y += f1y;
- f[i1].z += f1z;
+ f[i1].x += f1x;
+ f[i1].y += f1y;
+ f[i1].z += f1z;
}
if (NEWTON_BOND || i4 < nlocal) {
- f[i4].x += f4x;
- f[i4].y += f4y;
- f[i4].z += f4z;
+ f[i4].x += f4x;
+ f[i4].y += f4y;
+ f[i4].z += f4z;
}
}
} // for n
- if (EVFLAG) {
- if (EFLAG) {
- oedihedral += sedihedral;
- oecoul += secoul;
- oevdwl += sevdwl;
- }
- if (vflag) {
- ov0 += sv0; ov1 += sv1; ov2 += sv2; ov3 += sv3; ov4 += sv4; ov5 += sv5;
- opv0 += spv0; opv1 += spv1; opv2 += spv2;
- opv3 += spv3; opv4 += spv4; opv5 += spv5;
- }
+ if (EFLAG) {
+ oedihedral += sedihedral;
+ oecoul += secoul;
+ oevdwl += sevdwl;
+ }
+ if (VFLAG && vflag) {
+ ov0 += sv0; ov1 += sv1; ov2 += sv2; ov3 += sv3; ov4 += sv4; ov5 += sv5;
+ opv0 += spv0; opv1 += spv1; opv2 += spv2;
+ opv3 += spv3; opv4 += spv4; opv5 += spv5;
}
} // omp parallel
- if (EVFLAG) {
- if (EFLAG) {
- energy += oedihedral;
- force->pair->eng_vdwl += oevdwl;
- force->pair->eng_coul += oecoul;
- }
- if (vflag) {
- virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
- virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
- force->pair->virial[0] += opv0;
- force->pair->virial[1] += opv1;
- force->pair->virial[2] += opv2;
- force->pair->virial[3] += opv3;
- force->pair->virial[4] += opv4;
- force->pair->virial[5] += opv5;
- }
+ if (EFLAG) {
+ energy += oedihedral;
+ force->pair->eng_vdwl += oevdwl;
+ force->pair->eng_coul += oecoul;
+ }
+ if (VFLAG && vflag) {
+ virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
+ virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
+ force->pair->virial[0] += opv0;
+ force->pair->virial[1] += opv1;
+ force->pair->virial[2] += opv2;
+ force->pair->virial[3] += opv3;
+ force->pair->virial[4] += opv4;
+ force->pair->virial[5] += opv5;
}
fix->set_reduce_flag();
@@ -488,10 +484,10 @@ authors for more details.
------------------------------------------------------------------------- */
-template
-void DihedralCharmmIntel::eval(const int vflag,
- IntelBuffers *buffers,
- const ForceConst &fc)
+template
+void DihedralCharmmIntel::eval(const int vflag,
+ IntelBuffers *buffers,
+ const ForceConst &fc)
{
typedef typename SIMD_type::SIMD_vec SIMD_flt_t;
@@ -518,30 +514,28 @@ void DihedralCharmmIntel::eval(const int vflag,
acc_t oedihedral, ov0, ov1, ov2, ov3, ov4, ov5;
acc_t oevdwl, oecoul, opv0, opv1, opv2, opv3, opv4, opv5;
- if (EVFLAG) {
- if (EFLAG)
- oevdwl = oecoul = oedihedral = (acc_t)0.0;
- if (vflag) {
- ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
- opv0 = opv1 = opv2 = opv3 = opv4 = opv5 = (acc_t)0.0;
- }
+ if (EFLAG) oevdwl = oecoul = oedihedral = (acc_t)0.0;
+ if (VFLAG && vflag) {
+ ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
+ opv0 = opv1 = opv2 = opv3 = opv4 = opv5 = (acc_t)0.0;
}
#if defined(_OPENMP)
#pragma omp parallel default(none) \
- shared(f_start,f_stride,fc) \
+ shared(f_start,f_stride,fc) \
reduction(+:oevdwl,oecoul,oedihedral,ov0,ov1,ov2,ov3,ov4,ov5, \
- opv0,opv1,opv2,opv3,opv4,opv5)
+ opv0,opv1,opv2,opv3,opv4,opv5)
#endif
{
- int nfrom, nto, tid;
- IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads);
+ int nfrom, npl, nto, tid;
+ IP_PRE_omp_stride_id_vec(nfrom, npl, nto, tid, inum, nthreads,
+ swidth);
FORCE_T * _noalias const f = f_start + (tid * f_stride);
if (fix->need_zero(tid))
memset(f, 0, f_stride * sizeof(FORCE_T));
- const int * _noalias const dihedrallist =
+ const int * _noalias const dihedrallist =
(int *) neighbor->dihedrallist[0];
const flt_t * _noalias const weight = &(fc.weight[0]);
const flt_t * _noalias const x_f = &(x[0].x);
@@ -559,36 +553,34 @@ void DihedralCharmmIntel::eval(const int vflag,
SIMD_acc_t sedihedral, sv0, sv1, sv2, sv3, sv4, sv5;
SIMD_acc_t sevdwl, secoul, spv0, spv1, spv2, spv3, spv4, spv5;
- if (EVFLAG) {
- if (EFLAG) {
- sevdwl = SIMD_set((acc_t)0.0);
- secoul = SIMD_set((acc_t)0.0);
- sedihedral = SIMD_set((acc_t)0.0);
- }
- if (vflag) {
- sv0 = SIMD_set((acc_t)0.0);
- sv1 = SIMD_set((acc_t)0.0);
- sv2 = SIMD_set((acc_t)0.0);
- sv3 = SIMD_set((acc_t)0.0);
- sv4 = SIMD_set((acc_t)0.0);
- sv5 = SIMD_set((acc_t)0.0);
- spv0 = SIMD_set((acc_t)0.0);
- spv1 = SIMD_set((acc_t)0.0);
- spv2 = SIMD_set((acc_t)0.0);
- spv3 = SIMD_set((acc_t)0.0);
- spv4 = SIMD_set((acc_t)0.0);
- spv5 = SIMD_set((acc_t)0.0);
- }
+ if (EFLAG) {
+ sevdwl = SIMD_set((acc_t)0.0);
+ secoul = SIMD_set((acc_t)0.0);
+ sedihedral = SIMD_set((acc_t)0.0);
+ }
+ if (VFLAG && vflag) {
+ sv0 = SIMD_set((acc_t)0.0);
+ sv1 = SIMD_set((acc_t)0.0);
+ sv2 = SIMD_set((acc_t)0.0);
+ sv3 = SIMD_set((acc_t)0.0);
+ sv4 = SIMD_set((acc_t)0.0);
+ sv5 = SIMD_set((acc_t)0.0);
+ spv0 = SIMD_set((acc_t)0.0);
+ spv1 = SIMD_set((acc_t)0.0);
+ spv2 = SIMD_set((acc_t)0.0);
+ spv3 = SIMD_set((acc_t)0.0);
+ spv4 = SIMD_set((acc_t)0.0);
+ spv5 = SIMD_set((acc_t)0.0);
}
SIMD_int n_offset = SIMD_set(0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50,
- 55, 60, 65, 70, 75) + (nfrom * 5);
+ 55, 60, 65, 70, 75) + (nfrom * 5);
const int nto5 = nto * 5;
const int nlocals4 = nlocal << 4;
const SIMD_int simd_nlocals4 = SIMD_set(nlocals4);
const int ntypes = atom->ntypes + 1;
- for (int n = nfrom; n < nto; n += swidth) {
+ for (int n = nfrom; n < nto; n += npl) {
SIMD_mask nmask = n_offset < nto5;
SIMD_int i1 = SIMD_gather(nmask, dihedrallist, n_offset);
const SIMD_flt_t q1 = SIMD_gather(nmask, q, i1);
@@ -601,7 +593,7 @@ void DihedralCharmmIntel::eval(const int vflag,
SIMD_int type = SIMD_gather(nmask, dihedrallist+4, n_offset);
const SIMD_flt_t tweight = SIMD_gather(nmask, weight, type);
type = type << 2;
- n_offset = n_offset + swidth * 5;
+ n_offset = n_offset + npl * 5;
// 1st bond
@@ -626,7 +618,7 @@ void DihedralCharmmIntel::eval(const int vflag,
const SIMD_flt_t vb2zm = z2 - z3;
// 3rd bond
-
+
SIMD_flt_t x4, y4, z4;
SIMD_int jtype;
@@ -672,7 +664,7 @@ void DihedralCharmmIntel::eval(const int vflag,
const SIMD_flt_t ptol = SIMD_set(PTOLERANCE);
const SIMD_flt_t ntol = SIMD_set(MTOLERANCE);
if (c > ptol || c < ntol)
- if (screen)
+ if (screen)
error->warning(FLERR,"Dihedral problem.");
c = SIMD_set(c, c > one, one);
@@ -686,14 +678,14 @@ void DihedralCharmmIntel::eval(const int vflag,
SIMD_flt_t p(one);
SIMD_flt_t ddf1(szero);
SIMD_flt_t df1(szero);
-
+
const int m_max = SIMD_max(m);
for (int i = 0; i < m_max; i++) {
- const SIMD_mask my_m = i < m;
- ddf1 = SIMD_set(ddf1, my_m, p*c - df1*s);
- df1 = SIMD_set(df1, my_m, p*s + df1*c);
- p = SIMD_set(p, my_m, ddf1);
+ const SIMD_mask my_m = i < m;
+ ddf1 = SIMD_set(ddf1, my_m, p*c - df1*s);
+ df1 = SIMD_set(df1, my_m, p*s + df1*c);
+ p = SIMD_set(p, my_m, ddf1);
}
SIMD_flt_t multf;
@@ -702,7 +694,7 @@ void DihedralCharmmIntel::eval(const int vflag,
df1 = df1*tcos_shift - ddf1*tsin_shift;
df1 = df1 * multf;
p = p + one;
-
+
SIMD_mask mzero = (m == SIMD_set((int)0));
p = SIMD_set(p, mzero, one + tcos_shift);
df1 = SIMD_set(df1, mzero, szero);
@@ -747,41 +739,41 @@ void DihedralCharmmIntel::eval(const int vflag,
SIMD_flt_t f3z = -sz2 - f4z;
SIMD_flt_t qdeng;
- if (EVFLAG) {
- SIMD_flt_t ev_pre;
- if (NEWTON_BOND) ev_pre = one;
- else {
- ev_pre = szero;
- const SIMD_flt_t quarter = SIMD_set((flt_t)0.25);
- ev_pre = SIMD_add(ev_pre, i1 < simd_nlocals4, ev_pre, quarter);
- ev_pre = SIMD_add(ev_pre, i2 < simd_nlocals4, ev_pre, quarter);
- ev_pre = SIMD_add(ev_pre, i3 < simd_nlocals4, ev_pre, quarter);
- ev_pre = SIMD_add(ev_pre, i4 < simd_nlocals4, ev_pre, quarter);
- }
- SIMD_zero_masked(nmask, ev_pre);
- if (EFLAG) {
- const SIMD_flt_t deng = tk * p;
- sedihedral = SIMD_ev_add(sedihedral, ev_pre * deng);
- if (eatom) {
- qdeng = deng * SIMD_set((flt_t)0.25);
- SIMD_mask newton_mask;
- if (NEWTON_BOND) newton_mask = nmask;
- if (!NEWTON_BOND) newton_mask = SIMD_lt(nmask, i2, simd_nlocals4);
- SIMD_flt_t ieng = qdeng;
- SIMD_jeng_update(newton_mask, featom, i2, ieng);
- ieng = qdeng;
- if (!NEWTON_BOND) newton_mask = SIMD_lt(nmask, i3, simd_nlocals4);
- SIMD_jeng_update(newton_mask, featom, i3, ieng);
- }
- }
- if (vflag) {
+ if (EFLAG || VFLAG) {
+ SIMD_flt_t ev_pre;
+ if (NEWTON_BOND) ev_pre = one;
+ else {
+ ev_pre = szero;
+ const SIMD_flt_t quarter = SIMD_set((flt_t)0.25);
+ ev_pre = SIMD_add(ev_pre, i1 < simd_nlocals4, ev_pre, quarter);
+ ev_pre = SIMD_add(ev_pre, i2 < simd_nlocals4, ev_pre, quarter);
+ ev_pre = SIMD_add(ev_pre, i3 < simd_nlocals4, ev_pre, quarter);
+ ev_pre = SIMD_add(ev_pre, i4 < simd_nlocals4, ev_pre, quarter);
+ }
+ SIMD_zero_masked(nmask, ev_pre);
+ if (EFLAG) {
+ const SIMD_flt_t deng = tk * p;
+ sedihedral = SIMD_ev_add(sedihedral, ev_pre * deng);
+ if (eatom) {
+ qdeng = deng * SIMD_set((flt_t)0.25);
+ SIMD_mask newton_mask;
+ if (NEWTON_BOND) newton_mask = nmask;
+ if (!NEWTON_BOND) newton_mask = SIMD_lt(nmask, i2, simd_nlocals4);
+ SIMD_flt_t ieng = qdeng;
+ SIMD_jeng_update(newton_mask, featom, i2, ieng);
+ ieng = qdeng;
+ if (!NEWTON_BOND) newton_mask = SIMD_lt(nmask, i3, simd_nlocals4);
+ SIMD_jeng_update(newton_mask, featom, i3, ieng);
+ }
+ }
+ if (VFLAG && vflag) {
sv0 = SIMD_ev_add(sv0, ev_pre*(vb1x*f1x-vb2xm*f3x+(vb3x-vb2xm)*f4x));
- sv1 = SIMD_ev_add(sv1, ev_pre*(vb1y*f1y-vb2ym*f3y+(vb3y-vb2ym)*f4y));
- sv2 = SIMD_ev_add(sv2, ev_pre*(vb1z*f1z-vb2zm*f3z+(vb3z-vb2zm)*f4z));
- sv3 = SIMD_ev_add(sv3, ev_pre*(vb1x*f1y-vb2xm*f3y+(vb3x-vb2xm)*f4y));
- sv4 = SIMD_ev_add(sv4, ev_pre*(vb1x*f1z-vb2xm*f3z+(vb3x-vb2xm)*f4z));
- sv5 = SIMD_ev_add(sv5, ev_pre*(vb1y*f1z-vb2ym*f3z+(vb3y-vb2ym)*f4z));
- }
+ sv1 = SIMD_ev_add(sv1, ev_pre*(vb1y*f1y-vb2ym*f3y+(vb3y-vb2ym)*f4y));
+ sv2 = SIMD_ev_add(sv2, ev_pre*(vb1z*f1z-vb2zm*f3z+(vb3z-vb2zm)*f4z));
+ sv3 = SIMD_ev_add(sv3, ev_pre*(vb1x*f1y-vb2xm*f3y+(vb3x-vb2xm)*f4y));
+ sv4 = SIMD_ev_add(sv4, ev_pre*(vb1x*f1z-vb2xm*f3z+(vb3x-vb2xm)*f4z));
+ sv5 = SIMD_ev_add(sv5, ev_pre*(vb1y*f1z-vb2ym*f3z+(vb3y-vb2ym)*f4z));
+ }
}
SIMD_mask newton_mask;
@@ -816,28 +808,28 @@ void DihedralCharmmIntel::eval(const int vflag,
f4y = f4y - dely * fpair;
f4z = f4z - delz * fpair;
- if (EVFLAG) {
- SIMD_flt_t ev_pre;
- if (NEWTON_BOND) ev_pre = one;
- else {
- ev_pre = szero;
+ if (EFLAG || VFLAG) {
+ SIMD_flt_t ev_pre;
+ if (NEWTON_BOND) ev_pre = one;
+ else {
+ ev_pre = szero;
const SIMD_flt_t half = SIMD_set((flt_t)0.5);
ev_pre = SIMD_add(ev_pre, i1 < simd_nlocals4,ev_pre,half);
ev_pre = SIMD_add(ev_pre, i4 < simd_nlocals4,ev_pre,half);
- }
- SIMD_zero_masked(nmask, ev_pre);
+ }
+ SIMD_zero_masked(nmask, ev_pre);
- if (EFLAG) {
- const SIMD_flt_t ecoul = tweight * forcecoul;
- const SIMD_flt_t lj3 = SIMD_gather(nmask, plj3, ijtype);
- const SIMD_flt_t lj4 = SIMD_gather(nmask, plj4, ijtype);
- SIMD_flt_t evdwl = tweight * r6inv * (lj3 * r6inv - lj4);
- secoul = SIMD_ev_add(secoul, ev_pre * ecoul);
- sevdwl = SIMD_ev_add(sevdwl, ev_pre * evdwl);
- if (eatom) {
- const SIMD_flt_t half = SIMD_set((flt_t)0.5);
- evdwl = evdwl * half;
- evdwl = evdwl + half * ecoul + qdeng;
+ if (EFLAG) {
+ const SIMD_flt_t ecoul = tweight * forcecoul;
+ const SIMD_flt_t lj3 = SIMD_gather(nmask, plj3, ijtype);
+ const SIMD_flt_t lj4 = SIMD_gather(nmask, plj4, ijtype);
+ SIMD_flt_t evdwl = tweight * r6inv * (lj3 * r6inv - lj4);
+ secoul = SIMD_ev_add(secoul, ev_pre * ecoul);
+ sevdwl = SIMD_ev_add(sevdwl, ev_pre * evdwl);
+ if (eatom) {
+ const SIMD_flt_t half = SIMD_set((flt_t)0.5);
+ evdwl = evdwl * half;
+ evdwl = evdwl + half * ecoul + qdeng;
if (NEWTON_BOND) newton_mask = nmask;
if (!NEWTON_BOND) newton_mask = SIMD_lt(nmask, i1, simd_nlocals4);
@@ -846,16 +838,16 @@ void DihedralCharmmIntel::eval(const int vflag,
ieng = evdwl;
if (!NEWTON_BOND) newton_mask = SIMD_lt(nmask, i4, simd_nlocals4);
SIMD_jeng_update(newton_mask, featom, i4, ieng);
- }
- }
- if (vflag) {
+ }
+ }
+ if (VFLAG && vflag) {
spv0 = SIMD_ev_add(spv0, ev_pre * delx * delx * fpair);
- spv1 = SIMD_ev_add(spv1, ev_pre * dely * dely * fpair);
- spv2 = SIMD_ev_add(spv2, ev_pre * delz * delz * fpair);
- spv3 = SIMD_ev_add(spv3, ev_pre * delx * dely * fpair);
- spv4 = SIMD_ev_add(spv4, ev_pre * delx * delz * fpair);
- spv5 = SIMD_ev_add(spv5, ev_pre * dely * delz * fpair);
- }
+ spv1 = SIMD_ev_add(spv1, ev_pre * dely * dely * fpair);
+ spv2 = SIMD_ev_add(spv2, ev_pre * delz * delz * fpair);
+ spv3 = SIMD_ev_add(spv3, ev_pre * delx * dely * fpair);
+ spv4 = SIMD_ev_add(spv4, ev_pre * delx * delz * fpair);
+ spv5 = SIMD_ev_add(spv5, ev_pre * dely * delz * fpair);
+ }
}
if (NEWTON_BOND) newton_mask = nmask;
@@ -865,45 +857,41 @@ void DihedralCharmmIntel::eval(const int vflag,
SIMD_safe_jforce(newton_mask, pforce, i4, f4x, f4y, f4z);
} // for n
- if (EVFLAG) {
- if (EFLAG) {
- oedihedral += SIMD_sum(sedihedral);
- oecoul += SIMD_sum(secoul);
- oevdwl += SIMD_sum(sevdwl);
- }
- if (vflag) {
- ov0 += SIMD_sum(sv0);
- ov1 += SIMD_sum(sv1);
- ov2 += SIMD_sum(sv2);
- ov3 += SIMD_sum(sv3);
- ov4 += SIMD_sum(sv4);
- ov5 += SIMD_sum(sv5);
- opv0 += SIMD_sum(spv0);
- opv1 += SIMD_sum(spv1);
- opv2 += SIMD_sum(spv2);
- opv3 += SIMD_sum(spv3);
- opv4 += SIMD_sum(spv4);
- opv5 += SIMD_sum(spv5);
- }
+ if (EFLAG) {
+ oedihedral += SIMD_sum(sedihedral);
+ oecoul += SIMD_sum(secoul);
+ oevdwl += SIMD_sum(sevdwl);
+ }
+ if (VFLAG && vflag) {
+ ov0 += SIMD_sum(sv0);
+ ov1 += SIMD_sum(sv1);
+ ov2 += SIMD_sum(sv2);
+ ov3 += SIMD_sum(sv3);
+ ov4 += SIMD_sum(sv4);
+ ov5 += SIMD_sum(sv5);
+ opv0 += SIMD_sum(spv0);
+ opv1 += SIMD_sum(spv1);
+ opv2 += SIMD_sum(spv2);
+ opv3 += SIMD_sum(spv3);
+ opv4 += SIMD_sum(spv4);
+ opv5 += SIMD_sum(spv5);
}
} // omp parallel
- if (EVFLAG) {
- if (EFLAG) {
- energy += oedihedral;
- force->pair->eng_vdwl += oevdwl;
- force->pair->eng_coul += oecoul;
- }
- if (vflag) {
- virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
- virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
- force->pair->virial[0] += opv0;
- force->pair->virial[1] += opv1;
- force->pair->virial[2] += opv2;
- force->pair->virial[3] += opv3;
- force->pair->virial[4] += opv4;
- force->pair->virial[5] += opv5;
- }
+ if (EFLAG) {
+ energy += oedihedral;
+ force->pair->eng_vdwl += oevdwl;
+ force->pair->eng_coul += oecoul;
+ }
+ if (VFLAG && vflag) {
+ virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
+ virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
+ force->pair->virial[0] += opv0;
+ force->pair->virial[1] += opv1;
+ force->pair->virial[2] += opv2;
+ force->pair->virial[3] += opv3;
+ force->pair->virial[4] += opv4;
+ force->pair->virial[5] += opv5;
}
fix->set_reduce_flag();
@@ -945,7 +933,7 @@ void DihedralCharmmIntel::init_style()
template
void DihedralCharmmIntel::pack_force_const(ForceConst &fc,
- IntelBuffers *buffers)
+ IntelBuffers *buffers)
{
const int tp1 = atom->ntypes + 1;
@@ -953,12 +941,14 @@ void DihedralCharmmIntel::pack_force_const(ForceConst &fc,
fc.set_ntypes(tp1,bp1,memory);
buffers->set_ntypes(tp1);
- for (int i = 0; i < tp1; i++) {
- for (int j = 0; j < tp1; j++) {
- fc.ljp[i][j].lj1 = lj14_1[i][j];
- fc.ljp[i][j].lj2 = lj14_2[i][j];
- fc.ljp[i][j].lj3 = lj14_3[i][j];
- fc.ljp[i][j].lj4 = lj14_4[i][j];
+ if (weightflag) {
+ for (int i = 0; i < tp1; i++) {
+ for (int j = 0; j < tp1; j++) {
+ fc.ljp[i][j].lj1 = lj14_1[i][j];
+ fc.ljp[i][j].lj2 = lj14_2[i][j];
+ fc.ljp[i][j].lj3 = lj14_3[i][j];
+ fc.ljp[i][j].lj4 = lj14_4[i][j];
+ }
}
}
@@ -975,8 +965,8 @@ void DihedralCharmmIntel::pack_force_const(ForceConst &fc,
template
void DihedralCharmmIntel::ForceConst::set_ntypes(const int npairtypes,
- const int nbondtypes,
- Memory *memory) {
+ const int nbondtypes,
+ Memory *memory) {
if (npairtypes != _npairtypes) {
if (_npairtypes > 0)
_memory->destroy(ljp);
@@ -989,7 +979,7 @@ void DihedralCharmmIntel::ForceConst::set_ntypes(const int npairtypes,
_memory->destroy(bp);
_memory->destroy(weight);
}
-
+
if (nbondtypes > 0) {
_memory->create(bp,nbondtypes,"dihedralcharmmintel.bp");
_memory->create(weight,nbondtypes,"dihedralcharmmintel.weight");
diff --git a/src/USER-INTEL/dihedral_charmm_intel.h b/src/USER-INTEL/dihedral_charmm_intel.h
index 292faea9f9..d80b32c8ac 100644
--- a/src/USER-INTEL/dihedral_charmm_intel.h
+++ b/src/USER-INTEL/dihedral_charmm_intel.h
@@ -44,8 +44,8 @@ class DihedralCharmmIntel : public DihedralCharmm {
void compute(int eflag, int vflag, IntelBuffers *buffers,
const ForceConst &fc);
template
- void eval(const int vflag, IntelBuffers * buffers,
- const ForceConst &fc);
+ void eval(const int vflag, IntelBuffers * buffers,
+ const ForceConst &fc);
template
void pack_force_const(ForceConst &fc,
IntelBuffers *buffers);
@@ -58,7 +58,7 @@ class DihedralCharmmIntel : public DihedralCharmm {
class ForceConst {
public:
typedef struct { flt_t lj1, lj2, lj3, lj4; } fc_packed1;
- typedef struct { flt_t cos_shift, sin_shift, k;
+ typedef struct { flt_t cos_shift, sin_shift, k;
int multiplicity; } fc_packed3;
fc_packed1 **ljp;
diff --git a/src/USER-INTEL/dihedral_harmonic_intel.cpp b/src/USER-INTEL/dihedral_harmonic_intel.cpp
index 03ab152f49..196b024fa6 100644
--- a/src/USER-INTEL/dihedral_harmonic_intel.cpp
+++ b/src/USER-INTEL/dihedral_harmonic_intel.cpp
@@ -69,24 +69,24 @@ void DihedralHarmonicIntel::compute(int eflag, int vflag)
template
void DihedralHarmonicIntel::compute(int eflag, int vflag,
- IntelBuffers *buffers,
- const ForceConst &fc)
+ IntelBuffers *buffers,
+ const ForceConst &fc)
{
if (eflag || vflag) {
ev_setup(eflag,vflag);
} else evflag = 0;
if (evflag) {
- if (eflag) {
+ if (vflag && !eflag) {
if (force->newton_bond)
- eval<1,1,1>(vflag, buffers, fc);
+ eval<0,1,1>(vflag, buffers, fc);
else
- eval<1,1,0>(vflag, buffers, fc);
+ eval<0,1,0>(vflag, buffers, fc);
} else {
if (force->newton_bond)
- eval<1,0,1>(vflag, buffers, fc);
+ eval<1,1,1>(vflag, buffers, fc);
else
- eval<1,0,0>(vflag, buffers, fc);
+ eval<1,1,0>(vflag, buffers, fc);
}
} else {
if (force->newton_bond)
@@ -96,10 +96,10 @@ void DihedralHarmonicIntel::compute(int eflag, int vflag,
}
}
-template
-void DihedralHarmonicIntel::eval(const int vflag,
- IntelBuffers *buffers,
- const ForceConst &fc)
+template
+void DihedralHarmonicIntel::eval(const int vflag,
+ IntelBuffers *buffers,
+ const ForceConst &fc)
{
const int inum = neighbor->ndihedrallist;
@@ -120,40 +120,42 @@ void DihedralHarmonicIntel::eval(const int vflag,
const int nthreads = tc;
acc_t oedihedral, ov0, ov1, ov2, ov3, ov4, ov5;
- if (EVFLAG) {
- if (EFLAG)
- oedihedral = (acc_t)0.0;
- if (vflag) {
- ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
- }
+ if (EFLAG) oedihedral = (acc_t)0.0;
+ if (VFLAG && vflag) {
+ ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
}
#if defined(_OPENMP)
#pragma omp parallel default(none) \
- shared(f_start,f_stride,fc) \
+ shared(f_start,f_stride,fc) \
reduction(+:oedihedral,ov0,ov1,ov2,ov3,ov4,ov5)
#endif
{
- int nfrom, nto, tid;
+ int nfrom, npl, nto, tid;
+ #ifdef LMP_INTEL_USE_SIMDOFF
IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads);
+ #else
+ IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads);
+ #endif
FORCE_T * _noalias const f = f_start + (tid * f_stride);
if (fix->need_zero(tid))
memset(f, 0, f_stride * sizeof(FORCE_T));
- const int5_t * _noalias const dihedrallist =
+ const int5_t * _noalias const dihedrallist =
(int5_t *) neighbor->dihedrallist[0];
+ #ifdef LMP_INTEL_USE_SIMDOFF
acc_t sedihedral, sv0, sv1, sv2, sv3, sv4, sv5;
- if (EVFLAG) {
- if (EFLAG)
- sedihedral = (acc_t)0.0;
- if (vflag) {
- sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
- }
+ if (EFLAG) sedihedral = (acc_t)0.0;
+ if (VFLAG && vflag) {
+ sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
}
-
- for (int n = nfrom; n < nto; n++) {
+ #pragma simd reduction(+:sedihedral, sv0, sv1, sv2, sv3, sv4, sv5)
+ for (int n = nfrom; n < nto; n ++) {
+ #else
+ for (int n = nfrom; n < nto; n += npl) {
+ #endif
const int i1 = dihedrallist[n].a;
const int i2 = dihedrallist[n].b;
const int i3 = dihedrallist[n].c;
@@ -173,7 +175,7 @@ void DihedralHarmonicIntel::eval(const int vflag,
const flt_t vb2zm = x[i2].z - x[i3].z;
// 3rd bond
-
+
const flt_t vb3x = x[i4].x - x[i3].x;
const flt_t vb3y = x[i4].y - x[i3].y;
const flt_t vb3z = x[i4].z - x[i3].z;
@@ -203,27 +205,29 @@ void DihedralHarmonicIntel::eval(const int vflag,
const flt_t s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z);
// error check
+ #ifndef LMP_INTEL_USE_SIMDOFF
if (c > PTOLERANCE || c < MTOLERANCE) {
- int me = comm->me;
+ int me = comm->me;
- if (screen) {
- char str[128];
- sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " "
- TAGINT_FORMAT " " TAGINT_FORMAT " "
- TAGINT_FORMAT " " TAGINT_FORMAT,
- me,tid,update->ntimestep,
- atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
- error->warning(FLERR,str,0);
- fprintf(screen," 1st atom: %d %g %g %g\n",
- me,x[i1].x,x[i1].y,x[i1].z);
- fprintf(screen," 2nd atom: %d %g %g %g\n",
- me,x[i2].x,x[i2].y,x[i2].z);
- fprintf(screen," 3rd atom: %d %g %g %g\n",
- me,x[i3].x,x[i3].y,x[i3].z);
- fprintf(screen," 4th atom: %d %g %g %g\n",
- me,x[i4].x,x[i4].y,x[i4].z);
- }
+ if (screen) {
+ char str[128];
+ sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " "
+ TAGINT_FORMAT " " TAGINT_FORMAT " "
+ TAGINT_FORMAT " " TAGINT_FORMAT,
+ me,tid,update->ntimestep,
+ atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+ error->warning(FLERR,str,0);
+ fprintf(screen," 1st atom: %d %g %g %g\n",
+ me,x[i1].x,x[i1].y,x[i1].z);
+ fprintf(screen," 2nd atom: %d %g %g %g\n",
+ me,x[i2].x,x[i2].y,x[i2].z);
+ fprintf(screen," 3rd atom: %d %g %g %g\n",
+ me,x[i3].x,x[i3].y,x[i3].z);
+ fprintf(screen," 4th atom: %d %g %g %g\n",
+ me,x[i4].x,x[i4].y,x[i4].z);
+ }
}
+ #endif
if (c > (flt_t)1.0) c = (flt_t)1.0;
if (c < (flt_t)-1.0) c = (flt_t)-1.0;
@@ -238,19 +242,19 @@ void DihedralHarmonicIntel::eval(const int vflag,
ddf1 = df1 = (flt_t)0.0;
for (int i = 0; i < m; i++) {
- ddf1 = p*c - df1*s;
- df1 = p*s + df1*c;
- p = ddf1;
+ ddf1 = p*c - df1*s;
+ df1 = p*s + df1*c;
+ p = ddf1;
}
p = p*tcos_shift + df1*tsin_shift;
df1 = df1*tcos_shift - ddf1*tsin_shift;
df1 *= -m;
p += (flt_t)1.0;
-
+
if (m == 0) {
- p = (flt_t)1.0 + tcos_shift;
- df1 = (flt_t)0.0;
+ p = (flt_t)1.0 + tcos_shift;
+ df1 = (flt_t)0.0;
}
const flt_t fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm;
@@ -292,56 +296,66 @@ void DihedralHarmonicIntel::eval(const int vflag,
const flt_t f3y = -sy2 - f4y;
const flt_t f3z = -sz2 - f4z;
- if (EVFLAG) {
- flt_t deng;
- if (EFLAG) deng = tk * p;
- IP_PRE_ev_tally_dihed(EFLAG, eatom, vflag, deng, i1, i2, i3, i4, f1x,
- f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z, vb1x,
- vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x, vb3y,
- vb3z, sedihedral, f, NEWTON_BOND, nlocal,
- sv0, sv1, sv2, sv3, sv4, sv5);
+ if (EFLAG || VFLAG) {
+ flt_t deng;
+ if (EFLAG) deng = tk * p;
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, deng, i1, i2, i3, i4,
+ f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z,
+ vb1x, vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x,
+ vb3y, vb3z, sedihedral, f, NEWTON_BOND, nlocal,
+ sv0, sv1, sv2, sv3, sv4, sv5);
+ #else
+ IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, deng, i1, i2, i3, i4,
+ f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z,
+ vb1x, vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x,
+ vb3y, vb3z, oedihedral, f, NEWTON_BOND, nlocal,
+ ov0, ov1, ov2, ov3, ov4, ov5);
+ #endif
}
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ #pragma simdoff
+ #endif
{
if (NEWTON_BOND || i1 < nlocal) {
- f[i1].x += f1x;
- f[i1].y += f1y;
- f[i1].z += f1z;
+ f[i1].x += f1x;
+ f[i1].y += f1y;
+ f[i1].z += f1z;
}
if (NEWTON_BOND || i2 < nlocal) {
- f[i2].x += f2x;
- f[i2].y += f2y;
- f[i2].z += f2z;
+ f[i2].x += f2x;
+ f[i2].y += f2y;
+ f[i2].z += f2z;
}
if (NEWTON_BOND || i3 < nlocal) {
- f[i3].x += f3x;
- f[i3].y += f3y;
- f[i3].z += f3z;
+ f[i3].x += f3x;
+ f[i3].y += f3y;
+ f[i3].z += f3z;
}
if (NEWTON_BOND || i4 < nlocal) {
- f[i4].x += f4x;
- f[i4].y += f4y;
- f[i4].z += f4z;
+ f[i4].x += f4x;
+ f[i4].y += f4y;
+ f[i4].z += f4z;
}
}
} // for n
- if (EVFLAG) {
- if (EFLAG) oedihedral += sedihedral;
- if (vflag) {
- ov0 += sv0; ov1 += sv1; ov2 += sv2; ov3 += sv3; ov4 += sv4; ov5 += sv5;
- }
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ if (EFLAG) oedihedral += sedihedral;
+ if (VFLAG && vflag) {
+ ov0 += sv0; ov1 += sv1; ov2 += sv2;
+ ov3 += sv3; ov4 += sv4; ov5 += sv5;
}
+ #endif
} // omp parallel
- if (EVFLAG) {
- if (EFLAG) energy += oedihedral;
- if (vflag) {
- virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
- virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
- }
+ if (EFLAG) energy += oedihedral;
+ if (VFLAG && vflag) {
+ virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
+ virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
}
fix->set_reduce_flag();
@@ -381,7 +395,7 @@ void DihedralHarmonicIntel::init_style()
template
void DihedralHarmonicIntel::pack_force_const(ForceConst &fc,
- IntelBuffers *buffers)
+ IntelBuffers *buffers)
{
const int bp1 = atom->ndihedraltypes + 1;
fc.set_ntypes(bp1,memory);
@@ -398,11 +412,11 @@ void DihedralHarmonicIntel::pack_force_const(ForceConst &fc,
template
void DihedralHarmonicIntel::ForceConst::set_ntypes(const int nbondtypes,
- Memory *memory) {
+ Memory *memory) {
if (nbondtypes != _nbondtypes) {
if (_nbondtypes > 0)
_memory->destroy(bp);
-
+
if (nbondtypes > 0)
_memory->create(bp,nbondtypes,"dihedralcharmmintel.bp");
}
diff --git a/src/USER-INTEL/dihedral_harmonic_intel.h b/src/USER-INTEL/dihedral_harmonic_intel.h
index 41e3d20540..0a9cfaa042 100644
--- a/src/USER-INTEL/dihedral_harmonic_intel.h
+++ b/src/USER-INTEL/dihedral_harmonic_intel.h
@@ -44,8 +44,8 @@ class DihedralHarmonicIntel : public DihedralHarmonic {
void compute(int eflag, int vflag, IntelBuffers *buffers,
const ForceConst &fc);
template
- void eval(const int vflag, IntelBuffers * buffers,
- const ForceConst &fc);
+ void eval(const int vflag, IntelBuffers * buffers,
+ const ForceConst &fc);
template
void pack_force_const(ForceConst &fc,
IntelBuffers *buffers);
@@ -57,7 +57,7 @@ class DihedralHarmonicIntel : public DihedralHarmonic {
template
class ForceConst {
public:
- typedef struct { flt_t cos_shift, sin_shift, k;
+ typedef struct { flt_t cos_shift, sin_shift, k;
int multiplicity; } fc_packed1;
fc_packed1 *bp;
diff --git a/src/USER-INTEL/dihedral_opls_intel.cpp b/src/USER-INTEL/dihedral_opls_intel.cpp
index bfd5a53956..1abeba1d5e 100644
--- a/src/USER-INTEL/dihedral_opls_intel.cpp
+++ b/src/USER-INTEL/dihedral_opls_intel.cpp
@@ -73,24 +73,24 @@ void DihedralOPLSIntel::compute(int eflag, int vflag)
template
void DihedralOPLSIntel::compute(int eflag, int vflag,
- IntelBuffers *buffers,
- const ForceConst &fc)
+ IntelBuffers *buffers,
+ const ForceConst &fc)
{
if (eflag || vflag) {
ev_setup(eflag,vflag);
} else evflag = 0;
if (evflag) {
- if (eflag) {
+ if (vflag && !eflag) {
if (force->newton_bond)
- eval<1,1,1>(vflag, buffers, fc);
+ eval<0,1,1>(vflag, buffers, fc);
else
- eval<1,1,0>(vflag, buffers, fc);
+ eval<0,1,0>(vflag, buffers, fc);
} else {
if (force->newton_bond)
- eval<1,0,1>(vflag, buffers, fc);
+ eval<1,1,1>(vflag, buffers, fc);
else
- eval<1,0,0>(vflag, buffers, fc);
+ eval<1,1,0>(vflag, buffers, fc);
}
} else {
if (force->newton_bond)
@@ -100,10 +100,10 @@ void DihedralOPLSIntel::compute(int eflag, int vflag,
}
}
-template
-void DihedralOPLSIntel::eval(const int vflag,
- IntelBuffers *buffers,
- const ForceConst &fc)
+template
+void DihedralOPLSIntel::eval(const int vflag,
+ IntelBuffers *buffers,
+ const ForceConst &fc)
{
const int inum = neighbor->ndihedrallist;
@@ -124,40 +124,42 @@ void DihedralOPLSIntel::eval(const int vflag,
const int nthreads = tc;
acc_t oedihedral, ov0, ov1, ov2, ov3, ov4, ov5;
- if (EVFLAG) {
- if (EFLAG)
- oedihedral = (acc_t)0.0;
- if (vflag) {
- ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
- }
+ if (EFLAG) oedihedral = (acc_t)0.0;
+ if (VFLAG && vflag) {
+ ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
}
#if defined(_OPENMP)
#pragma omp parallel default(none) \
- shared(f_start,f_stride,fc) \
+ shared(f_start,f_stride,fc) \
reduction(+:oedihedral,ov0,ov1,ov2,ov3,ov4,ov5)
#endif
{
- int nfrom, nto, tid;
+ int nfrom, npl, nto, tid;
+ #ifdef LMP_INTEL_USE_SIMDOFF
IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads);
+ #else
+ IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads);
+ #endif
FORCE_T * _noalias const f = f_start + (tid * f_stride);
if (fix->need_zero(tid))
memset(f, 0, f_stride * sizeof(FORCE_T));
- const int5_t * _noalias const dihedrallist =
+ const int5_t * _noalias const dihedrallist =
(int5_t *) neighbor->dihedrallist[0];
+ #ifdef LMP_INTEL_USE_SIMDOFF
acc_t sedihedral, sv0, sv1, sv2, sv3, sv4, sv5;
- if (EVFLAG) {
- if (EFLAG)
- sedihedral = (acc_t)0.0;
- if (vflag) {
- sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
- }
+ if (EFLAG) sedihedral = (acc_t)0.0;
+ if (VFLAG && vflag) {
+ sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
}
-
- for (int n = nfrom; n < nto; n++) {
+ #pragma simd reduction(+:sedihedral, sv0, sv1, sv2, sv3, sv4, sv5)
+ for (int n = nfrom; n < nto; n ++) {
+ #else
+ for (int n = nfrom; n < nto; n += npl) {
+ #endif
const int i1 = dihedrallist[n].a;
const int i2 = dihedrallist[n].b;
const int i3 = dihedrallist[n].c;
@@ -177,7 +179,7 @@ void DihedralOPLSIntel::eval(const int vflag,
const flt_t vb2zm = x[i2].z - x[i3].z;
// 3rd bond
-
+
const flt_t vb3x = x[i4].x - x[i3].x;
const flt_t vb3y = x[i4].y - x[i3].y;
const flt_t vb3z = x[i4].z - x[i3].z;
@@ -207,7 +209,7 @@ void DihedralOPLSIntel::eval(const int vflag,
const flt_t c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3;
flt_t ctmp = -vb1x*vb2xm - vb1y*vb2ym - vb1z*vb2zm;
- const flt_t r12c1 = rb1 * rb2;
+ const flt_t r12c1 = rb1 * rb2;
const flt_t c1mag = ctmp * r12c1;
ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z;
@@ -236,27 +238,29 @@ void DihedralOPLSIntel::eval(const int vflag,
const flt_t dx = (cx*vb3x + cy*vb3y + cz*vb3z)*cmag*rb3;
// error check
+ #ifndef LMP_INTEL_USE_SIMDOFF
if (c > PTOLERANCE || c < MTOLERANCE) {
- int me = comm->me;
+ int me = comm->me;
- if (screen) {
- char str[128];
- sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " "
- TAGINT_FORMAT " " TAGINT_FORMAT " "
- TAGINT_FORMAT " " TAGINT_FORMAT,
- me,tid,update->ntimestep,
- atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
- error->warning(FLERR,str,0);
- fprintf(screen," 1st atom: %d %g %g %g\n",
- me,x[i1].x,x[i1].y,x[i1].z);
- fprintf(screen," 2nd atom: %d %g %g %g\n",
- me,x[i2].x,x[i2].y,x[i2].z);
- fprintf(screen," 3rd atom: %d %g %g %g\n",
- me,x[i3].x,x[i3].y,x[i3].z);
- fprintf(screen," 4th atom: %d %g %g %g\n",
- me,x[i4].x,x[i4].y,x[i4].z);
- }
+ if (screen) {
+ char str[128];
+ sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " "
+ TAGINT_FORMAT " " TAGINT_FORMAT " "
+ TAGINT_FORMAT " " TAGINT_FORMAT,
+ me,tid,update->ntimestep,
+ atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+ error->warning(FLERR,str,0);
+ fprintf(screen," 1st atom: %d %g %g %g\n",
+ me,x[i1].x,x[i1].y,x[i1].z);
+ fprintf(screen," 2nd atom: %d %g %g %g\n",
+ me,x[i2].x,x[i2].y,x[i2].z);
+ fprintf(screen," 3rd atom: %d %g %g %g\n",
+ me,x[i3].x,x[i3].y,x[i3].z);
+ fprintf(screen," 4th atom: %d %g %g %g\n",
+ me,x[i4].x,x[i4].y,x[i4].z);
+ }
}
+ #endif
if (c > (flt_t)1.0) c = (flt_t)1.0;
if (c < (flt_t)-1.0) c = (flt_t)-1.0;
@@ -279,14 +283,14 @@ void DihedralOPLSIntel::eval(const int vflag,
const flt_t sin_4phim = (flt_t)2.0 * cos_2phi * sin_2phim;
flt_t p, pd;
- p = fc.bp[type].k1*((flt_t)1.0 + c) +
- fc.bp[type].k2*((flt_t)1.0 - cos_2phi) +
- fc.bp[type].k3*((flt_t)1.0 + cos_3phi) +
- fc.bp[type].k4*((flt_t)1.0 - cos_4phi) ;
- pd = fc.bp[type].k1 -
- (flt_t)2.0 * fc.bp[type].k2 * sin_2phim +
- (flt_t)3.0 * fc.bp[type].k3 * sin_3phim -
- (flt_t)4.0 * fc.bp[type].k4 * sin_4phim;
+ p = fc.bp[type].k1*((flt_t)1.0 + c) +
+ fc.bp[type].k2*((flt_t)1.0 - cos_2phi) +
+ fc.bp[type].k3*((flt_t)1.0 + cos_3phi) +
+ fc.bp[type].k4*((flt_t)1.0 - cos_4phi) ;
+ pd = fc.bp[type].k1 -
+ (flt_t)2.0 * fc.bp[type].k2 * sin_2phim +
+ (flt_t)3.0 * fc.bp[type].k3 * sin_3phim -
+ (flt_t)4.0 * fc.bp[type].k4 * sin_4phim;
flt_t edihed;
if (EFLAG) edihed = p;
@@ -321,54 +325,64 @@ void DihedralOPLSIntel::eval(const int vflag,
const flt_t f3y = sy2 - f4y;
const flt_t f3z = sz2 - f4z;
- if (EVFLAG) {
- IP_PRE_ev_tally_dihed(EFLAG, eatom, vflag, edihed, i1, i2, i3, i4, f1x,
- f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z, vb1x,
- vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x, vb3y,
- vb3z, sedihedral, f, NEWTON_BOND, nlocal,
- sv0, sv1, sv2, sv3, sv4, sv5);
+ if (EFLAG || VFLAG) {
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, edihed, i1, i2, i3,
+ i4, f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z,
+ vb1x, vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x,
+ vb3y, vb3z, sedihedral, f, NEWTON_BOND, nlocal,
+ sv0, sv1, sv2, sv3, sv4, sv5);
+ #else
+ IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, edihed, i1, i2, i3,
+ i4, f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z,
+ vb1x, vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x,
+ vb3y, vb3z, oedihedral, f, NEWTON_BOND, nlocal,
+ ov0, ov1, ov2, ov3, ov4, ov5);
+ #endif
}
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ #pragma simdoff
+ #endif
{
if (NEWTON_BOND || i1 < nlocal) {
- f[i1].x += f1x;
- f[i1].y += f1y;
- f[i1].z += f1z;
+ f[i1].x += f1x;
+ f[i1].y += f1y;
+ f[i1].z += f1z;
}
if (NEWTON_BOND || i2 < nlocal) {
- f[i2].x += f2x;
- f[i2].y += f2y;
- f[i2].z += f2z;
+ f[i2].x += f2x;
+ f[i2].y += f2y;
+ f[i2].z += f2z;
}
if (NEWTON_BOND || i3 < nlocal) {
- f[i3].x += f3x;
- f[i3].y += f3y;
- f[i3].z += f3z;
+ f[i3].x += f3x;
+ f[i3].y += f3y;
+ f[i3].z += f3z;
}
if (NEWTON_BOND || i4 < nlocal) {
- f[i4].x += f4x;
- f[i4].y += f4y;
- f[i4].z += f4z;
+ f[i4].x += f4x;
+ f[i4].y += f4y;
+ f[i4].z += f4z;
}
}
} // for n
- if (EVFLAG) {
- if (EFLAG) oedihedral += sedihedral;
- if (vflag) {
- ov0 += sv0; ov1 += sv1; ov2 += sv2; ov3 += sv3; ov4 += sv4; ov5 += sv5;
- }
+ #ifdef LMP_INTEL_USE_SIMDOFF
+ if (EFLAG) oedihedral += sedihedral;
+ if (VFLAG && vflag) {
+ ov0 += sv0; ov1 += sv1; ov2 += sv2;
+ ov3 += sv3; ov4 += sv4; ov5 += sv5;
}
+ #endif
} // omp parallel
- if (EVFLAG) {
- if (EFLAG) energy += oedihedral;
- if (vflag) {
- virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
- virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
- }
+ if (EFLAG) energy += oedihedral;
+ if (VFLAG && vflag) {
+ virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
+ virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
}
fix->set_reduce_flag();
@@ -408,7 +422,7 @@ void DihedralOPLSIntel::init_style()
template
void DihedralOPLSIntel::pack_force_const(ForceConst &fc,
- IntelBuffers *buffers)
+ IntelBuffers *buffers)
{
const int bp1 = atom->ndihedraltypes + 1;
fc.set_ntypes(bp1,memory);
@@ -425,11 +439,11 @@ void DihedralOPLSIntel::pack_force_const(ForceConst &fc,
template
void DihedralOPLSIntel::ForceConst::set_ntypes(const int nbondtypes,
- Memory *memory) {
+ Memory *memory) {
if (nbondtypes != _nbondtypes) {
if (_nbondtypes > 0)
_memory->destroy(bp);
-
+
if (nbondtypes > 0)
_memory->create(bp,nbondtypes,"dihedralcharmmintel.bp");
}
diff --git a/src/USER-INTEL/dihedral_opls_intel.h b/src/USER-INTEL/dihedral_opls_intel.h
index ea0930f4b8..1080bfa6c3 100644
--- a/src/USER-INTEL/dihedral_opls_intel.h
+++ b/src/USER-INTEL/dihedral_opls_intel.h
@@ -44,8 +44,8 @@ class DihedralOPLSIntel : public DihedralOPLS {
void compute(int eflag, int vflag, IntelBuffers *buffers,
const ForceConst &fc);
template
- void eval(const int vflag, IntelBuffers * buffers,
- const ForceConst &fc);
+ void eval(const int vflag, IntelBuffers * buffers,
+ const ForceConst &fc);
template
void pack_force_const(ForceConst &fc,
IntelBuffers *buffers);
diff --git a/src/USER-INTEL/fix_intel.cpp b/src/USER-INTEL/fix_intel.cpp
index edd33eb72b..b06f76c90d 100644
--- a/src/USER-INTEL/fix_intel.cpp
+++ b/src/USER-INTEL/fix_intel.cpp
@@ -61,6 +61,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
int ncops = force->inumeric(FLERR,arg[3]);
_nbor_pack_width = 1;
+ _three_body_neighbor = 0;
_precision_mode = PREC_MODE_MIXED;
_offload_balance = -1.0;
@@ -178,7 +179,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
_real_space_comm = MPI_COMM_WORLD;
if (no_affinity == 0)
if (set_host_affinity(nomp) != 0)
- error->all(FLERR,"Could not set host affinity for offload tasks");
+ error->all(FLERR,"Could not set host affinity for offload tasks");
}
int max_offload_threads = 0, offload_cores = 0;
@@ -263,7 +264,7 @@ FixIntel::~FixIntel()
double *time2 = off_watch_neighbor();
int *overflow = get_off_overflow_flag();
if (_offload_balance != 0.0 && time1 != NULL && time2 != NULL &&
- overflow != NULL) {
+ overflow != NULL) {
#pragma offload_transfer target(mic:_cop) \
nocopy(time1,time2,overflow:alloc_if(0) free_if(1))
}
@@ -319,19 +320,25 @@ void FixIntel::init()
if (strstr(hybrid->keywords[i], "/intel") != NULL)
nstyles++;
else
- force->pair->no_virial_fdotr_compute = 1;
+ force->pair->no_virial_fdotr_compute = 1;
}
if (nstyles > 1)
error->all(FLERR,
- "Currently, cannot use more than one intel style with hybrid.");
+ "Currently, cannot use more than one intel style with hybrid.");
check_neighbor_intel();
- if (_precision_mode == PREC_MODE_SINGLE)
+ int off_mode = 0;
+ if (_offload_balance != 0.0) off_mode = 1;
+ if (_precision_mode == PREC_MODE_SINGLE) {
_single_buffers->zero_ev();
- else if (_precision_mode == PREC_MODE_MIXED)
+ _single_buffers->grow_ncache(off_mode,_nthreads);
+ } else if (_precision_mode == PREC_MODE_MIXED) {
_mixed_buffers->zero_ev();
- else
+ _mixed_buffers->grow_ncache(off_mode,_nthreads);
+ } else {
_double_buffers->zero_ev();
+ _double_buffers->grow_ncache(off_mode,_nthreads);
+ }
_need_reduce = 0;
}
@@ -342,13 +349,13 @@ void FixIntel::setup(int vflag)
{
if (neighbor->style != BIN)
error->all(FLERR,
- "Currently, neighbor style BIN must be used with Intel package.");
+ "Currently, neighbor style BIN must be used with Intel package.");
if (neighbor->exclude_setting() != 0)
error->all(FLERR,
- "Currently, cannot use neigh_modify exclude with Intel package.");
+ "Currently, cannot use neigh_modify exclude with Intel package.");
if (vflag_atom)
error->all(FLERR,
- "Cannot currently get per-atom virials with Intel package.");
+ "Cannot currently get per-atom virials with Intel package.");
#ifdef _LMP_INTEL_OFFLOAD
post_force(vflag);
#endif
@@ -367,8 +374,6 @@ void FixIntel::pair_init_check(const bool cdmessage)
{
#ifdef INTEL_VMASK
atom->sortfreq = 1;
- if (neighbor->binsizeflag && atom->userbinsize <= 0.0)
- atom->userbinsize = neighbor->binsize_user;
#endif
_nbor_pack_width = 1;
@@ -376,9 +381,8 @@ void FixIntel::pair_init_check(const bool cdmessage)
#ifdef _LMP_INTEL_OFFLOAD
if (_offload_balance != 0.0) atom->sortfreq = 1;
- if (force->newton_pair == 0)
- _offload_noghost = 0;
- else if (_offload_ghost == 0)
+ _offload_noghost = 0;
+ if (force->newton_pair && _offload_ghost == 0)
_offload_noghost = 1;
set_offload_affinity();
@@ -388,7 +392,7 @@ void FixIntel::pair_init_check(const bool cdmessage)
double *time2 = off_watch_neighbor();
int *overflow = get_off_overflow_flag();
if (_offload_balance !=0.0 && time1 != NULL && time2 != NULL &&
- overflow != NULL) {
+ overflow != NULL) {
#pragma offload_transfer target(mic:_cop) \
nocopy(time1,time2:length(1) alloc_if(1) free_if(0)) \
in(overflow:length(5) alloc_if(1) free_if(0))
@@ -403,7 +407,7 @@ void FixIntel::pair_init_check(const bool cdmessage)
error->warning(FLERR, "Unknown Intel Compiler Version\n");
#else
if (__INTEL_COMPILER_BUILD_DATE != 20131008 &&
- __INTEL_COMPILER_BUILD_DATE < 20141023)
+ __INTEL_COMPILER_BUILD_DATE < 20141023)
error->warning(FLERR, "Unsupported Intel Compiler.");
#endif
#if !defined(__INTEL_COMPILER)
@@ -434,24 +438,24 @@ void FixIntel::pair_init_check(const bool cdmessage)
if (comm->me == 0) {
if (screen) {
fprintf(screen,
- "----------------------------------------------------------\n");
+ "----------------------------------------------------------\n");
if (_offload_balance != 0.0) {
fprintf(screen,"Using Intel Coprocessor with %d threads per core, ",
- _offload_tpc);
+ _offload_tpc);
fprintf(screen,"%d threads per task\n",_offload_threads);
} else {
- fprintf(screen,"Using Intel Package without Coprocessor.\n");
+ fprintf(screen,"Using Intel Package without Coprocessor.\n");
}
fprintf(screen,"Precision: %s\n",kmode);
if (cdmessage) {
- #ifdef LMP_USE_AVXCD
- fprintf(screen,"AVX512 CD Optimizations: Enabled\n");
- #else
- fprintf(screen,"AVX512 CD Optimizations: Disabled\n");
- #endif
+ #ifdef LMP_USE_AVXCD
+ fprintf(screen,"AVX512 CD Optimizations: Enabled\n");
+ #else
+ fprintf(screen,"AVX512 CD Optimizations: Disabled\n");
+ #endif
}
fprintf(screen,
- "----------------------------------------------------------\n");
+ "----------------------------------------------------------\n");
}
}
}
@@ -460,7 +464,7 @@ void FixIntel::pair_init_check(const bool cdmessage)
void FixIntel::bond_init_check()
{
- if (_offload_balance != 0.0 && atom->molecular &&
+ if (_offload_balance != 0.0 && atom->molecular &&
force->newton_pair != force->newton_bond)
error->all(FLERR,
"USER-INTEL package requires same setting for newton bond and non-bond.");
@@ -535,24 +539,24 @@ void FixIntel::pre_reverse(int eflag, int vflag)
{
if (_force_array_m != 0) {
if (_need_reduce) {
- reduce_results(_force_array_m);
+ reduce_results(&_force_array_m[0].x);
_need_reduce = 0;
}
- add_results(_force_array_m, _ev_array_d, _results_eatom, _results_vatom, 0);
+ add_results(_force_array_m, _ev_array_d, _results_eatom, _results_vatom,0);
_force_array_m = 0;
} else if (_force_array_d != 0) {
if (_need_reduce) {
- reduce_results(_force_array_d);
+ reduce_results(&_force_array_d[0].x);
_need_reduce = 0;
}
- add_results(_force_array_d, _ev_array_d, _results_eatom, _results_vatom, 0);
+ add_results(_force_array_d, _ev_array_d, _results_eatom, _results_vatom,0);
_force_array_d = 0;
} else if (_force_array_s != 0) {
if (_need_reduce) {
- reduce_results(_force_array_s);
+ reduce_results(&_force_array_s[0].x);
_need_reduce = 0;
}
- add_results(_force_array_s, _ev_array_s, _results_eatom, _results_vatom, 0);
+ add_results(_force_array_s, _ev_array_s, _results_eatom, _results_vatom,0);
_force_array_s = 0;
}
@@ -563,47 +567,56 @@ void FixIntel::pre_reverse(int eflag, int vflag)
/* ---------------------------------------------------------------------- */
-template
-void FixIntel::reduce_results(ft * _noalias const f_start)
+template
+void FixIntel::reduce_results(acc_t * _noalias const f_scalar)
{
int o_range, f_stride;
if (force->newton_pair)
o_range = atom->nlocal + atom->nghost;
- else
+ else
o_range = atom->nlocal;
- IP_PRE_get_stride(f_stride, o_range, sizeof(ft), lmp->atom->torque);
+ IP_PRE_get_stride(f_stride, o_range, (sizeof(acc_t)*4), lmp->atom->torque);
- #if defined(_OPENMP)
- #pragma omp parallel default(none) shared(o_range, f_stride)
- #endif
- {
- int iifrom, iito, tid;
- IP_PRE_omp_range_id_align(iifrom, iito, tid, o_range, _nthreads,
- sizeof(ft));
+ o_range *= 4;
+ const int f_stride4 = f_stride * 4;
- int t_off = f_stride;
- if (_results_eatom) {
- for (int t = 1; t < _nthreads; t++) {
- _use_simd_pragma("vector nontemporal")
- _use_simd_pragma("novector")
- for (int n = iifrom; n < iito; n++) {
- f_start[n].x += f_start[n + t_off].x;
- f_start[n].y += f_start[n + t_off].y;
- f_start[n].z += f_start[n + t_off].z;
- f_start[n].w += f_start[n + t_off].w;
- }
- t_off += f_stride;
- }
+ if (_nthreads <= INTEL_HTHREADS) {
+ acc_t *f_scalar2 = f_scalar + f_stride4;
+ if (_nthreads == 4) {
+ acc_t *f_scalar3 = f_scalar2 + f_stride4;
+ acc_t *f_scalar4 = f_scalar3 + f_stride4;
+ _use_simd_pragma("vector aligned")
+ _use_simd_pragma("simd")
+ for (int n = 0; n < o_range; n++)
+ f_scalar[n] += f_scalar2[n] + f_scalar3[n] + f_scalar4[n];
+ } else if (_nthreads == 2) {
+ _use_simd_pragma("vector aligned")
+ _use_simd_pragma("simd")
+ for (int n = 0; n < o_range; n++)
+ f_scalar[n] += f_scalar2[n];
} else {
+ acc_t *f_scalar3 = f_scalar2 + f_stride4;
+ _use_simd_pragma("vector aligned")
+ _use_simd_pragma("simd")
+ for (int n = 0; n < o_range; n++)
+ f_scalar[n] += f_scalar2[n] + f_scalar3[n];
+ }
+ } else {
+ #if defined(_OPENMP)
+ #pragma omp parallel
+ #endif
+ {
+ int iifrom, iito, tid;
+ IP_PRE_omp_range_id_align(iifrom, iito, tid, o_range, _nthreads,
+ sizeof(acc_t));
+
+ acc_t *f_scalar2 = f_scalar + f_stride4;
for (int t = 1; t < _nthreads; t++) {
- _use_simd_pragma("vector nontemporal")
- _use_simd_pragma("novector")
- for (int n = iifrom; n < iito; n++) {
- f_start[n].x += f_start[n + t_off].x;
- f_start[n].y += f_start[n + t_off].y;
- f_start[n].z += f_start[n + t_off].z;
- }
- t_off += f_stride;
+ _use_simd_pragma("vector aligned")
+ _use_simd_pragma("simd")
+ for (int n = iifrom; n < iito; n++)
+ f_scalar[n] += f_scalar2[n];
+ f_scalar2 += f_stride4;
}
}
}
@@ -635,46 +648,65 @@ template
void FixIntel::add_results(const ft * _noalias const f_in,
const acc_t * _noalias const ev_global,
const int eatom, const int vatom,
- const int offload) {
+ const int offload) {
start_watch(TIME_PACK);
int f_length;
#ifdef _LMP_INTEL_OFFLOAD
if (_separate_buffers) {
if (offload) {
- add_oresults(f_in, ev_global, eatom, vatom, 0, _offload_nlocal);
if (force->newton_pair) {
- const acc_t * _noalias const enull = 0;
- int offset = _offload_nlocal;
- if (atom->torque) offset *= 2;
- add_oresults(f_in + offset, enull, eatom, vatom,
- _offload_min_ghost, _offload_nghost);
- }
+ add_oresults(f_in, ev_global, eatom, vatom, 0, _offload_nlocal);
+ const acc_t * _noalias const enull = 0;
+ int offset = _offload_nlocal;
+ if (atom->torque) offset *= 2;
+ add_oresults(f_in + offset, enull, eatom, vatom,
+ _offload_min_ghost, _offload_nghost);
+ } else
+ add_oresults(f_in, ev_global, eatom, vatom, 0, offload_end_pair());
} else {
- add_oresults(f_in, ev_global, eatom, vatom,
- _host_min_local, _host_used_local);
if (force->newton_pair) {
- const acc_t * _noalias const enull = 0;
- int offset = _host_used_local;
- if (atom->torque) offset *= 2;
- add_oresults(f_in + offset, enull, eatom,
- vatom, _host_min_ghost, _host_used_ghost);
+ add_oresults(f_in, ev_global, eatom, vatom,
+ _host_min_local, _host_used_local);
+ const acc_t * _noalias const enull = 0;
+ int offset = _host_used_local;
+ if (atom->torque) offset *= 2;
+ add_oresults(f_in + offset, enull, eatom,
+ vatom, _host_min_ghost, _host_used_ghost);
+ } else {
+ int start = host_start_pair();
+ add_oresults(f_in, ev_global, eatom, vatom, start, atom->nlocal-start);
}
}
stop_watch(TIME_PACK);
return;
}
- if (force->newton_pair && (_offload_noghost == 0 || offload == 0))
- f_length = atom->nlocal + atom->nghost;
- else
- f_length = atom->nlocal;
+ int start;
+ if (offload) {
+ start = 0;
+ if (force->newton_pair) {
+ if (_offload_noghost == 0)
+ f_length = atom->nlocal + atom->nghost;
+ else
+ f_length = atom->nlocal;
+ } else
+ f_length = offload_end_pair();
+ } else {
+ if (force->newton_pair) {
+ start = 0;
+ f_length = atom->nlocal + atom->nghost;
+ } else {
+ start = host_start_pair();
+ f_length = atom->nlocal - start;
+ }
+ }
+ add_oresults(f_in, ev_global, eatom, vatom, start, f_length);
#else
if (force->newton_pair)
f_length = atom->nlocal + atom->nghost;
else
f_length = atom->nlocal;
- #endif
-
add_oresults(f_in, ev_global, eatom, vatom, 0, f_length);
+ #endif
stop_watch(TIME_PACK);
}
@@ -682,9 +714,9 @@ void FixIntel::add_results(const ft * _noalias const f_in,
template
void FixIntel::add_oresults(const ft * _noalias const f_in,
- const acc_t * _noalias const ev_global,
- const int eatom, const int vatom,
- const int out_offset, const int nall) {
+ const acc_t * _noalias const ev_global,
+ const int eatom, const int vatom,
+ const int out_offset, const int nall) {
lmp_ft * _noalias const f = (lmp_ft *) lmp->atom->f[0] + out_offset;
if (atom->torque) {
if (f_in[1].w)
@@ -695,8 +727,11 @@ void FixIntel::add_oresults(const ft * _noalias const f_in,
"Sphere particles not yet supported for gayberne/intel");
}
+ int packthreads;
+ if (_nthreads > INTEL_HTHREADS) packthreads = _nthreads;
+ else packthreads = 1;
#if defined(_OPENMP)
- #pragma omp parallel default(none)
+ #pragma omp parallel if(packthreads > 1)
#endif
{
#if defined(_OPENMP)
@@ -705,16 +740,16 @@ void FixIntel::add_oresults(const ft * _noalias const f_in,
const int tid = 0;
#endif
int ifrom, ito;
- IP_PRE_omp_range_align(ifrom, ito, tid, nall, _nthreads, sizeof(acc_t));
+ IP_PRE_omp_range_align(ifrom, ito, tid, nall, packthreads, sizeof(acc_t));
if (atom->torque) {
int ii = ifrom * 2;
lmp_ft * _noalias const tor = (lmp_ft *) lmp->atom->torque[0] +
- out_offset;
+ out_offset;
if (eatom) {
- double * _noalias const lmp_eatom = force->pair->eatom + out_offset;
+ double * _noalias const lmp_eatom = force->pair->eatom + out_offset;
#if defined(LMP_SIMD_COMPILER)
- #pragma novector
- #endif
+ #pragma novector
+ #endif
for (int i = ifrom; i < ito; i++) {
f[i].x += f_in[ii].x;
f[i].y += f_in[ii].y;
@@ -727,8 +762,8 @@ void FixIntel::add_oresults(const ft * _noalias const f_in,
}
} else {
#if defined(LMP_SIMD_COMPILER)
- #pragma novector
- #endif
+ #pragma novector
+ #endif
for (int i = ifrom; i < ito; i++) {
f[i].x += f_in[ii].x;
f[i].y += f_in[ii].y;
@@ -741,10 +776,10 @@ void FixIntel::add_oresults(const ft * _noalias const f_in,
}
} else {
if (eatom) {
- double * _noalias const lmp_eatom = force->pair->eatom + out_offset;
+ double * _noalias const lmp_eatom = force->pair->eatom + out_offset;
#if defined(LMP_SIMD_COMPILER)
- #pragma novector
- #endif
+ #pragma novector
+ #endif
for (int i = ifrom; i < ito; i++) {
f[i].x += f_in[i].x;
f[i].y += f_in[i].y;
@@ -753,8 +788,8 @@ void FixIntel::add_oresults(const ft * _noalias const f_in,
}
} else {
#if defined(LMP_SIMD_COMPILER)
- #pragma novector
- #endif
+ #pragma novector
+ #endif
for (int i = ifrom; i < ito; i++) {
f[i].x += f_in[i].x;
f[i].y += f_in[i].y;
@@ -833,6 +868,11 @@ void FixIntel::add_off_results(const ft * _noalias const f_in,
_offload_nlocal;
}
+ if (atom->torque)
+ if (f_in[1].w < 0.0)
+ error->all(FLERR, "Bad matrix inversion in mldivide3");
+ add_results(f_in, ev_global, _off_results_eatom, _off_results_vatom, 1);
+
// Load balance?
if (_offload_balance < 0.0) {
if (neighbor->ago == 0)
@@ -860,10 +900,6 @@ void FixIntel::add_off_results(const ft * _noalias const f_in,
stop_watch(TIME_IMBALANCE);
#endif
acc_timers();
- if (atom->torque)
- if (f_in[1].w < 0.0)
- error->all(FLERR, "Bad matrix inversion in mldivide3");
- add_results(f_in, ev_global, _off_results_eatom, _off_results_vatom, 1);
}
/* ---------------------------------------------------------------------- */
@@ -895,7 +931,7 @@ void FixIntel::output_timing_data() {
balance_out[0] = _balance_pair;
balance_out[1] = _balance_neighbor;
MPI_Reduce(balance_out, balance_in, 2, MPI_DOUBLE, MPI_SUM,
- 0, _real_space_comm);
+ 0, _real_space_comm);
balance_in[0] /= size;
balance_in[1] /= size;
@@ -922,25 +958,25 @@ void FixIntel::output_timing_data() {
balance_in[1]);
fprintf(_tscreen, " Offload Pair Balance %f\n",
balance_in[0]);
- fprintf(_tscreen, " Offload Ghost Atoms ");
- if (_offload_noghost) fprintf(_tscreen,"No\n");
- else fprintf(_tscreen,"Yes\n");
+ fprintf(_tscreen, " Offload Ghost Atoms ");
+ if (_offload_noghost) fprintf(_tscreen,"No\n");
+ else fprintf(_tscreen,"Yes\n");
#ifdef TIME_BALANCE
fprintf(_tscreen, " Offload Imbalance Seconds %f\n",
timers[TIME_IMBALANCE]);
- fprintf(_tscreen, " Offload Min/Max Seconds ");
- for (int i = 0; i < NUM_ITIMERS; i++)
- fprintf(_tscreen, "[%f, %f] ",timers_min[i],timers_max[i]);
- fprintf(_tscreen, "\n");
+ fprintf(_tscreen, " Offload Min/Max Seconds ");
+ for (int i = 0; i < NUM_ITIMERS; i++)
+ fprintf(_tscreen, "[%f, %f] ",timers_min[i],timers_max[i]);
+ fprintf(_tscreen, "\n");
#endif
- double ht = timers[TIME_HOST_NEIGHBOR] + timers[TIME_HOST_PAIR] +
- timers[TIME_OFFLOAD_WAIT];
- double ct = timers[TIME_OFFLOAD_NEIGHBOR] +
- timers[TIME_OFFLOAD_PAIR];
- double tt = MAX(ht,ct);
- if (timers[TIME_OFFLOAD_LATENCY] / tt > 0.07 && _separate_coi == 0)
- error->warning(FLERR,
- "Leaving a core free can improve performance for offload");
+ double ht = timers[TIME_HOST_NEIGHBOR] + timers[TIME_HOST_PAIR] +
+ timers[TIME_OFFLOAD_WAIT];
+ double ct = timers[TIME_OFFLOAD_NEIGHBOR] +
+ timers[TIME_OFFLOAD_PAIR];
+ double tt = MAX(ht,ct);
+ if (timers[TIME_OFFLOAD_LATENCY] / tt > 0.07 && _separate_coi == 0)
+ error->warning(FLERR,
+ "Leaving a core free can improve performance for offload");
}
fprintf(_tscreen, "------------------------------------------------\n");
}
@@ -963,14 +999,14 @@ int FixIntel::get_ppn(int &node_rank) {
node_name[name_length] = '\0';
char *node_names = new char[MPI_MAX_PROCESSOR_NAME*nprocs];
MPI_Allgather(node_name, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, node_names,
- MPI_MAX_PROCESSOR_NAME, MPI_CHAR, _real_space_comm);
+ MPI_MAX_PROCESSOR_NAME, MPI_CHAR, _real_space_comm);
int ppn = 0;
node_rank = 0;
for (int i = 0; i < nprocs; i++) {
if (strcmp(node_name, node_names + i * MPI_MAX_PROCESSOR_NAME) == 0) {
ppn++;
if (i < rank)
- node_rank++;
+ node_rank++;
}
}
@@ -1032,19 +1068,19 @@ void FixIntel::set_offload_affinity()
kmp_create_affinity_mask(&mask);
int proc = offload_threads * node_rank + tnum;
#ifdef __AVX512F__
- proc = (proc / offload_tpc) + (proc % offload_tpc) *
- ((offload_cores) / 4);
+ proc = (proc / offload_tpc) + (proc % offload_tpc) *
+ ((offload_cores) / 4);
proc += 68;
#else
if (offload_affinity_balanced)
- proc = proc * 4 - (proc / 60) * 240 + proc / 60 + 1;
+ proc = proc * 4 - (proc / 60) * 240 + proc / 60 + 1;
else
- proc += (proc / 4) * (4 - offload_tpc) + 1;
+ proc += (proc / 4) * (4 - offload_tpc) + 1;
#endif
kmp_set_affinity_mask_proc(proc, &mask);
if (kmp_set_affinity(&mask) != 0)
- printf("Could not set affinity on rank %d thread %d to %d\n",
- node_rank, tnum, proc);
+ printf("Could not set affinity on rank %d thread %d to %d\n",
+ node_rank, tnum, proc);
}
}
@@ -1074,7 +1110,7 @@ int FixIntel::set_host_affinity(const int nomp)
char cmd[512];
char readbuf[INTEL_MAX_HOST_CORE_COUNT*5];
sprintf(cmd, "lscpu -p | grep -v '#' |"
- "sort -t, -k 3,3n -k 2,2n | awk -F, '{print $1}'");
+ "sort -t, -k 3,3n -k 2,2n | awk -F, '{print $1}'");
p = popen(cmd, "r");
if (p == NULL) return -1;
ncores = 0;
@@ -1111,7 +1147,7 @@ int FixIntel::set_host_affinity(const int nomp)
if (subscription > ncores) {
if (rank == 0)
error->warning(FLERR,
- "More MPI tasks/OpenMP threads than available cores");
+ "More MPI tasks/OpenMP threads than available cores");
return 0;
}
if (subscription == ncores)
@@ -1137,10 +1173,10 @@ int FixIntel::set_host_affinity(const int nomp)
int first = coi_cores + node_rank * mpi_cores;
CPU_ZERO(&cpuset);
for (int i = first; i < first + mpi_cores; i++)
- CPU_SET(proc_list[i], &cpuset);
+ CPU_SET(proc_list[i], &cpuset);
if (sched_setaffinity(lwp, sizeof(cpu_set_t), &cpuset)) {
- fail = 1;
- break;
+ fail = 1;
+ break;
}
plwp++;
}
@@ -1153,13 +1189,13 @@ int FixIntel::set_host_affinity(const int nomp)
buf1 = (float*) malloc(sizeof(float)*pragma_size);
#pragma offload target (mic:0) mandatory \
- in(buf1:length(pragma_size) alloc_if(1) free_if(0)) \
+ in(buf1:length(pragma_size) alloc_if(1) free_if(0)) \
signal(&sig1)
{ buf1[0] = 0.0; }
#pragma offload_wait target(mic:0) wait(&sig1)
#pragma offload target (mic:0) mandatory \
- out(buf1:length(pragma_size) alloc_if(0) free_if(1)) \
+ out(buf1:length(pragma_size) alloc_if(0) free_if(1)) \
signal(&sig2)
{ buf1[0] = 1.0; }
#pragma offload_wait target(mic:0) wait(&sig2)
@@ -1175,11 +1211,11 @@ int FixIntel::set_host_affinity(const int nomp)
CPU_ZERO(&cpuset);
for(int i=0; i 0) return 1;
return 0;
}
- inline void set_reduce_flag() { _need_reduce = 1; }
+ inline void set_reduce_flag() { if (_nthreads > 1) _need_reduce = 1; }
inline int lrt() {
if (force->kspace_match("pppm/intel", 0)) return _lrt;
else return 0;
}
+ inline int pppm_table() {
+ if (force->kspace_match("pppm/intel", 0) ||
+ force->kspace_match("pppm/disp/intel",0))
+ return INTEL_P3M_TABLE;
+ else return 0;
+ }
+
protected:
IntelBuffers *_single_buffers;
IntelBuffers *_mixed_buffers;
IntelBuffers *_double_buffers;
- int _precision_mode, _nthreads, _nbor_pack_width;
+ int _precision_mode, _nthreads, _nbor_pack_width, _three_body_neighbor;
public:
inline int* get_overflow_flag() { return _overflow_flag; }
@@ -94,17 +103,17 @@ class FixIntel : public Fix {
inline void add_result_array(IntelBuffers::vec3_acc_t *f_in,
double *ev_in, const int offload,
const int eatom = 0, const int vatom = 0,
- const int rflag = 0);
+ const int rflag = 0);
inline void add_result_array(IntelBuffers::vec3_acc_t *f_in,
double *ev_in, const int offload,
const int eatom = 0, const int vatom = 0,
- const int rflag = 0);
+ const int rflag = 0);
inline void add_result_array(IntelBuffers::vec3_acc_t *f_in,
float *ev_in, const int offload,
const int eatom = 0, const int vatom = 0,
- const int rflag = 0);
+ const int rflag = 0);
inline void get_buffern(const int offload, int &nlocal, int &nall,
- int &minlocal);
+ int &minlocal);
#ifdef _LMP_INTEL_OFFLOAD
void post_force(int vflag);
@@ -204,13 +213,13 @@ class FixIntel : public Fix {
inline void add_results(const ft * _noalias const f_in,
const acc_t * _noalias const ev_global,
const int eatom, const int vatom,
- const int offload);
+ const int offload);
template
inline void add_oresults(const ft * _noalias const f_in,
- const acc_t * _noalias const ev_global,
- const int eatom, const int vatom,
- const int out_offset, const int nall);
+ const acc_t * _noalias const ev_global,
+ const int eatom, const int vatom,
+ const int out_offset, const int nall);
int _offload_affinity_balanced, _offload_threads, _offload_tpc;
#ifdef _LMP_INTEL_OFFLOAD
@@ -226,22 +235,25 @@ class FixIntel : public Fix {
/* ---------------------------------------------------------------------- */
void FixIntel::get_buffern(const int offload, int &nlocal, int &nall,
- int &minlocal) {
+ int &minlocal) {
#ifdef _LMP_INTEL_OFFLOAD
if (_separate_buffers) {
if (offload) {
if (neighbor->ago != 0) {
- nlocal = _offload_nlocal;
- nall = _offload_nall;
+ nlocal = _offload_nlocal;
+ nall = _offload_nall;
} else {
- nlocal = atom->nlocal;
- nall = nlocal + atom->nghost;
+ nlocal = atom->nlocal;
+ nall = nlocal + atom->nghost;
}
minlocal = 0;
} else {
nlocal = atom->nlocal;
nall = _host_nall;
- minlocal = _host_min_local;
+ if (force->newton)
+ minlocal = _host_min_local;
+ else
+ minlocal = host_start_pair();
}
return;
}
@@ -259,7 +271,7 @@ void FixIntel::get_buffern(const int offload, int &nlocal, int &nall,
void FixIntel::add_result_array(IntelBuffers::vec3_acc_t *f_in,
double *ev_in, const int offload,
const int eatom, const int vatom,
- const int rflag) {
+ const int rflag) {
#ifdef _LMP_INTEL_OFFLOAD
if (offload) {
_off_results_eatom = eatom;
@@ -275,7 +287,7 @@ void FixIntel::add_result_array(IntelBuffers::vec3_acc_t *f_in,
_results_eatom = eatom;
_results_vatom = vatom;
#ifndef _LMP_INTEL_OFFLOAD
- if (rflag != 2 && _nthreads > 1) _need_reduce = 1;
+ if (rflag != 2 && _nthreads > 1 && force->newton) _need_reduce = 1;
#endif
if (_overflow_flag[LMP_OVERFLOW])
@@ -287,7 +299,7 @@ void FixIntel::add_result_array(IntelBuffers::vec3_acc_t *f_in,
void FixIntel::add_result_array(IntelBuffers::vec3_acc_t *f_in,
double *ev_in, const int offload,
const int eatom, const int vatom,
- const int rflag) {
+ const int rflag) {
#ifdef _LMP_INTEL_OFFLOAD
if (offload) {
_off_results_eatom = eatom;
@@ -303,7 +315,7 @@ void FixIntel::add_result_array(IntelBuffers::vec3_acc_t *f_in,
_results_eatom = eatom;
_results_vatom = vatom;
#ifndef _LMP_INTEL_OFFLOAD
- if (rflag != 2 && _nthreads > 1) _need_reduce = 1;
+ if (rflag != 2 && _nthreads > 1 && force->newton) _need_reduce = 1;
#endif
if (_overflow_flag[LMP_OVERFLOW])
@@ -331,7 +343,7 @@ void FixIntel::add_result_array(IntelBuffers::vec3_acc_t *f_in,
_results_eatom = eatom;
_results_vatom = vatom;
#ifndef _LMP_INTEL_OFFLOAD
- if (rflag != 2 && _nthreads > 1) _need_reduce = 1;
+ if (rflag != 2 && _nthreads > 1 && force->newton) _need_reduce = 1;
#endif
if (_overflow_flag[LMP_OVERFLOW])
@@ -349,12 +361,12 @@ int FixIntel::offload_end_neighbor() {
if (atom->nlocal < 2)
error->one(FLERR,"Too few atoms for load balancing offload");
double granularity = 1.0 / atom->nlocal;
- if (_balance_neighbor < granularity)
+ if (_balance_neighbor < granularity)
_balance_neighbor = granularity + 1e-10;
- else if (_balance_neighbor > 1.0 - granularity)
+ else if (_balance_neighbor > 1.0 - granularity)
_balance_neighbor = 1.0 - granularity + 1e-10;
}
- return _balance_neighbor * atom->nlocal;
+ return _balance_neighbor * atom->nlocal;
}
int FixIntel::offload_end_pair() {
@@ -505,7 +517,7 @@ The newton setting must be the same for both pairwise and bonded forces.
E: Intel styles for bond/angle/dihedral/improper require intel pair style."
-You cannot use the USER-INTEL package for bond calculations without a
+You cannot use the USER-INTEL package for bond calculations without a
USER-INTEL supported pair style.
E: Intel styles for kspace require intel pair style.
diff --git a/src/USER-INTEL/fix_nh_intel.cpp b/src/USER-INTEL/fix_nh_intel.cpp
index 3f76e53c1f..6e44b38ef1 100644
--- a/src/USER-INTEL/fix_nh_intel.cpp
+++ b/src/USER-INTEL/fix_nh_intel.cpp
@@ -45,7 +45,7 @@ typedef struct { double x,y,z; } dbl3_t;
NVT,NPH,NPT integrators for improved Nose-Hoover equations of motion
---------------------------------------------------------------------- */
-FixNHIntel::FixNHIntel(LAMMPS *lmp, int narg, char **arg) :
+FixNHIntel::FixNHIntel(LAMMPS *lmp, int narg, char **arg) :
FixNH(lmp, narg, arg)
{
_dtfm = 0;
@@ -118,12 +118,12 @@ void FixNHIntel::remap()
#endif
for (int i = 0; i < nlocal; i++) {
if (mask[i] & dilate_group_bit) {
- const double d0 = x[i].x - b0;
- const double d1 = x[i].y - b1;
- const double d2 = x[i].z - b2;
- x[i].x = hi0*d0 + hi5*d1 + hi4*d2;
- x[i].y = hi1*d1 + hi3*d2;
- x[i].z = hi2*d2;
+ const double d0 = x[i].x - b0;
+ const double d1 = x[i].y - b1;
+ const double d2 = x[i].z - b2;
+ x[i].x = hi0*d0 + hi5*d1 + hi4*d2;
+ x[i].y = hi1*d1 + hi3*d2;
+ x[i].z = hi2*d2;
}
}
}
@@ -294,9 +294,9 @@ void FixNHIntel::remap()
#endif
for (int i = 0; i < nlocal; i++) {
if (mask[i] & dilate_group_bit) {
- x[i].x = h0*x[i].x + h5*x[i].y + h4*x[i].z + nb0;
- x[i].y = h1*x[i].y + h3*x[i].z + nb1;
- x[i].z = h2*x[i].z + nb2;
+ x[i].x = h0*x[i].x + h5*x[i].y + h4*x[i].z + nb0;
+ x[i].y = h1*x[i].y + h3*x[i].z + nb1;
+ x[i].z = h2*x[i].z + nb2;
}
}
}
@@ -318,7 +318,7 @@ void FixNHIntel::reset_dt()
dto = dthalf;
// If using respa, then remap is performed in innermost level
-
+
if (strstr(update->integrate_style,"respa"))
dto = 0.5*step_respa[0];
@@ -329,7 +329,7 @@ void FixNHIntel::reset_dt()
tdrag_factor = 1.0 - (update->dt * t_freq * drag / nc_tchain);
const int * const mask = atom->mask;
- const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst :
+ const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst :
atom->nlocal;
if (nlocal > _nlocal_max) {
@@ -345,9 +345,9 @@ void FixNHIntel::reset_dt()
const double * const rmass = atom->rmass;
int n = 0;
for (int i = 0; i < nlocal; i++) {
- _dtfm[n++] = dtf / rmass[i];
- _dtfm[n++] = dtf / rmass[i];
- _dtfm[n++] = dtf / rmass[i];
+ _dtfm[n++] = dtf / rmass[i];
+ _dtfm[n++] = dtf / rmass[i];
+ _dtfm[n++] = dtf / rmass[i];
}
} else {
const double * const mass = atom->mass;
@@ -364,29 +364,29 @@ void FixNHIntel::reset_dt()
const double * const rmass = atom->rmass;
int n = 0;
for (int i = 0; i < nlocal; i++)
- if (mask[i] & groupbit) {
- _dtfm[n++] = dtf / rmass[i];
- _dtfm[n++] = dtf / rmass[i];
- _dtfm[n++] = dtf / rmass[i];
+ if (mask[i] & groupbit) {
+ _dtfm[n++] = dtf / rmass[i];
+ _dtfm[n++] = dtf / rmass[i];
+ _dtfm[n++] = dtf / rmass[i];
} else {
- _dtfm[n++] = 0.0;
- _dtfm[n++] = 0.0;
- _dtfm[n++] = 0.0;
- }
+ _dtfm[n++] = 0.0;
+ _dtfm[n++] = 0.0;
+ _dtfm[n++] = 0.0;
+ }
} else {
const double * const mass = atom->mass;
const int * const type = atom->type;
int n = 0;
for (int i = 0; i < nlocal; i++)
- if (mask[i] & groupbit) {
- _dtfm[n++] = dtf / mass[type[i]];
- _dtfm[n++] = dtf / mass[type[i]];
- _dtfm[n++] = dtf / mass[type[i]];
+ if (mask[i] & groupbit) {
+ _dtfm[n++] = dtf / mass[type[i]];
+ _dtfm[n++] = dtf / mass[type[i]];
+ _dtfm[n++] = dtf / mass[type[i]];
} else {
- _dtfm[n++] = 0.0;
- _dtfm[n++] = 0.0;
- _dtfm[n++] = 0.0;
- }
+ _dtfm[n++] = 0.0;
+ _dtfm[n++] = 0.0;
+ _dtfm[n++] = 0.0;
+ }
}
}
}
@@ -431,9 +431,9 @@ void FixNHIntel::nh_v_press()
#endif
for (int i = 0; i < nlocal; i++) {
if (mask[i] & groupbit) {
- v[i].x *= f0;
- v[i].y *= f1;
- v[i].z *= f2;
+ v[i].x *= f0;
+ v[i].y *= f1;
+ v[i].z *= f2;
}
}
}
@@ -506,7 +506,7 @@ void FixNHIntel::nh_v_temp()
#pragma simd
#endif
for (int i = 0; i < _nlocal3; i++)
- v[i] *= factor_eta;
+ v[i] *= factor_eta;
} else {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
@@ -514,12 +514,12 @@ void FixNHIntel::nh_v_temp()
#endif
for (int i = 0; i < _nlocal3; i++) {
if (_dtfm[i] != 0.0)
- v[i] *= factor_eta;
+ v[i] *= factor_eta;
}
}
}
-double FixNHIntel::memory_usage()
+double FixNHIntel::memory_usage()
{
return FixNH::memory_usage() + _nlocal_max * 3 * sizeof(double);
}
diff --git a/src/USER-INTEL/fix_nh_intel.h b/src/USER-INTEL/fix_nh_intel.h
index 32ed6c8534..cc6ba8c481 100644
--- a/src/USER-INTEL/fix_nh_intel.h
+++ b/src/USER-INTEL/fix_nh_intel.h
@@ -35,7 +35,7 @@ class FixNHIntel : public FixNH {
int _nlocal3, _nlocal_max;
virtual void remap();
- virtual void nve_x();
+ virtual void nve_x();
virtual void nve_v();
virtual void nh_v_press();
virtual void nh_v_temp();
diff --git a/src/USER-INTEL/fix_nve_asphere_intel.cpp b/src/USER-INTEL/fix_nve_asphere_intel.cpp
index 6563165454..8ad63f7326 100644
--- a/src/USER-INTEL/fix_nve_asphere_intel.cpp
+++ b/src/USER-INTEL/fix_nve_asphere_intel.cpp
@@ -36,7 +36,7 @@ using namespace FixConst;
/* ---------------------------------------------------------------------- */
FixNVEAsphereIntel::FixNVEAsphereIntel(LAMMPS *lmp, int narg, char **arg) :
- FixNVE(lmp, narg, arg)
+ FixNVE(lmp, narg, arg)
{
_dtfm = 0;
_nlocal3 = 0;
@@ -129,9 +129,9 @@ void FixNVEAsphereIntel::initial_integrate(int vflag)
#endif
for (int i = 0; i < nlocal; i++) {
if (mask[i] & groupbit) {
- double *quat = bonus[ellipsoid[i]].quat;
- ME_omega_richardson(dtf, dtq, angmom[i], quat, torque[i], _inertia0[i],
- _inertia1[i], _inertia2[i]);
+ double *quat = bonus[ellipsoid[i]].quat;
+ ME_omega_richardson(dtf, dtq, angmom[i], quat, torque[i], _inertia0[i],
+ _inertia1[i], _inertia2[i]);
}
}
}
@@ -168,7 +168,7 @@ void FixNVEAsphereIntel::reset_dt() {
dtf = 0.5 * update->dt * force->ftm2v;
const int * const mask = atom->mask;
- const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst :
+ const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst :
atom->nlocal;
if (nlocal > _nlocal_max) {
@@ -211,27 +211,27 @@ void FixNVEAsphereIntel::reset_dt() {
for (int i = 0; i < nlocal; i++) {
if (mask[i] & groupbit) {
_dtfm[n++] = dtf / rmass[i];
- _dtfm[n++] = dtf / rmass[i];
- _dtfm[n++] = dtf / rmass[i];
- double *shape = bonus[ellipsoid[i]].shape;
- double idot = INERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
- if (idot != 0.0) idot = 1.0 / idot;
- _inertia0[i] = idot;
- idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
- if (idot != 0.0) idot = 1.0 / idot;
- _inertia1[i] = idot;
- idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
- if (idot != 0.0) idot = 1.0 / idot;
- _inertia2[i] = idot;
+ _dtfm[n++] = dtf / rmass[i];
+ _dtfm[n++] = dtf / rmass[i];
+ double *shape = bonus[ellipsoid[i]].shape;
+ double idot = INERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
+ if (idot != 0.0) idot = 1.0 / idot;
+ _inertia0[i] = idot;
+ idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
+ if (idot != 0.0) idot = 1.0 / idot;
+ _inertia1[i] = idot;
+ idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
+ if (idot != 0.0) idot = 1.0 / idot;
+ _inertia2[i] = idot;
} else {
_dtfm[n++] = 0.0;
- _dtfm[n++] = 0.0;
- _dtfm[n++] = 0.0;
+ _dtfm[n++] = 0.0;
+ _dtfm[n++] = 0.0;
}
}
}
}
-double FixNVEAsphereIntel::memory_usage()
+double FixNVEAsphereIntel::memory_usage()
{
return FixNVE::memory_usage() + _nlocal_max * 12 * sizeof(double);
}
diff --git a/src/USER-INTEL/fix_nve_intel.cpp b/src/USER-INTEL/fix_nve_intel.cpp
index 3fb290b3ab..c0f6da06ae 100644
--- a/src/USER-INTEL/fix_nve_intel.cpp
+++ b/src/USER-INTEL/fix_nve_intel.cpp
@@ -29,7 +29,7 @@ using namespace FixConst;
/* ---------------------------------------------------------------------- */
FixNVEIntel::FixNVEIntel(LAMMPS *lmp, int narg, char **arg) :
- FixNVE(lmp, narg, arg)
+ FixNVE(lmp, narg, arg)
{
_dtfm = 0;
_nlocal3 = 0;
@@ -91,7 +91,7 @@ void FixNVEIntel::initial_integrate(int vflag)
for (int i = 0; i < _nlocal3; i++) {
if (_dtfm[i] != 0.0) {
v[i] += _dtfm[i] * f[i];
- x[i] += dtv * v[i];
+ x[i] += dtv * v[i];
}
}
}
@@ -130,7 +130,7 @@ void FixNVEIntel::reset_dt() {
dtf = 0.5 * update->dt * force->ftm2v;
const int * const mask = atom->mask;
- const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst :
+ const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst :
atom->nlocal;
if (nlocal > _nlocal_max) {
@@ -146,9 +146,9 @@ void FixNVEIntel::reset_dt() {
const double * const rmass = atom->rmass;
int n = 0;
for (int i = 0; i < nlocal; i++) {
- _dtfm[n++] = dtf / rmass[i];
- _dtfm[n++] = dtf / rmass[i];
- _dtfm[n++] = dtf / rmass[i];
+ _dtfm[n++] = dtf / rmass[i];
+ _dtfm[n++] = dtf / rmass[i];
+ _dtfm[n++] = dtf / rmass[i];
}
} else {
const double * const mass = atom->mass;
@@ -165,34 +165,34 @@ void FixNVEIntel::reset_dt() {
const double * const rmass = atom->rmass;
int n = 0;
for (int i = 0; i < nlocal; i++)
- if (mask[i] & groupbit) {
- _dtfm[n++] = dtf / rmass[i];
- _dtfm[n++] = dtf / rmass[i];
- _dtfm[n++] = dtf / rmass[i];
+ if (mask[i] & groupbit) {
+ _dtfm[n++] = dtf / rmass[i];
+ _dtfm[n++] = dtf / rmass[i];
+ _dtfm[n++] = dtf / rmass[i];
} else {
- _dtfm[n++] = 0.0;
- _dtfm[n++] = 0.0;
- _dtfm[n++] = 0.0;
- }
+ _dtfm[n++] = 0.0;
+ _dtfm[n++] = 0.0;
+ _dtfm[n++] = 0.0;
+ }
} else {
const double * const mass = atom->mass;
const int * const type = atom->type;
int n = 0;
for (int i = 0; i < nlocal; i++)
- if (mask[i] & groupbit) {
- _dtfm[n++] = dtf / mass[type[i]];
- _dtfm[n++] = dtf / mass[type[i]];
- _dtfm[n++] = dtf / mass[type[i]];
+ if (mask[i] & groupbit) {
+ _dtfm[n++] = dtf / mass[type[i]];
+ _dtfm[n++] = dtf / mass[type[i]];
+ _dtfm[n++] = dtf / mass[type[i]];
} else {
- _dtfm[n++] = 0.0;
- _dtfm[n++] = 0.0;
- _dtfm[n++] = 0.0;
- }
+ _dtfm[n++] = 0.0;
+ _dtfm[n++] = 0.0;
+ _dtfm[n++] = 0.0;
+ }
}
}
}
-double FixNVEIntel::memory_usage()
+double FixNVEIntel::memory_usage()
{
return FixNVE::memory_usage() + _nlocal_max * 3 * sizeof(double);
}
diff --git a/src/USER-INTEL/improper_cvff_intel.cpp b/src/USER-INTEL/improper_cvff_intel.cpp
index 0fb02420b9..dc9765d913 100644
--- a/src/USER-INTEL/improper_cvff_intel.cpp
+++ b/src/USER-INTEL/improper_cvff_intel.cpp
@@ -42,7 +42,7 @@ typedef struct { int a,b,c,d,t; } int5_t;
/* ---------------------------------------------------------------------- */
-ImproperCvffIntel::ImproperCvffIntel(LAMMPS *lmp) :
+ImproperCvffIntel::ImproperCvffIntel(LAMMPS *lmp) :
ImproperCvff(lmp)
{
suffix_flag |= Suffix::INTEL;
@@ -80,23 +80,23 @@ void ImproperCvffIntel::compute(int eflag, int vflag)
template
void ImproperCvffIntel::compute(int eflag, int vflag,
- IntelBuffers *buffers,
- const ForceConst &fc)
+ IntelBuffers *buffers,
+ const ForceConst &fc)
{
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = 0;
if (evflag) {
- if (eflag) {
+ if (vflag && !eflag) {
if (force->newton_bond)
- eval<1,1,1>(vflag, buffers, fc);
+ eval<0,1,1>(vflag, buffers, fc);
else
- eval<1,1,0>(vflag, buffers, fc);
+ eval<0,1,0>(vflag, buffers, fc);
} else {
if (force->newton_bond)
- eval<1,0,1>(vflag, buffers, fc);
+ eval<1,1,1>(vflag, buffers, fc);
else
- eval<1,0,0>(vflag, buffers, fc);
+ eval<1,1,0>(vflag, buffers, fc);
}
} else {
if (force->newton_bond)
@@ -108,10 +108,10 @@ void ImproperCvffIntel::compute(int eflag, int vflag,
/* ---------------------------------------------------------------------- */
-template
-void ImproperCvffIntel::eval(const int vflag,
- IntelBuffers *buffers,
- const ForceConst &fc)
+template
+void ImproperCvffIntel::eval(const int vflag,
+ IntelBuffers *buffers,
+ const ForceConst &fc)
{
const int inum = neighbor->nimproperlist;
if (inum == 0) return;
@@ -131,12 +131,9 @@ void ImproperCvffIntel::eval(const int vflag,
const int nthreads = tc;
acc_t oeimproper, ov0, ov1, ov2, ov3, ov4, ov5;
- if (EVFLAG) {
- if (EFLAG)
- oeimproper = (acc_t)0.0;
- if (vflag) {
- ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
- }
+ if (EFLAG) oeimproper = (acc_t)0.0;
+ if (VFLAG && vflag) {
+ ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
}
#if defined(_OPENMP)
@@ -145,17 +142,31 @@ void ImproperCvffIntel::eval(const int vflag,
reduction(+:oeimproper,ov0,ov1,ov2,ov3,ov4,ov5)
#endif
{
- int nfrom, nto, tid;
+ int nfrom, npl, nto, tid;
+ #ifdef LMP_INTEL_USE_SIMDOFF_FIX
IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads);
+ #else
+ IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads);
+ #endif
FORCE_T * _noalias const f = f_start + (tid * f_stride);
if (fix->need_zero(tid))
memset(f, 0, f_stride * sizeof(FORCE_T));
- const int5_t * _noalias const improperlist =
+ const int5_t * _noalias const improperlist =
(int5_t *) neighbor->improperlist[0];
+ #ifdef LMP_INTEL_USE_SIMDOFF_FIX
+ acc_t seimproper, sv0, sv1, sv2, sv3, sv4, sv5;
+ if (EFLAG) seimproper = (acc_t)0.0;
+ if (VFLAG && vflag) {
+ sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
+ }
+ #pragma simd reduction(+:seimproper, sv0, sv1, sv2, sv3, sv4, sv5)
for (int n = nfrom; n < nto; n++) {
+ #else
+ for (int n = nfrom; n < nto; n += npl) {
+ #endif
const int i1 = improperlist[n].a;
const int i2 = improperlist[n].b;
const int i3 = improperlist[n].c;
@@ -216,28 +227,29 @@ void ImproperCvffIntel::eval(const int vflag,
flt_t c = (c0 + c1mag*c2mag) * s12;
// error check
-
+ #ifndef LMP_INTEL_USE_SIMDOFF_FIX
if (c > PTOLERANCE || c < MTOLERANCE) {
int me;
- MPI_Comm_rank(world,&me);
- if (screen) {
+ MPI_Comm_rank(world,&me);
+ if (screen) {
char str[128];
- sprintf(str,"Improper problem: %d " BIGINT_FORMAT " "
+ sprintf(str,"Improper problem: %d " BIGINT_FORMAT " "
TAGINT_FORMAT " " TAGINT_FORMAT " "
TAGINT_FORMAT " " TAGINT_FORMAT,
me,update->ntimestep,
atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
- error->warning(FLERR,str,0);
- fprintf(screen," 1st atom: %d %g %g %g\n",
+ error->warning(FLERR,str,0);
+ fprintf(screen," 1st atom: %d %g %g %g\n",
me,x[i1].x,x[i1].y,x[i1].z);
- fprintf(screen," 2nd atom: %d %g %g %g\n",
+ fprintf(screen," 2nd atom: %d %g %g %g\n",
me,x[i2].x,x[i2].y,x[i2].z);
- fprintf(screen," 3rd atom: %d %g %g %g\n",
+ fprintf(screen," 3rd atom: %d %g %g %g\n",
me,x[i3].x,x[i3].y,x[i3].z);
- fprintf(screen," 4th atom: %d %g %g %g\n",
+ fprintf(screen," 4th atom: %d %g %g %g\n",
me,x[i4].x,x[i4].y,x[i4].z);
}
}
+ #endif
if (c > (flt_t)1.0) c = (flt_t)1.0;
if (c < (flt_t)-1.0) c = (flt_t)-1.0;
@@ -250,36 +262,41 @@ void ImproperCvffIntel::eval(const int vflag,
const int m = fc.fc[type].multiplicity;
flt_t p, pd;
- if (m == 2) {
- p = (flt_t)2.0*c*c;
- pd = (flt_t)2.0*c;
- } else if (m == 3) {
- const flt_t rc2 = c*c;
- p = ((flt_t)4.0*rc2-(flt_t)3.0)*c + (flt_t)1.0;
- pd = (flt_t)6.0*rc2 - (flt_t)1.5;
- } else if (m == 4) {
- const flt_t rc2 = c*c;
- p = (flt_t)8.0*(rc2-1)*rc2 + (flt_t)2.0;
- pd = ((flt_t)16.0*rc2-(flt_t)8.0)*c;
- } else if (m == 6) {
- const flt_t rc2 = c*c;
- p = (((flt_t)32.0*rc2-(flt_t)48.0)*rc2 + (flt_t)18.0)*rc2;
- pd = ((flt_t)96.0*(rc2-(flt_t)1.0)*rc2 + (flt_t)18.0)*c;
- } else if (m == 1) {
- p = c + (flt_t)1.0;
- pd = (flt_t)0.5;
- } else if (m == 5) {
- const flt_t rc2 = c*c;
- p = (((flt_t)16.0*rc2-(flt_t)20.0)*rc2 + (flt_t)5.0)*c + (flt_t)1.0;
- pd = ((flt_t)40.0*rc2-(flt_t)30.0)*rc2 + (flt_t)2.5;
- } else if (m == 0) {
- p = (flt_t)2.0;
- pd = (flt_t)0.0;
+ #ifdef LMP_INTEL_USE_SIMDOFF_FIX
+ #pragma simdoff
+ #endif
+ {
+ if (m == 2) {
+ p = (flt_t)2.0*c*c;
+ pd = (flt_t)2.0*c;
+ } else if (m == 3) {
+ const flt_t rc2 = c*c;
+ p = ((flt_t)4.0*rc2-(flt_t)3.0)*c + (flt_t)1.0;
+ pd = (flt_t)6.0*rc2 - (flt_t)1.5;
+ } else if (m == 4) {
+ const flt_t rc2 = c*c;
+ p = (flt_t)8.0*(rc2-1)*rc2 + (flt_t)2.0;
+ pd = ((flt_t)16.0*rc2-(flt_t)8.0)*c;
+ } else if (m == 6) {
+ const flt_t rc2 = c*c;
+ p = (((flt_t)32.0*rc2-(flt_t)48.0)*rc2 + (flt_t)18.0)*rc2;
+ pd = ((flt_t)96.0*(rc2-(flt_t)1.0)*rc2 + (flt_t)18.0)*c;
+ } else if (m == 1) {
+ p = c + (flt_t)1.0;
+ pd = (flt_t)0.5;
+ } else if (m == 5) {
+ const flt_t rc2 = c*c;
+ p = (((flt_t)16.0*rc2-(flt_t)20.0)*rc2 + (flt_t)5.0)*c + (flt_t)1.0;
+ pd = ((flt_t)40.0*rc2-(flt_t)30.0)*rc2 + (flt_t)2.5;
+ } else if (m == 0) {
+ p = (flt_t)2.0;
+ pd = (flt_t)0.0;
+ }
}
if (fc.fc[type].sign == -1) {
- p = (flt_t)2.0 - p;
- pd = -pd;
+ p = (flt_t)2.0 - p;
+ pd = -pd;
}
flt_t eimproper;
@@ -317,46 +334,63 @@ void ImproperCvffIntel::eval(const int vflag,
// apply force to each of 4 atoms
- if (NEWTON_BOND || i1 < nlocal) {
- f[i1].x += f1x;
- f[i1].y += f1y;
- f[i1].z += f1z;
+ #ifdef LMP_INTEL_USE_SIMDOFF_FIX
+ #pragma simdoff
+ #endif
+ {
+ if (NEWTON_BOND || i1 < nlocal) {
+ f[i1].x += f1x;
+ f[i1].y += f1y;
+ f[i1].z += f1z;
+ }
+
+ if (NEWTON_BOND || i2 < nlocal) {
+ f[i2].x += f2x;
+ f[i2].y += f2y;
+ f[i2].z += f2z;
+ }
+
+ if (NEWTON_BOND || i3 < nlocal) {
+ f[i3].x += f3x;
+ f[i3].y += f3y;
+ f[i3].z += f3z;
+ }
+
+ if (NEWTON_BOND || i4 < nlocal) {
+ f[i4].x += f4x;
+ f[i4].y += f4y;
+ f[i4].z += f4z;
+ }
}
- if (NEWTON_BOND || i2 < nlocal) {
- f[i2].x += f2x;
- f[i2].y += f2y;
- f[i2].z += f2z;
- }
-
- if (NEWTON_BOND || i3 < nlocal) {
- f[i3].x += f3x;
- f[i3].y += f3y;
- f[i3].z += f3z;
- }
-
- if (NEWTON_BOND || i4 < nlocal) {
- f[i4].x += f4x;
- f[i4].y += f4y;
- f[i4].z += f4z;
- }
-
- if (EVFLAG) {
- IP_PRE_ev_tally_dihed(EFLAG, eatom, vflag, eimproper, i1, i2, i3, i4,
- f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z,
- vb1x, vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x,
- vb3y, vb3z, oeimproper, f, NEWTON_BOND, nlocal,
- ov0, ov1, ov2, ov3, ov4, ov5);
+ if (EFLAG || VFLAG) {
+ #ifdef LMP_INTEL_USE_SIMDOFF_FIX
+ IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, eimproper, i1, i2,
+ i3, i4, f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y,
+ f4z, vb1x, vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm,
+ vb3x, vb3y, vb3z, seimproper, f, NEWTON_BOND,
+ nlocal, sv0, sv1, sv2, sv3, sv4, sv5);
+ #else
+ IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, eimproper, i1, i2,
+ i3, i4, f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y,
+ f4z, vb1x, vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm,
+ vb3x, vb3y, vb3z, oeimproper, f, NEWTON_BOND,
+ nlocal, ov0, ov1, ov2, ov3, ov4, ov5);
+ #endif
}
} // for n
- } // omp parallel
- if (EVFLAG) {
- if (EFLAG)
- energy += oeimproper;
- if (vflag) {
- virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
- virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
+ #ifdef LMP_INTEL_USE_SIMDOFF_FIX
+ if (EFLAG) oeimproper += seimproper;
+ if (VFLAG && vflag) {
+ ov0 += sv0; ov1 += sv1; ov2 += sv2;
+ ov3 += sv3; ov4 += sv4; ov5 += sv5;
}
+ #endif
+ } // omp parallel
+ if (EFLAG) energy += oeimproper;
+ if (VFLAG && vflag) {
+ virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
+ virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
}
fix->set_reduce_flag();
@@ -394,7 +428,7 @@ void ImproperCvffIntel::init_style()
template
void ImproperCvffIntel::pack_force_const(ForceConst &fc,
- IntelBuffers *buffers)
+ IntelBuffers *buffers)
{
const int bp1 = atom->nimpropertypes + 1;
fc.set_ntypes(bp1,memory);
@@ -410,11 +444,11 @@ void ImproperCvffIntel::pack_force_const(ForceConst &fc,
template
void ImproperCvffIntel::ForceConst::set_ntypes(const int nimproper,
- Memory *memory) {
+ Memory *memory) {
if (nimproper != _nimpropertypes) {
if (_nimpropertypes > 0)
_memory->destroy(fc);
-
+
if (nimproper > 0)
_memory->create(fc,nimproper,"improperharmonicintel.fc");
}
diff --git a/src/USER-INTEL/improper_cvff_intel.h b/src/USER-INTEL/improper_cvff_intel.h
index 95ccd8f9d2..cb5da25f99 100644
--- a/src/USER-INTEL/improper_cvff_intel.h
+++ b/src/USER-INTEL/improper_cvff_intel.h
@@ -45,8 +45,8 @@ class ImproperCvffIntel : public ImproperCvff {
void compute(int eflag, int vflag, IntelBuffers *buffers,
const ForceConst &fc);
template
- void eval(const int vflag, IntelBuffers * buffers,
- const ForceConst &fc);
+ void eval(const int vflag, IntelBuffers * buffers,
+ const ForceConst &fc);
template
void pack_force_const(ForceConst &fc,
IntelBuffers *buffers);
diff --git a/src/USER-INTEL/improper_harmonic_intel.cpp b/src/USER-INTEL/improper_harmonic_intel.cpp
index 071ff548ea..fe0efca5ec 100644
--- a/src/USER-INTEL/improper_harmonic_intel.cpp
+++ b/src/USER-INTEL/improper_harmonic_intel.cpp
@@ -43,7 +43,7 @@ typedef struct { int a,b,c,d,t; } int5_t;
/* ---------------------------------------------------------------------- */
-ImproperHarmonicIntel::ImproperHarmonicIntel(LAMMPS *lmp) :
+ImproperHarmonicIntel::ImproperHarmonicIntel(LAMMPS *lmp) :
ImproperHarmonic(lmp)
{
suffix_flag |= Suffix::INTEL;
@@ -81,23 +81,23 @@ void ImproperHarmonicIntel::compute(int eflag, int vflag)
template
void ImproperHarmonicIntel::compute(int eflag, int vflag,
- IntelBuffers *buffers,
- const ForceConst &fc)
+ IntelBuffers